A Discrete-Event Network Simulator
API
dpdk-net-device.cc
Go to the documentation of this file.
1/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2/*
3 * Copyright (c) 2019 NITK Surathkal
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation;
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Author: Harsh Patel <thadodaharsh10@gmail.com>
19 * Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
20 * Mohit P. Tahiliani <tahiliani@nitk.edu.in>
21 */
22
23#include "dpdk-net-device.h"
24
25#include "ns3/log.h"
26#include "ns3/net-device-queue-interface.h"
27#include "ns3/simulator.h"
28#include "ns3/system-mutex.h"
29#include "ns3/uinteger.h"
30
31#include <sys/ioctl.h>
32#include <sys/mman.h>
33#include <sys/signal.h>
34#include <unistd.h>
35
36#include <poll.h>
37
38#include <rte_eal.h>
39#include <rte_ethdev.h>
40#include <rte_common.h>
41#include <rte_mempool.h>
42#include <rte_mbuf.h>
43#include <rte_malloc.h>
44#include <rte_cycles.h>
45#include <rte_port.h>
46
47namespace ns3 {
48
// Logging component used by the NS_LOG_* macros in this file.
49NS_LOG_COMPONENT_DEFINE ("DpdkNetDevice");
50
// Register DpdkNetDevice with the ns-3 TypeId system.
51NS_OBJECT_ENSURE_REGISTERED (DpdkNetDevice);
52
// Class-wide stop flag. In this file it is set by the signal handler and by
// the destructor, cleared in InitDpdk, and polled by the Rx loop and by the
// link-status check.
// NOTE(review): volatile does not provide inter-thread synchronization;
// consider whether an atomic flag is needed here -- confirm.
53volatile bool DpdkNetDevice::m_forceQuit = false;
54
/**
 * Build (once) and return the TypeId describing ns3::DpdkNetDevice together
 * with its configurable attributes.
 *
 * NOTE(review): the line carrying the function name and several lines of the
 * attribute chain (accessor/checker arguments) are absent from this listing;
 * the member summary suggests this is DpdkNetDevice::GetTypeId -- confirm
 * against the upstream file before editing the chain.
 *
 * \return the TypeId object for this class
 */
55TypeId
57{
// Function-local static: the TypeId is constructed on first call only.
58 static TypeId tid = TypeId ("ns3::DpdkNetDevice")
60 .SetGroupName ("FdNetDevice")
61 .AddConstructor<DpdkNetDevice> ()
// Delay before a partially filled Tx burst is sent; default 2000 us.
62 .AddAttribute ("TxTimeout",
63 "The time to wait before transmitting burst from Tx buffer.",
64 TimeValue (MicroSeconds (2000)),
// Rx burst size; default 64, checked as uint32_t.
67 .AddAttribute ("MaxRxBurst",
68 "Size of Rx Burst.",
69 UintegerValue (64),
71 MakeUintegerChecker<uint32_t> ())
// Tx burst size; default 64, checked as uint32_t.
72 .AddAttribute ("MaxTxBurst",
73 "Size of Tx Burst.",
74 UintegerValue (64),
76 MakeUintegerChecker<uint32_t> ())
// Mempool cache size; default 256, checked as uint32_t.
77 .AddAttribute ("MempoolCacheSize",
78 "Size of mempool cache.",
79 UintegerValue (256),
81 MakeUintegerChecker<uint32_t> ())
// Rx descriptor count; default 1024, checked as uint16_t.
82 .AddAttribute ("NbRxDesc",
83 "Number of Rx descriptors.",
84 UintegerValue (1024),
86 MakeUintegerChecker<uint16_t> ())
// Tx descriptor count; default 1024, checked as uint16_t.
87 .AddAttribute ("NbTxDesc",
88 "Number of Tx descriptors.",
89 UintegerValue (1024),
91 MakeUintegerChecker<uint16_t> ())
92 ;
93 return tid;
94}
95
// Constructor body: starts with no packet memory pool (m_mempool is NULL
// until InitDpdk creates one) and logs the call.
// NOTE(review): the line carrying the constructor's signature is absent from
// this listing.
97 : m_mempool (NULL)
98{
99 NS_LOG_FUNCTION (this);
100}
101
// Destructor body: asks the Rx loop to stop, waits for lcore 1, then stops
// and closes the Ethernet port.
// NOTE(review): the signature line and one statement (between the log call
// and the flag assignment) are absent from this listing.
103{
104 NS_LOG_FUNCTION (this);
// Raise the stop flag polled by the Rx loop in LaunchCore.
106 m_forceQuit = true;
107
// Wait for lcore 1 (the only lcore on which LaunchCore runs the Rx loop)
// before tearing the port down.
108 rte_eal_wait_lcore (1);
109 rte_eth_dev_stop (m_portId);
110 rte_eth_dev_close (m_portId);
111}
112
113void
114DpdkNetDevice::SetDeviceName (std::string deviceName)
115{
116 NS_LOG_FUNCTION (this);
117
118 m_deviceName = deviceName;
119}
120
/**
 * Query the link status of port m_portId with rte_eth_link_get, looping at
 * most MAX_CHECK_TIME + 1 times, and print a message when the link is found
 * down after the print flag has been set.
 *
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the member summary suggests DpdkNetDevice::CheckAllPortsLinkStatus
 * -- confirm.
 */
121void
123{
124 NS_LOG_FUNCTION (this);
125
126 #define CHECK_INTERVAL 100 /* 100ms */
127 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
128 uint8_t count, allPortsUp, printFlag = 0;
129 struct rte_eth_link link;
130
131 for (count = 0; count <= MAX_CHECK_TIME; count++)
132 {
133
134 allPortsUp = 1;
135
// Stop immediately once shutdown has been requested.
136 if (m_forceQuit)
137 {
138 return;
139 }
// NOTE(review): for any in-range shift this expression is non-zero, so the
// branch does not fire; it looks like the remnant of a port-mask test.
140 if ((1 << m_portId) == 0)
141 {
142 continue;
143 }
144 memset (&link, 0, sizeof(link));
145 rte_eth_link_get (m_portId, &link);
146 /* print link status if flag set */
147 if (printFlag == 1)
148 {
149 if (link.link_status)
150 {
151 continue;
152 }
153 else
154 {
155 printf ("Port %d Link Down\n", m_portId);
156 }
// Both arms end in continue, so nothing below runs once printFlag is 1.
157 continue;
158 }
159 /* clear allPortsUp flag if any link down */
160 if (link.link_status == ETH_LINK_DOWN)
161 {
162 allPortsUp = 0;
// NOTE(review): this break leaves the enclosing for loop, so a down link
// ends the check on the spot and the delay branch below is never reached
// with allPortsUp == 0 -- confirm this is intended.
163 break;
164 }
165
166 /* after finally printing all link status, get out */
// NOTE(review): unreachable, because the printFlag == 1 case above always
// continues.
167 if (printFlag == 1)
168 {
169 break;
170 }
171
172 if (allPortsUp == 0)
173 {
174 fflush (stdout);
175 rte_delay_ms (CHECK_INTERVAL);
176 }
177
178 /* set the printFlag if all ports up or timeout */
179 if (allPortsUp == 1 || count == (MAX_CHECK_TIME - 1))
180 {
181 printFlag = 1;
182 }
183 }
184}
185
/**
 * On SIGINT or SIGTERM, print a notice and raise m_forceQuit; any other
 * signal number is ignored.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the member summary suggests
 * DpdkNetDevice::SignalHandler (int signum) -- confirm.
 * NOTE(review): printf is not async-signal-safe; confirm this is acceptable
 * for a handler installed with signal ().
 */
186void
188{
189 if (signum == SIGINT || signum == SIGTERM)
190 {
191 printf ("\n\nSignal %d received, preparing to exit...\n",
192 signum);
193 m_forceQuit = true;
194 }
195}
196
/**
 * Flush the packets accumulated in m_txBuffer to Tx queue 0 of port
 * m_portId.
 *
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the member summary suggests DpdkNetDevice::HandleTx -- confirm.
 */
197void
199{
// Only queue 0 is used; InitDpdk configures a single Tx queue.
200 int queueId = 0;
201 rte_eth_tx_buffer_flush (m_portId, queueId, m_txBuffer);
202}
203
/**
 * Receive one burst of packets from Rx queue 0 of port m_portId into
 * m_rxBuffer and pass each packet's data pointer and length to
 * FdNetDevice::ReceiveCallback.
 *
 * NOTE(review): the line carrying the function name and the final line of
 * the rte_eth_rx_burst call (burst-size argument and closing parenthesis)
 * are absent from this listing; the member summary suggests
 * DpdkNetDevice::HandleRx -- confirm.
 */
204void
206{
207 int queueId = 0;
// The burst call's return value is stored as the number of valid entries
// in pkts.
208 m_rxBuffer->length = rte_eth_rx_burst (m_portId,
209 queueId,
210 m_rxBuffer->pkts,
212
213 for (uint16_t i = 0; i < m_rxBuffer->length; i++)
214 {
215 struct rte_mbuf *pkt = NULL;
216 pkt = m_rxBuffer->pkts[i];
217
// Skip empty slots defensively.
218 if (!pkt)
219 {
220 continue;
221 }
222
// Hand the start of the packet data and its data_len to the receive path.
// NOTE(review): the mbuf is not freed in this function; presumably it is
// released later through FreeBuffer -- confirm.
223 uint8_t * buf = rte_pktmbuf_mtod (pkt, uint8_t *);
224 size_t length = pkt->data_len;
225 FdNetDevice::ReceiveCallback (buf,length);
226 }
227
// Mark the Rx buffer as empty again.
228 m_rxBuffer->length = 0;
229}
230
/**
 * Per-lcore entry point: on lcore 1, run HandleRx () in a loop until
 * m_forceQuit is set; on any other lcore, return immediately.
 *
 * arg is cast to a DpdkNetDevice pointer and used as the device to poll.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the member summary suggests
 * DpdkNetDevice::LaunchCore (void *arg) -- confirm.
 *
 * \return always 0
 */
231int
233{
234 DpdkNetDevice *dpdkNetDevice = (DpdkNetDevice*) arg;
235 unsigned lcoreId;
236 lcoreId = rte_lcore_id ();
// Only lcore 1 runs the receive loop.
237 if (lcoreId != 1)
238 {
239 return 0;
240 }
241
// Busy-poll for received packets until shutdown is requested.
242 while (!m_forceQuit)
243 {
244 dpdkNetDevice->HandleRx ();
245 }
246
247 return 0;
248}
249
/**
 * Report the link as up unconditionally; no device query is made. The
 * mailing-list post referenced in the body is the stated rationale.
 *
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the member summary suggests DpdkNetDevice::IsLinkUp (void) const
 * -- confirm.
 *
 * \return always true
 */
250bool
252{
253 // Refer https://mails.dpdk.org/archives/users/2018-December/003822.html
254 return true;
255}
256
/**
 * Bind the device to a DPDK driver, initialize the DPDK EAL, and bring up
 * port m_portId with one Rx and one Tx queue, then launch LaunchCore on the
 * lcores.
 *
 * Every failure path calls rte_exit (EXIT_FAILURE, ...).
 *
 * NOTE(review): a few lines are absent from this listing (inside the nbMbufs
 * expression, inside the rte_pktmbuf_pool_create call, and one statement
 * just before the lcores are launched) -- consult the upstream file before
 * changing those statements.
 *
 * \param argc number of EAL arguments, forwarded to rte_eal_init
 * \param argv EAL argument vector, forwarded to rte_eal_init
 * \param dpdkDriver driver name placed after --bind= in the bind command
 */
257void
258DpdkNetDevice::InitDpdk (int argc, char** argv, std::string dpdkDriver)
259{
260 NS_LOG_FUNCTION (this << argc << argv);
261
262 NS_LOG_INFO ("Binding device to DPDK");
// Build and run: dpdk-devbind.py --force --bind=<dpdkDriver> <m_deviceName>
// NOTE(review): both strings are placed in a shell command unescaped;
// confirm they never come from untrusted input.
263 std::string command;
264 command.append ("dpdk-devbind.py --force ");
265 command.append ("--bind=");
266 command.append (dpdkDriver.c_str ());
267 command.append (" ");
268 command.append (m_deviceName.c_str ());
269 printf ("Executing: %s\n", command.c_str ());
270 if (system (command.c_str ()))
271 {
272 rte_exit (EXIT_FAILURE, "Execution failed - bye\n");
273 }
274
275 // wait for the device to bind to Dpdk
276 sleep (5); /* 5 seconds */
277
278 NS_LOG_INFO ("Initialize DPDK EAL");
279 int ret = rte_eal_init (argc, argv);
280 if (ret < 0)
281 {
282 rte_exit (EXIT_FAILURE, "Invalid EAL arguments\n");
283 }
284
// Clear the stop flag and install handlers that raise it on SIGINT/SIGTERM.
285 m_forceQuit = false;
286 signal (SIGINT, SignalHandler);
287 signal (SIGTERM, SignalHandler);
288
289 unsigned nbPorts = rte_eth_dev_count_avail ();
290 if (nbPorts == 0)
291 {
292 rte_exit (EXIT_FAILURE, "No Ethernet ports - bye\n");
293 }
294
295 NS_LOG_INFO ("Get port id of the device");
296 if (rte_eth_dev_get_port_by_name (m_deviceName.c_str (), &m_portId) != 0)
297 {
298 rte_exit (EXIT_FAILURE, "Cannot get port id - bye\n");
299 }
300
301 // Set number of logical cores to 2
302 unsigned int nbLcores = 2;
303
// Size the mbuf pool from descriptor counts, burst sizes and per-lcore
// cache, with a floor of 8192 mbufs.
304 unsigned int nbMbufs = RTE_MAX (nbPorts * (m_nbRxDesc + m_nbTxDesc + m_maxRxPktBurst +
306 nbLcores * m_mempoolCacheSize),
307 8192U);
308
309 NS_LOG_INFO ("Create the mbuf pool");
310 m_mempool = rte_pktmbuf_pool_create ("mbuf_pool", nbMbufs,
312 RTE_MBUF_DEFAULT_BUF_SIZE,
313 rte_socket_id ());
314
315 if (m_mempool == NULL)
316 {
317 rte_exit (EXIT_FAILURE, "Cannot init mbuf pool\n");
318 }
319
320 NS_LOG_INFO ("Initialize port");
// Start from a zeroed port configuration; only header split size and Tx
// multi-queue mode are set explicitly.
321 static struct rte_eth_conf portConf = {};
322 portConf.rxmode = {};
323 portConf.rxmode.split_hdr_size = 0;
324 portConf.txmode = {};
325 portConf.txmode.mq_mode = ETH_MQ_TX_NONE;
326
327 struct rte_eth_rxconf reqConf;
328 struct rte_eth_txconf txqConf;
329 struct rte_eth_conf localPortConf = portConf;
330 struct rte_eth_dev_info devInfo;
331
332 fflush (stdout);
333 rte_eth_dev_info_get (m_portId, &devInfo);
// Enable the mbuf fast-free Tx offload only when the device advertises it.
334 if (devInfo.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
335 {
336 localPortConf.txmode.offloads |=
337 DEV_TX_OFFLOAD_MBUF_FAST_FREE;
338 }
// Configure the port with exactly one Rx queue and one Tx queue.
339 ret = rte_eth_dev_configure (m_portId, 1, 1, &localPortConf);
340 if (ret < 0)
341 {
342 rte_exit (EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
343 ret, m_portId);
344 }
345
// m_nbRxDesc and m_nbTxDesc are passed by address, so this call may change
// them.
346 ret = rte_eth_dev_adjust_nb_rx_tx_desc (m_portId, &m_nbRxDesc, &m_nbTxDesc);
347 if (ret < 0)
348 {
349 rte_exit (EXIT_FAILURE,
350 "Cannot adjust number of descriptors: err=%d, port=%u\n",
351 ret, m_portId);
352 }
353
354 NS_LOG_INFO ("Initialize one Rx queue");
355 fflush (stdout);
// Rx queue 0: device defaults plus the port-level Rx offloads, fed from
// m_mempool.
356 reqConf = devInfo.default_rxconf;
357 reqConf.offloads = localPortConf.rxmode.offloads;
358 ret = rte_eth_rx_queue_setup (m_portId, 0, m_nbRxDesc,
359 rte_eth_dev_socket_id (m_portId),
360 &reqConf,
361 m_mempool);
362 if (ret < 0)
363 {
364 rte_exit (EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
365 ret, m_portId);
366 }
367
368 NS_LOG_INFO ("Initialize one Tx queue per port");
369 fflush (stdout);
// Tx queue 0: device defaults plus the port-level Tx offloads.
370 txqConf = devInfo.default_txconf;
371 txqConf.offloads = localPortConf.txmode.offloads;
372 ret = rte_eth_tx_queue_setup (m_portId, 0, m_nbTxDesc,
373 rte_eth_dev_socket_id (m_portId),
374 &txqConf);
375 if (ret < 0)
376 {
377 rte_exit (EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
378 ret, m_portId);
379 }
380
381 NS_LOG_INFO ("Initialize Tx buffers");
382 m_txBuffer = (rte_eth_dev_tx_buffer*)
383 rte_zmalloc_socket ("tx_buffer",
384 RTE_ETH_TX_BUFFER_SIZE (m_maxTxPktBurst), 0,
385 rte_eth_dev_socket_id (m_portId));
386 NS_LOG_INFO ("Initialize Rx buffers");
// The Rx buffer reuses the Tx buffer structure, size macro and init call;
// HandleRx uses only its pkts array and length field.
387 m_rxBuffer = (rte_eth_dev_tx_buffer*)
388 rte_zmalloc_socket ("rx_buffer",
389 RTE_ETH_TX_BUFFER_SIZE (m_maxRxPktBurst), 0,
390 rte_eth_dev_socket_id (m_portId));
391 if (m_txBuffer == NULL || m_rxBuffer == NULL)
392 {
393 rte_exit (EXIT_FAILURE, "Cannot allocate buffer for rx/tx on port %u\n",
394 m_portId);
395 }
396
397 rte_eth_tx_buffer_init (m_txBuffer, m_maxTxPktBurst);
398 rte_eth_tx_buffer_init (m_rxBuffer, m_maxRxPktBurst);
399
400 NS_LOG_INFO ("Start the device");
401 ret = rte_eth_dev_start (m_portId);
402 if (ret < 0)
403 {
404 rte_exit (EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
405 ret, m_portId);
406 }
407
408 rte_eth_promiscuous_enable (m_portId);
409
411
412 NS_LOG_INFO ("Launching core threads");
// LaunchCore receives this device as its argument and itself restricts the
// Rx loop to lcore 1.
413 rte_eal_mp_remote_launch (LaunchCore, this, CALL_MASTER);
414}
415
/**
 * Allocate one mbuf from m_mempool and return a pointer to the start of its
 * packet data.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the member summary suggests
 * DpdkNetDevice::AllocateBuffer (size_t len). No length parameter is used in
 * the visible body -- confirm.
 *
 * \return pointer to the mbuf's data area, or NULL if allocation failed
 */
416uint8_t*
418{
419 struct rte_mbuf *pkt = rte_pktmbuf_alloc (m_mempool);
420 if (!pkt)
421 {
422 return NULL;
423 }
// Callers get the data pointer only; FreeBuffer and Write recover the mbuf
// header from it by pointer arithmetic.
424 uint8_t *buf = rte_pktmbuf_mtod (pkt, uint8_t *);
425 return buf;
426}
427
/**
 * Release the mbuf that owns the given packet data pointer; a null pointer
 * is ignored.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * absent from this listing; the member summary suggests
 * DpdkNetDevice::FreeBuffer (uint8_t *buf) -- confirm.
 */
428void
430{
431 struct rte_mbuf *pkt;
432
433 if (!buf)
434 {
435 return;
436 }
// Step back over the mbuf structure and the headroom to reach the mbuf
// header. This assumes buf is the default data start of an mbuf (header
// immediately followed by RTE_PKTMBUF_HEADROOM bytes) -- confirm for
// buffers not produced by AllocateBuffer.
437 pkt = (struct rte_mbuf *)
438 RTE_PTR_SUB ( buf,
439 sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
440
441 rte_pktmbuf_free (pkt);
442}
443
444ssize_t
445DpdkNetDevice::Write (uint8_t *buffer, size_t length)
446{
447 struct rte_mbuf ** pkt = new struct rte_mbuf*[1];
448 int queueId = 0;
449
450 if (buffer == NULL || m_txBuffer->length == m_maxTxPktBurst)
451 {
452 NS_LOG_ERROR ("Error allocating mbuf" << buffer);
453 return -1;
454 }
455
456 pkt[0] = (struct rte_mbuf *)
457 RTE_PTR_SUB ( buffer,
458 sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
459
460 pkt[0]->pkt_len = length;
461 pkt[0]->data_len = length;
462 rte_eth_tx_buffer (m_portId, queueId, m_txBuffer, pkt[0]);
463
464 if (m_txBuffer->length == 1)
465 {
466 // If this is a first packet in buffer, schedule a tx.
469 }
470
471 return length;
472}
473
/**
 * Drain m_pendingQueue, releasing the buffer of every pending entry with
 * FreeBuffer.
 *
 * NOTE(review): the line carrying the function name and the first statement
 * of the body are absent from this listing; the member summary suggests
 * DpdkNetDevice::DoFinishStoppingDevice (void). If the missing statement
 * takes a lock on the pending queue, keep it -- confirm against upstream.
 */
474void
476{
478
479 while (!m_pendingQueue.empty ())
480 {
// Each entry is a (buffer, length) pair; only the buffer is needed to free.
481 std::pair<uint8_t *, ssize_t> next = m_pendingQueue.front ();
482 m_pendingQueue.pop ();
483
484 FreeBuffer (next.first);
485 }
486}
487
488} // namespace ns3
A class which provides a simple way to implement a Critical Section.
Definition: system-mutex.h:119
a NetDevice to read/write network traffic from/into a Dpdk enabled port.
static int LaunchCore(void *arg)
A function to handle rx & tx operations.
virtual void FreeBuffer(uint8_t *buf)
Free the given packet buffer.
uint32_t m_maxRxPktBurst
Size of Rx burst.
void InitDpdk(int argc, char **argv, std::string dpdkDriver)
Initialize Dpdk.
void SetDeviceName(std::string deviceName)
Set device name.
void HandleTx()
Transmit packets in burst from the tx_buffer to the nic.
static void SignalHandler(int signum)
A signal handler for SIGINT and SIGTERM signals.
~DpdkNetDevice()
Destructor for the DpdkNetDevice.
struct rte_eth_dev_tx_buffer * m_txBuffer
Buffer to handle burst transmission.
struct rte_eth_dev_tx_buffer * m_rxBuffer
Buffer to handle burst reception.
uint32_t m_maxTxPktBurst
Size of Tx burst.
EventId m_txEvent
Event for stale packet transmission.
std::string m_deviceName
The device name.
static volatile bool m_forceQuit
Condition variable for Dpdk to stop.
void DoFinishStoppingDevice(void)
Complete additional actions, if any, to tear down the device.
uint16_t m_nbTxDesc
Number of Tx descriptors.
uint16_t m_nbRxDesc
Number of Rx descriptors.
bool IsLinkUp(void) const
Check the status of the link.
struct rte_mempool * m_mempool
Packet memory pool.
uint16_t m_portId
The port number of the device to be used.
virtual uint8_t * AllocateBuffer(size_t len)
Allocate packet buffer.
DpdkNetDevice()
Constructor for the DpdkNetDevice.
void HandleRx()
Receive packets in burst from the nic to the rx_buffer.
void CheckAllPortsLinkStatus(void)
Check the link status of all ports for up to 9 s, then print the final status.
Time m_txTimeout
The time to wait before transmitting burst from Tx buffer.
ssize_t Write(uint8_t *buffer, size_t length)
Write packet data to device.
static TypeId GetTypeId(void)
Get the type ID.
uint32_t m_mempoolCacheSize
Mempool cache size.
a NetDevice to read/write network traffic from/into a file descriptor.
Definition: fd-net-device.h:86
std::queue< std::pair< uint8_t *, ssize_t > > m_pendingQueue
Queue of packets that were received and scheduled for read but not yet read.
SystemMutex m_pendingReadMutex
Mutex to increase pending read counter.
Callback< bool, Ptr< NetDevice >, Ptr< const Packet >, uint16_t, const Address & > ReceiveCallback
Definition: net-device.h:318
static void Cancel(const EventId &id)
Set the cancel bit on this event: the event's associated function will not be invoked when it expires...
Definition: simulator.cc:268
static EventId Schedule(Time const &delay, FUNC f, Ts &&... args)
Schedule an event to expire after delay.
Definition: simulator.h:556
AttributeValue implementation for Time.
Definition: nstime.h:1308
a unique identifier for an interface.
Definition: type-id.h:59
TypeId SetParent(TypeId tid)
Set the parent TypeId.
Definition: type-id.cc:922
Hold an unsigned integer type.
Definition: uinteger.h:44
#define MAX_CHECK_TIME
#define CHECK_INTERVAL
Ptr< const AttributeAccessor > MakeTimeAccessor(T1 a1)
Definition: nstime.h:1309
Ptr< const AttributeAccessor > MakeUintegerAccessor(T1 a1)
Definition: uinteger.h:45
#define NS_LOG_ERROR(msg)
Use NS_LOG to output a message of level LOG_ERROR.
Definition: log.h:257
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:205
#define NS_LOG_FUNCTION(parameters)
If log level LOG_FUNCTION is enabled, this macro will output all input parameters separated by ",...
#define NS_LOG_INFO(msg)
Use NS_LOG to output a message of level LOG_INFO.
Definition: log.h:281
#define NS_OBJECT_ENSURE_REGISTERED(type)
Register an Object subclass with the TypeId system.
Definition: object-base.h:45
Time MicroSeconds(uint64_t value)
Construct a Time in the indicated unit.
Definition: nstime.h:1260
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Ptr< const AttributeChecker > MakeTimeChecker(const Time min, const Time max)
Helper to make a Time checker with bounded range.
Definition: time.cc:536