A Discrete-Event Network Simulator
API
dpdk-net-device.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * Copyright (c) 2019 NITK Surathkal
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation;
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  *
18  * Author: Harsh Patel <thadodaharsh10@gmail.com>
19  * Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
20  * Mohit P. Tahiliani <tahiliani@nitk.edu.in>
21  */
22 
23 #include "dpdk-net-device.h"
24 
25 #include "ns3/log.h"
26 #include "ns3/net-device-queue-interface.h"
27 #include "ns3/simulator.h"
28 #include "ns3/system-thread.h"
29 #include "ns3/system-condition.h"
30 #include "ns3/system-mutex.h"
31 #include "ns3/uinteger.h"
32 
33 #include <sys/ioctl.h>
34 #include <sys/mman.h>
35 #include <sys/signal.h>
36 #include <unistd.h>
37 
38 #include <poll.h>
39 
40 #include <rte_eal.h>
41 #include <rte_ethdev.h>
42 #include <rte_common.h>
43 #include <rte_mempool.h>
44 #include <rte_mbuf.h>
45 #include <rte_malloc.h>
46 #include <rte_cycles.h>
47 #include <rte_port.h>
48 
49 namespace ns3 {
50 
// Log component used by the NS_LOG_* macros in this file.
51 NS_LOG_COMPONENT_DEFINE ("DpdkNetDevice");
52 
// Register DpdkNetDevice with the TypeId system.
53 NS_OBJECT_ENSURE_REGISTERED (DpdkNetDevice);
54 
// Class-wide stop flag: the signal handler and the teardown code in this file set it
// to true, while the link-status check and the LaunchCore loop poll it.
// NOTE(review): volatile alone is not a thread-synchronization primitive in C++ --
// confirm whether an atomic flag is needed for the lcore thread.
55 volatile bool DpdkNetDevice::m_forceQuit = false;
56 
/**
 * Build (once, via a function-local static) and return the TypeId registered under
 * the name "ns3::DpdkNetDevice".
 *
 * The TypeId is placed in the "FdNetDevice" group, gets a default constructor, and
 * declares six attributes: TxTimeout (default 2000 microseconds), MaxRxBurst and
 * MaxTxBurst (default 64), MempoolCacheSize (default 256), and NbRxDesc and NbTxDesc
 * (default 1024, checked as uint16_t).
 *
 * NOTE(review): the embedded listing numbers skip 58, 61, 67, 72, 77, 82, 87 and 92, so
 * the function header and -- presumably -- the parent setup and each attribute's accessor
 * argument are missing from this copy; restore them from the upstream file before building.
 */
57 TypeId
59 {
60  static TypeId tid = TypeId ("ns3::DpdkNetDevice")
62  .SetGroupName ("FdNetDevice")
63  .AddConstructor<DpdkNetDevice> ()
64  .AddAttribute ("TxTimeout",
65  "The time to wait before transmitting burst from Tx buffer.",
66  TimeValue (MicroSeconds (2000)),
68  MakeTimeChecker ())
69  .AddAttribute ("MaxRxBurst",
70  "Size of Rx Burst.",
71  UintegerValue (64),
73  MakeUintegerChecker<uint32_t> ())
74  .AddAttribute ("MaxTxBurst",
75  "Size of Tx Burst.",
76  UintegerValue (64),
78  MakeUintegerChecker<uint32_t> ())
79  .AddAttribute ("MempoolCacheSize",
80  "Size of mempool cache.",
81  UintegerValue (256),
83  MakeUintegerChecker<uint32_t> ())
84  .AddAttribute ("NbRxDesc",
85  "Number of Rx descriptors.",
86  UintegerValue (1024),
88  MakeUintegerChecker<uint16_t> ())
89  .AddAttribute ("NbTxDesc",
90  "Number of Tx descriptors.",
91  UintegerValue (1024),
93  MakeUintegerChecker<uint16_t> ())
94  ;
95  return tid;
96 }
97 
/*
 * Constructor: the mbuf pool pointer starts out NULL; nothing else is set up here.
 * NOTE(review): listing line 98 (presumably the constructor header) is missing from
 * this copy.
 */
99  : m_mempool (NULL)
100 {
101  NS_LOG_FUNCTION (this);
102 }
103 
/*
 * Teardown (presumably the destructor body): ask the polling loop to stop, wait for
 * lcore 1, then stop and close the Ethernet port.
 * NOTE(review): listing lines 104 (the function header) and 107 (one statement) are
 * missing from this copy.
 */
105 {
106  NS_LOG_FUNCTION (this);
// Raising the shared flag ends the `while (!m_forceQuit)` loop in LaunchCore.
108  m_forceQuit = true;
109 
// Wait for lcore 1 (the only lcore that runs the loop in LaunchCore) before touching the port.
110  rte_eal_wait_lcore (1);
111  rte_eth_dev_stop (m_portId);
112  rte_eth_dev_close (m_portId);
113 }
114 
115 void
116 DpdkNetDevice::SetDeviceName (std::string deviceName)
117 {
118  NS_LOG_FUNCTION (this);
119 
120  m_deviceName = deviceName;
121 }
122 
123 void
125 {
126  NS_LOG_FUNCTION (this);
127 
128  #define CHECK_INTERVAL 100 /* 100ms */
129  #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
130  uint8_t count, allPortsUp, printFlag = 0;
131  struct rte_eth_link link;
132 
133  for (count = 0; count <= MAX_CHECK_TIME; count++)
134  {
135 
136  allPortsUp = 1;
137 
138  if (m_forceQuit)
139  {
140  return;
141  }
142  if ((1 << m_portId) == 0)
143  {
144  continue;
145  }
146  memset (&link, 0, sizeof(link));
147  rte_eth_link_get (m_portId, &link);
148  /* print link status if flag set */
149  if (printFlag == 1)
150  {
151  if (link.link_status)
152  {
153  continue;
154  }
155  else
156  {
157  printf ("Port %d Link Down\n", m_portId);
158  }
159  continue;
160  }
161  /* clear allPortsUp flag if any link down */
162  if (link.link_status == ETH_LINK_DOWN)
163  {
164  allPortsUp = 0;
165  break;
166  }
167 
168  /* after finally printing all link status, get out */
169  if (printFlag == 1)
170  {
171  break;
172  }
173 
174  if (allPortsUp == 0)
175  {
176  fflush (stdout);
177  rte_delay_ms (CHECK_INTERVAL);
178  }
179 
180  /* set the printFlag if all ports up or timeout */
181  if (allPortsUp == 1 || count == (MAX_CHECK_TIME - 1))
182  {
183  printFlag = 1;
184  }
185  }
186 }
187 
/*
 * Signal handler body: on SIGINT or SIGTERM, print a notice and raise m_forceQuit so
 * the loops that poll the flag stop. Any other signal number is ignored.
 * NOTE(review): listing line 189 (the function header that declares `signum`) is
 * missing from this copy.
 * NOTE(review): printf is not on the POSIX list of async-signal-safe functions --
 * confirm this is acceptable for a handler installed with signal().
 */
188 void
190 {
191  if (signum == SIGINT || signum == SIGTERM)
192  {
193  printf ("\n\nSignal %d received, preparing to exit...\n",
194  signum);
195  m_forceQuit = true;
196  }
197 }
198 
/*
 * Flush whatever is currently held in m_txBuffer to Tx queue 0 of the port.
 * NOTE(review): listing line 200 (the function header) is missing from this copy.
 */
199 void
201 {
// Only one Tx queue (index 0) is set up by InitDpdk.
202  int queueId = 0;
203  rte_eth_tx_buffer_flush (m_portId, queueId, m_txBuffer);
204 }
205 
/*
 * Pull one burst of packets from Rx queue 0 into m_rxBuffer and hand each packet's
 * data pointer and data_len to FdNetDevice::ReceiveCallback.
 * NOTE(review): listing lines 207 (the function header) and 213 (the tail of the
 * rte_eth_rx_burst call) are missing from this copy.
 */
206 void
208 {
// Only one Rx queue (index 0) is set up by InitDpdk.
209  int queueId = 0;
// The burst call's return value is stored as the number of valid entries in pkts[].
210  m_rxBuffer->length = rte_eth_rx_burst (m_portId,
211  queueId,
212  m_rxBuffer->pkts,
214 
215  for (uint16_t i = 0; i < m_rxBuffer->length; i++)
216  {
217  struct rte_mbuf *pkt = NULL;
218  pkt = m_rxBuffer->pkts[i];
219 
// Skip empty slots defensively.
220  if (!pkt)
221  {
222  continue;
223  }
224 
// Pass the start of the packet data and its length upward; the mbuf is not freed here.
// NOTE(review): presumably the receiver releases it later through FreeBuffer -- confirm.
225  uint8_t * buf = rte_pktmbuf_mtod (pkt, uint8_t *);
226  size_t length = pkt->data_len;
227  FdNetDevice::ReceiveCallback (buf,length);
228  }
229 
// Mark the receive buffer as drained.
230  m_rxBuffer->length = 0;
231 }
232 
/*
 * Per-lcore entry point: on lcore 1, call HandleRx on the given device in a loop
 * until m_forceQuit becomes true; on every other lcore, return at once.
 * Always returns 0.
 * NOTE(review): listing line 234 (the function header that declares `arg`) is missing
 * from this copy.
 */
233 int
235 {
// `arg` is the pointer that InitDpdk passes as `this` to rte_eal_mp_remote_launch.
236  DpdkNetDevice *dpdkNetDevice = (DpdkNetDevice*) arg;
237  unsigned lcoreId;
238  lcoreId = rte_lcore_id ();
// Only lcore 1 does the polling work.
239  if (lcoreId != 1)
240  {
241  return 0;
242  }
243 
244  while (!m_forceQuit)
245  {
246  dpdkNetDevice->HandleRx ();
247  }
248 
249  return 0;
250 }
251 
/*
 * Report the link as up unconditionally; no device state is queried here.
 * NOTE(review): listing line 253 (the function header) is missing from this copy.
 */
252 bool
254 {
255  // Refer https://mails.dpdk.org/archives/users/2018-December/003822.html
256  return true;
257 }
258 
/**
 * Bind the named device to a DPDK driver, initialize the EAL and the port, and start
 * the lcore workers.
 *
 * Steps, in order: run dpdk-devbind.py through system(), sleep 5 seconds, call
 * rte_eal_init, install the SIGINT/SIGTERM handler, look up the port id from the
 * device name, create the mbuf pool, configure the port with one Rx and one Tx queue,
 * allocate the Rx/Tx burst buffers, start the port, enable promiscuous mode, and pass
 * LaunchCore to rte_eal_mp_remote_launch. Every checked failure ends the process
 * through rte_exit (EXIT_FAILURE, ...).
 *
 * \param argc number of EAL arguments, forwarded to rte_eal_init
 * \param argv EAL argument vector, forwarded to rte_eal_init
 * \param dpdkDriver driver name placed after "--bind=" on the dpdk-devbind.py command line
 *
 * NOTE(review): listing numbers 307, 313 and 412 are skipped, so part of the mbuf-count
 * expression, part of the rte_pktmbuf_pool_create argument list and one statement
 * before the lcore launch are missing from this copy.
 * NOTE(review): dpdkDriver and m_deviceName are concatenated unquoted into a shell
 * command -- confirm both always come from trusted configuration.
 */
259 void
260 DpdkNetDevice::InitDpdk (int argc, char** argv, std::string dpdkDriver)
261 {
262  NS_LOG_FUNCTION (this << argc << argv);
263 
264  NS_LOG_INFO ("Binding device to DPDK");
// Build "dpdk-devbind.py --force --bind=<driver> <device>" and run it via the shell.
265  std::string command;
266  command.append ("dpdk-devbind.py --force ");
267  command.append ("--bind=");
268  command.append (dpdkDriver.c_str ());
269  command.append (" ");
270  command.append (m_deviceName.c_str ());
271  printf ("Executing: %s\n", command.c_str ());
// Any non-zero status from system() is treated as a failed bind.
272  if (system (command.c_str ()))
273  {
274  rte_exit (EXIT_FAILURE, "Execution failed - bye\n");
275  }
276 
277  // wait for the device to bind to Dpdk
278  sleep (5); /* 5 seconds */
279 
280  NS_LOG_INFO ("Initialize DPDK EAL");
281  int ret = rte_eal_init (argc, argv);
282  if (ret < 0)
283  {
284  rte_exit (EXIT_FAILURE, "Invalid EAL arguments\n");
285  }
286 
// Reset the stop flag and let SIGINT/SIGTERM raise it again.
287  m_forceQuit = false;
288  signal (SIGINT, SignalHandler);
289  signal (SIGTERM, SignalHandler);
290 
291  unsigned nbPorts = rte_eth_dev_count_avail ();
292  if (nbPorts == 0)
293  {
294  rte_exit (EXIT_FAILURE, "No Ethernet ports - bye\n");
295  }
296 
297  NS_LOG_INFO ("Get port id of the device");
298  if (rte_eth_dev_get_port_by_name (m_deviceName.c_str (), &m_portId) != 0)
299  {
300  rte_exit (EXIT_FAILURE, "Cannot get port id - bye\n");
301  }
302 
303  // Set number of logical cores to 2
304  unsigned int nbLcores = 2;
305 
// Size the pool from the descriptor counts, burst sizes and per-lcore caches, with a
// floor of 8192 mbufs.
306  unsigned int nbMbufs = RTE_MAX (nbPorts * (m_nbRxDesc + m_nbTxDesc + m_maxRxPktBurst +
308  nbLcores * m_mempoolCacheSize),
309  8192U);
310 
311  NS_LOG_INFO ("Create the mbuf pool");
312  m_mempool = rte_pktmbuf_pool_create ("mbuf_pool", nbMbufs,
314  RTE_MBUF_DEFAULT_BUF_SIZE,
315  rte_socket_id ());
316 
317  if (m_mempool == NULL)
318  {
319  rte_exit (EXIT_FAILURE, "Cannot init mbuf pool\n");
320  }
321 
322  NS_LOG_INFO ("Initialize port");
// Start from a zeroed port configuration; only the Tx multi-queue mode is set explicitly.
323  static struct rte_eth_conf portConf = {};
324  portConf.rxmode = {};
325  portConf.rxmode.split_hdr_size = 0;
326  portConf.txmode = {};
327  portConf.txmode.mq_mode = ETH_MQ_TX_NONE;
328 
329  struct rte_eth_rxconf reqConf;
330  struct rte_eth_txconf txqConf;
331  struct rte_eth_conf localPortConf = portConf;
332  struct rte_eth_dev_info devInfo;
333 
334  fflush (stdout);
335  rte_eth_dev_info_get (m_portId, &devInfo);
// Enable the fast-free Tx offload only when the device advertises it.
336  if (devInfo.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
337  {
338  localPortConf.txmode.offloads |=
339  DEV_TX_OFFLOAD_MBUF_FAST_FREE;
340  }
// One Rx queue and one Tx queue.
341  ret = rte_eth_dev_configure (m_portId, 1, 1, &localPortConf);
342  if (ret < 0)
343  {
344  rte_exit (EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
345  ret, m_portId);
346  }
347 
// The descriptor counts are passed by pointer, so the call may rewrite m_nbRxDesc/m_nbTxDesc.
348  ret = rte_eth_dev_adjust_nb_rx_tx_desc (m_portId, &m_nbRxDesc, &m_nbTxDesc);
349  if (ret < 0)
350  {
351  rte_exit (EXIT_FAILURE,
352  "Cannot adjust number of descriptors: err=%d, port=%u\n",
353  ret, m_portId);
354  }
355 
356  NS_LOG_INFO ("Initialize one Rx queue");
357  fflush (stdout);
// Use the device's default Rx queue settings with the port-level Rx offloads.
358  reqConf = devInfo.default_rxconf;
359  reqConf.offloads = localPortConf.rxmode.offloads;
360  ret = rte_eth_rx_queue_setup (m_portId, 0, m_nbRxDesc,
361  rte_eth_dev_socket_id (m_portId),
362  &reqConf,
363  m_mempool);
364  if (ret < 0)
365  {
366  rte_exit (EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
367  ret, m_portId);
368  }
369 
370  NS_LOG_INFO ("Initialize one Tx queue per port");
371  fflush (stdout);
// Use the device's default Tx queue settings with the port-level Tx offloads.
372  txqConf = devInfo.default_txconf;
373  txqConf.offloads = localPortConf.txmode.offloads;
374  ret = rte_eth_tx_queue_setup (m_portId, 0, m_nbTxDesc,
375  rte_eth_dev_socket_id (m_portId),
376  &txqConf);
377  if (ret < 0)
378  {
379  rte_exit (EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
380  ret, m_portId);
381  }
382 
383  NS_LOG_INFO ("Initialize Tx buffers");
384  m_txBuffer = (rte_eth_dev_tx_buffer*)
385  rte_zmalloc_socket ("tx_buffer",
386  RTE_ETH_TX_BUFFER_SIZE (m_maxTxPktBurst), 0,
387  rte_eth_dev_socket_id (m_portId));
388  NS_LOG_INFO ("Initialize Rx buffers");
// The Rx burst buffer reuses the Tx buffer structure and sizing macro.
389  m_rxBuffer = (rte_eth_dev_tx_buffer*)
390  rte_zmalloc_socket ("rx_buffer",
391  RTE_ETH_TX_BUFFER_SIZE (m_maxRxPktBurst), 0,
392  rte_eth_dev_socket_id (m_portId));
393  if (m_txBuffer == NULL || m_rxBuffer == NULL)
394  {
395  rte_exit (EXIT_FAILURE, "Cannot allocate buffer for rx/tx on port %u\n",
396  m_portId);
397  }
398 
399  rte_eth_tx_buffer_init (m_txBuffer, m_maxTxPktBurst);
400  rte_eth_tx_buffer_init (m_rxBuffer, m_maxRxPktBurst);
401 
402  NS_LOG_INFO ("Start the device");
403  ret = rte_eth_dev_start (m_portId);
404  if (ret < 0)
405  {
406  rte_exit (EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
407  ret, m_portId);
408  }
409 
410  rte_eth_promiscuous_enable (m_portId);
411 
413 
414  NS_LOG_INFO ("Launching core threads");
// `this` becomes the `arg` that LaunchCore casts back to a DpdkNetDevice pointer.
415  rte_eal_mp_remote_launch (LaunchCore, this, CALL_MASTER);
416 }
417 
/*
 * Take one mbuf from m_mempool and return a pointer to the start of its packet data,
 * or NULL when no mbuf could be allocated.
 * NOTE(review): listing line 419 (the function header) is missing from this copy, so
 * the parameters are not visible; the visible body does not use a requested length.
 */
418 uint8_t*
420 {
421  struct rte_mbuf *pkt = rte_pktmbuf_alloc (m_mempool);
422  if (!pkt)
423  {
424  return NULL;
425  }
// The caller only ever sees the data pointer; FreeBuffer and Write recover the mbuf from it.
426  uint8_t *buf = rte_pktmbuf_mtod (pkt, uint8_t *);
427  return buf;
428 }
429 
/*
 * Release the mbuf that owns the given packet-data pointer; a null pointer is ignored.
 * NOTE(review): listing line 431 (the function header that declares `buf`) is missing
 * from this copy.
 */
430 void
432 {
433  struct rte_mbuf *pkt;
434 
435  if (!buf)
436  {
437  return;
438  }
// Step back over the mbuf header and the headroom to reach the mbuf itself.
// NOTE(review): assumes buf is the unmodified data pointer of an mbuf with the default
// headroom and no private area -- confirm against the pool settings.
439  pkt = (struct rte_mbuf *)
440  RTE_PTR_SUB ( buf,
441  sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
442 
443  rte_pktmbuf_free (pkt);
444 }
445 
446 ssize_t
447 DpdkNetDevice::Write (uint8_t *buffer, size_t length)
448 {
449  struct rte_mbuf ** pkt = new struct rte_mbuf*[1];
450  int queueId = 0;
451 
452  if (buffer == NULL || m_txBuffer->length == m_maxTxPktBurst)
453  {
454  NS_LOG_ERROR ("Error allocating mbuf" << buffer);
455  return -1;
456  }
457 
458  pkt[0] = (struct rte_mbuf *)
459  RTE_PTR_SUB ( buffer,
460  sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
461 
462  pkt[0]->pkt_len = length;
463  pkt[0]->data_len = length;
464  rte_eth_tx_buffer (m_portId, queueId, m_txBuffer, pkt[0]);
465 
466  if (m_txBuffer->length == 1)
467  {
468  // If this is a first packet in buffer, schedule a tx.
471  }
472 
473  return length;
474 }
475 
/*
 * Drain m_pendingQueue, releasing every queued packet buffer through FreeBuffer.
 * NOTE(review): listing lines 477 (the function header) and 479 (one statement before
 * the loop) are missing from this copy.
 */
476 void
478 {
480 
481  while (!m_pendingQueue.empty ())
482  {
// Each entry pairs a buffer pointer with a length; only the pointer is needed to free it.
483  std::pair<uint8_t *, ssize_t> next = m_pendingQueue.front ();
484  m_pendingQueue.pop ();
485 
486  FreeBuffer (next.first);
487  }
488 }
489 
490 } // namespace ns3
#define CHECK_INTERVAL
#define MAX_CHECK_TIME
static EventId Schedule(Time const &delay, FUNC f, Ts &&... args)
Schedule an event to expire after delay.
Definition: simulator.h:557
#define NS_LOG_FUNCTION(parameters)
If log level LOG_FUNCTION is enabled, this macro will output all input parameters separated by "...
Time m_txTimeout
The time to wait before transmitting burst from Tx buffer.
struct rte_eth_dev_tx_buffer * m_rxBuffer
Buffer to handle burst reception.
#define NS_OBJECT_ENSURE_REGISTERED(type)
Register an Object subclass with the TypeId system.
Definition: object-base.h:45
ssize_t Write(uint8_t *buffer, size_t length)
Write packet data to device.
void SetDeviceName(std::string deviceName)
Set device name.
void HandleRx()
Receive packets in burst from the nic to the rx_buffer.
uint16_t m_nbRxDesc
Number of Rx descriptors.
uint16_t m_nbTxDesc
Number of Tx descriptors.
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:205
std::queue< std::pair< uint8_t *, ssize_t > > m_pendingQueue
Number of packets that were received and scheduled for read but not yet read.
#define NS_LOG_INFO(msg)
Use NS_LOG to output a message of level LOG_INFO.
Definition: log.h:281
static void Cancel(const EventId &id)
Set the cancel bit on this event: the event's associated function will not be invoked when it expires...
Definition: simulator.cc:268
bool IsLinkUp(void) const
Check the status of the link.
static void SignalHandler(int signum)
A signal handler for SIGINT and SIGTERM signals.
uint32_t m_maxRxPktBurst
Size of Rx burst.
DpdkNetDevice()
Constructor for the DpdkNetDevice.
EventId m_txEvent
Event for stale packet transmission.
static volatile bool m_forceQuit
Condition variable for Dpdk to stop.
AttributeValue implementation for Time.
Definition: nstime.h:1353
A class which provides a simple way to implement a Critical Section.
Definition: system-mutex.h:118
static TypeId GetTypeId(void)
Get the type ID.
Hold an unsigned integer type.
Definition: uinteger.h:44
void ReceiveCallback(uint8_t *buf, ssize_t len)
Callback to invoke when a new frame is received.
uint16_t m_portId
The port number of the device to be used.
virtual uint8_t * AllocateBuffer(size_t len)
Allocate packet buffer.
static int LaunchCore(void *arg)
A function to handle rx & tx operations.
virtual void FreeBuffer(uint8_t *buf)
Free the given packet buffer.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
struct rte_mempool * m_mempool
Packet memory pool.
struct rte_eth_dev_tx_buffer * m_txBuffer
Buffer to handle burst transmission.
Ptr< const AttributeAccessor > MakeTimeAccessor(T1 a1)
Create an AttributeAccessor for a class data member, or a lone class get functor or set method...
Definition: nstime.h:1354
void DoFinishStoppingDevice(void)
Complete additional actions, if any, to tear down the device.
~DpdkNetDevice()
Destructor for the DpdkNetDevice.
std::string m_deviceName
The device name.
void CheckAllPortsLinkStatus(void)
Check the link status of all ports in up to 9s and print them finally.
Ptr< const AttributeChecker > MakeTimeChecker(const Time min, const Time max)
Helper to make a Time checker with bounded range.
Definition: time.cc:533
Time MicroSeconds(uint64_t value)
Construct a Time in the indicated unit.
Definition: nstime.h:1305
#define NS_LOG_ERROR(msg)
Use NS_LOG to output a message of level LOG_ERROR.
Definition: log.h:257
uint32_t m_mempoolCacheSize
Mempool cache size.
a NetDevice to read/write network traffic from/into a file descriptor.
Definition: fd-net-device.h:84
void InitDpdk(int argc, char **argv, std::string dpdkDriver)
Initialize Dpdk.
uint32_t m_maxTxPktBurst
Size of Tx burst.
Ptr< const AttributeAccessor > MakeUintegerAccessor(T1 a1)
Create an AttributeAccessor for a class data member, or a lone class get functor or set method...
Definition: uinteger.h:45
a unique identifier for an interface.
Definition: type-id.h:58
TypeId SetParent(TypeId tid)
Set the parent TypeId.
Definition: type-id.cc:923
a NetDevice to read/write network traffic from/into a Dpdk enabled port.
SystemMutex m_pendingReadMutex
Mutex to increase pending read counter.
void HandleTx()
Transmit packets in burst from the tx_buffer to the nic.