A Discrete-Event Network Simulator
API
dpdk-net-device.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 NITK Surathkal
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation;
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16  *
17  * Author: Harsh Patel <thadodaharsh10@gmail.com>
18  * Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
19  * Mohit P. Tahiliani <tahiliani@nitk.edu.in>
20  */
21 
22 #include "dpdk-net-device.h"
23 
24 #include "ns3/log.h"
25 #include "ns3/net-device-queue-interface.h"
26 #include "ns3/simulator.h"
27 #include "ns3/uinteger.h"
28 
29 #include <mutex>
30 #include <poll.h>
31 #include <rte_common.h>
32 #include <rte_cycles.h>
33 #include <rte_eal.h>
34 #include <rte_ethdev.h>
35 #include <rte_malloc.h>
36 #include <rte_mbuf.h>
37 #include <rte_mempool.h>
38 #include <rte_port.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/signal.h>
42 #include <unistd.h>
43 
44 namespace ns3
45 {
46 
// Logging component used by the NS_LOG_* macros in this file.
47 NS_LOG_COMPONENT_DEFINE("DpdkNetDevice");
48 
// Register DpdkNetDevice with the ns-3 TypeId system.
49 NS_OBJECT_ENSURE_REGISTERED(DpdkNetDevice);
50 
// Stop flag shared by every instance: set by the signal handler and the destructor,
// polled by the receive loop in LaunchCore and by CheckAllPortsLinkStatus.
// NOTE(review): volatile gives no inter-thread ordering guarantees; confirm whether an
// atomic flag is needed for the lcore that runs the receive loop.
51 volatile bool DpdkNetDevice::m_forceQuit = false;
52 
// Builds (once, through a function-local static) and returns the TypeId
// "ns3::DpdkNetDevice" in group "FdNetDevice". Attributes and their defaults:
//   TxTimeout 2000 us, MaxRxBurst 64, MaxTxBurst 64, MempoolCacheSize 256,
//   NbRxDesc 1024, NbTxDesc 1024.
// NOTE(review): this listing is missing the qualified-name line (listing line 54) and
// the lines numbered 58, 64-65, 69, 74, 79, 84 and 89, so the attribute accessor
// arguments are not visible here and the parentheses below do not balance as shown.
53 TypeId
55 {
56  static TypeId tid =
57  TypeId("ns3::DpdkNetDevice")
59  .SetGroupName("FdNetDevice")
60  .AddConstructor<DpdkNetDevice>()
61  .AddAttribute("TxTimeout",
62  "The time to wait before transmitting burst from Tx buffer.",
63  TimeValue(MicroSeconds(2000)),
66  .AddAttribute("MaxRxBurst",
67  "Size of Rx Burst.",
68  UintegerValue(64),
70  MakeUintegerChecker<uint32_t>())
71  .AddAttribute("MaxTxBurst",
72  "Size of Tx Burst.",
73  UintegerValue(64),
75  MakeUintegerChecker<uint32_t>())
76  .AddAttribute("MempoolCacheSize",
77  "Size of mempool cache.",
78  UintegerValue(256),
80  MakeUintegerChecker<uint32_t>())
81  .AddAttribute("NbRxDesc",
82  "Number of Rx descriptors.",
83  UintegerValue(1024),
85  MakeUintegerChecker<uint16_t>())
86  .AddAttribute("NbTxDesc",
87  "Number of Tx descriptors.",
88  UintegerValue(1024),
90  MakeUintegerChecker<uint16_t>());
91  return tid;
92 }
93 
// Constructor: starts with no mbuf pool (m_mempool is created later in InitDpdk).
// NOTE(review): the constructor name line (listing line 94) is missing from this listing.
95  : m_mempool(nullptr)
96 {
97  NS_LOG_FUNCTION(this);
98 }
99 
// Destructor: asks the receive loop to stop, waits for lcore 1, then stops and
// closes the port.
// NOTE(review): the destructor name line (listing line 100) and listing line 103 are
// missing from this listing.
101 {
102  NS_LOG_FUNCTION(this);
// Makes the `while (!m_forceQuit)` loop in LaunchCore terminate.
104  m_forceQuit = true;
105 
// LaunchCore only runs its receive loop on lcore 1, so that is the lcore waited on.
106  rte_eal_wait_lcore(1);
107  rte_eth_dev_stop(m_portId);
108  rte_eth_dev_close(m_portId);
109 }
110 
111 void
112 DpdkNetDevice::SetDeviceName(std::string deviceName)
113 {
114  NS_LOG_FUNCTION(this);
115 
116  m_deviceName = deviceName;
117 }
118 
119 void
121 {
122  NS_LOG_FUNCTION(this);
123 
124 #define CHECK_INTERVAL 100 /* 100ms */
125 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
126 
127  uint8_t printFlag = 0;
128  struct rte_eth_link link;
129 
130  for (uint8_t count = 0; count <= MAX_CHECK_TIME; count++)
131  {
132  uint8_t allPortsUp = 1;
133 
134  if (m_forceQuit)
135  {
136  return;
137  }
138  if ((1 << m_portId) == 0)
139  {
140  continue;
141  }
142  memset(&link, 0, sizeof(link));
143  rte_eth_link_get(m_portId, &link);
144  /* print link status if flag set */
145  if (printFlag == 1)
146  {
147  if (!link.link_status)
148  {
149  NS_LOG_INFO("Port " << +m_portId << " Link Down");
150  }
151 
152  continue;
153  }
154  /* clear allPortsUp flag if any link down */
155  if (link.link_status == ETH_LINK_DOWN)
156  {
157  allPortsUp = 0;
158  break;
159  }
160 
161  /* after finally printing all link status, get out */
162  if (printFlag == 1)
163  {
164  break;
165  }
166 
167  if (allPortsUp == 0)
168  {
169  fflush(stdout);
170  rte_delay_ms(CHECK_INTERVAL);
171  }
172 
173  /* set the printFlag if all ports up or timeout */
174  if (allPortsUp == 1 || count == (MAX_CHECK_TIME - 1))
175  {
176  printFlag = 1;
177  }
178  }
179 }
180 
// Handler installed for SIGINT and SIGTERM by InitDpdk: sets m_forceQuit so the
// receive loop and the link check stop. Other signal numbers are ignored.
// NOTE(review): the qualified-name line (listing line 182; the tooltip shows
// `static void SignalHandler(int signum)`) is missing from this listing.
// NOTE(review): NS_LOG_INFO performs stream output from inside a signal handler,
// which is presumably not async-signal-safe - confirm.
181 void
183 {
184  if (signum == SIGINT || signum == SIGTERM)
185  {
186  NS_LOG_INFO("Signal " << signum << " received, preparing to exit...");
187  m_forceQuit = true;
188  }
189 }
190 
// Flushes whatever is currently held in m_txBuffer to Tx queue 0 of the port.
// NOTE(review): the qualified-name line (listing line 192; the tooltip shows
// `void HandleTx()`) is missing from this listing.
191 void
193 {
// Only queue 0 is used; InitDpdk configures a single Tx queue.
194  int queueId = 0;
195  rte_eth_tx_buffer_flush(m_portId, queueId, m_txBuffer);
196 }
197 
// Reads one burst of up to m_maxRxPktBurst packets from Rx queue 0 into m_rxBuffer
// and passes each packet's data pointer and data_len to FdNetDevice::ReceiveCallback.
// NOTE(review): the qualified-name line (listing line 199; the tooltip shows
// `void HandleRx()`) is missing from this listing.
198 void
200 {
201  int queueId = 0;
// The burst return value (number of packets received) is kept in the buffer's length field.
202  m_rxBuffer->length = rte_eth_rx_burst(m_portId, queueId, m_rxBuffer->pkts, m_maxRxPktBurst);
203 
204  for (uint16_t i = 0; i < m_rxBuffer->length; i++)
205  {
206  struct rte_mbuf* pkt = nullptr;
207  pkt = m_rxBuffer->pkts[i];
208 
209  if (!pkt)
210  {
211  continue;
212  }
213 
// The mbuf is not freed here; presumably the receiver releases it later through
// FreeBuffer - verify against FdNetDevice.
214  uint8_t* buf = rte_pktmbuf_mtod(pkt, uint8_t*);
215  size_t length = pkt->data_len;
216  FdNetDevice::ReceiveCallback(buf, length);
217  }
218 
219  m_rxBuffer->length = 0;
220 }
221 
// Entry point launched on the lcores by InitDpdk. Only lcore 1 does work: it calls
// HandleRx() on the given device in a loop until m_forceQuit is set. Every other
// lcore returns 0 immediately.
// `arg` is the DpdkNetDevice passed as `this` by InitDpdk.
// NOTE(review): the qualified-name line (listing line 223; the tooltip shows
// `static int LaunchCore(void *arg)`) is missing from this listing.
222 int
224 {
225  DpdkNetDevice* dpdkNetDevice = (DpdkNetDevice*)arg;
226  unsigned lcoreId;
227  lcoreId = rte_lcore_id();
228  if (lcoreId != 1)
229  {
230  return 0;
231  }
232 
// Busy-polls the NIC; there is no sleep between bursts.
233  while (!m_forceQuit)
234  {
235  dpdkNetDevice->HandleRx();
236  }
237 
238  return 0;
239 }
240 
// Always reports the link as up; the actual link state is not queried here
// (see the mailing-list thread referenced below for the rationale).
// NOTE(review): the qualified-name line (listing line 242; the tooltip shows
// `bool IsLinkUp() const override`) is missing from this listing.
241 bool
243 {
244  // Refer https://mails.dpdk.org/archives/users/2018-December/003822.html
245  return true;
246 }
247 
// Binds the device to a DPDK driver, initializes the EAL, and brings up the port
// with one Rx queue and one Tx queue, then launches LaunchCore on the lcores.
//
// argc, argv  - passed unchanged to rte_eal_init.
// dpdkDriver  - driver name given to `dpdk-devbind.py --bind=`.
//
// Every failure path calls rte_exit(EXIT_FAILURE, ...).
// m_deviceName must have been set (SetDeviceName) before this is called.
// NOTE(review): listing lines 302 and 398 are missing from this listing.
248 void
249 DpdkNetDevice::InitDpdk(int argc, char** argv, std::string dpdkDriver)
250 {
251  NS_LOG_FUNCTION(this << argc << argv);
252 
253  NS_LOG_INFO("Binding device to DPDK");
// NOTE(review): dpdkDriver and m_deviceName are concatenated into a shell command
// without quoting or validation - confirm both always come from trusted input.
254  std::string command;
255  command.append("dpdk-devbind.py --force ");
256  command.append("--bind=");
257  command.append(dpdkDriver);
258  command.append(" ");
259  command.append(m_deviceName);
260  NS_LOG_INFO("Executing: " << command);
261  if (system(command.c_str()))
262  {
263  rte_exit(EXIT_FAILURE, "Execution failed - bye\n");
264  }
265 
266  // wait for the device to bind to Dpdk
267  sleep(5); /* 5 seconds */
268 
269  NS_LOG_INFO("Initialize DPDK EAL");
270  int ret = rte_eal_init(argc, argv);
271  if (ret < 0)
272  {
273  rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
274  }
275 
// Reset the shared stop flag and install the handlers that set it again.
276  m_forceQuit = false;
277  signal(SIGINT, SignalHandler);
278  signal(SIGTERM, SignalHandler);
279 
280  unsigned nbPorts = rte_eth_dev_count_avail();
281  if (nbPorts == 0)
282  {
283  rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
284  }
285 
286  NS_LOG_INFO("Get port id of the device");
287  if (rte_eth_dev_get_port_by_name(m_deviceName.c_str(), &m_portId) != 0)
288  {
289  rte_exit(EXIT_FAILURE, "Cannot get port id - bye\n");
290  }
291 
292  // Set number of logical cores to 2
293  unsigned int nbLcores = 2;
294 
// Pool size: descriptors, bursts and per-lcore caches for every port, never below 8192.
295  unsigned int nbMbufs = RTE_MAX(nbPorts * (m_nbRxDesc + m_nbTxDesc + m_maxRxPktBurst +
296  m_maxTxPktBurst + nbLcores * m_mempoolCacheSize),
297  8192U);
298 
299  NS_LOG_INFO("Create the mbuf pool");
300  m_mempool = rte_pktmbuf_pool_create("mbuf_pool",
301  nbMbufs,
303  0,
304  RTE_MBUF_DEFAULT_BUF_SIZE,
305  rte_socket_id());
306 
307  if (!m_mempool)
308  {
309  rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
310  }
311 
312  NS_LOG_INFO("Initialize port");
313  static struct rte_eth_conf portConf = {};
314  portConf.rxmode = {};
315  portConf.rxmode.split_hdr_size = 0;
316  portConf.txmode = {};
317  portConf.txmode.mq_mode = ETH_MQ_TX_NONE;
318 
319  struct rte_eth_rxconf reqConf;
320  struct rte_eth_txconf txqConf;
321  struct rte_eth_conf localPortConf = portConf;
322  struct rte_eth_dev_info devInfo;
323 
324  fflush(stdout);
325  rte_eth_dev_info_get(m_portId, &devInfo);
// Enable the fast-free Tx offload only when the device advertises it.
326  if (devInfo.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
327  {
328  localPortConf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
329  }
// One Rx queue and one Tx queue.
330  ret = rte_eth_dev_configure(m_portId, 1, 1, &localPortConf);
331  if (ret < 0)
332  {
333  rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, m_portId);
334  }
335 
// May rewrite m_nbRxDesc / m_nbTxDesc in place (they are passed by pointer).
336  ret = rte_eth_dev_adjust_nb_rx_tx_desc(m_portId, &m_nbRxDesc, &m_nbTxDesc);
337  if (ret < 0)
338  {
339  rte_exit(EXIT_FAILURE,
340  "Cannot adjust number of descriptors: err=%d, port=%u\n",
341  ret,
342  m_portId);
343  }
344 
345  NS_LOG_INFO("Initialize one Rx queue");
346  fflush(stdout);
347  reqConf = devInfo.default_rxconf;
348  reqConf.offloads = localPortConf.rxmode.offloads;
349  ret = rte_eth_rx_queue_setup(m_portId,
350  0,
351  m_nbRxDesc,
352  rte_eth_dev_socket_id(m_portId),
353  &reqConf,
354  m_mempool);
355  if (ret < 0)
356  {
357  rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n", ret, m_portId);
358  }
359 
360  NS_LOG_INFO("Initialize one Tx queue per port");
361  fflush(stdout);
362  txqConf = devInfo.default_txconf;
363  txqConf.offloads = localPortConf.txmode.offloads;
364  ret =
365  rte_eth_tx_queue_setup(m_portId, 0, m_nbTxDesc, rte_eth_dev_socket_id(m_portId), &txqConf);
366  if (ret < 0)
367  {
368  rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n", ret, m_portId);
369  }
370 
371  NS_LOG_INFO("Initialize Tx buffers");
372  m_txBuffer = (rte_eth_dev_tx_buffer*)rte_zmalloc_socket("tx_buffer",
373  RTE_ETH_TX_BUFFER_SIZE(m_maxTxPktBurst),
374  0,
375  rte_eth_dev_socket_id(m_portId));
376  NS_LOG_INFO("Initialize Rx buffers");
// The Rx buffer reuses the Tx buffer structure and sizing macro; HandleRx only
// uses its pkts array and length field.
377  m_rxBuffer = (rte_eth_dev_tx_buffer*)rte_zmalloc_socket("rx_buffer",
378  RTE_ETH_TX_BUFFER_SIZE(m_maxRxPktBurst),
379  0,
380  rte_eth_dev_socket_id(m_portId));
381  if (!m_txBuffer || !m_rxBuffer)
382  {
383  rte_exit(EXIT_FAILURE, "Cannot allocate buffer for rx/tx on port %u\n", m_portId);
384  }
385 
386  rte_eth_tx_buffer_init(m_txBuffer, m_maxTxPktBurst);
387  rte_eth_tx_buffer_init(m_rxBuffer, m_maxRxPktBurst);
388 
389  NS_LOG_INFO("Start the device");
390  ret = rte_eth_dev_start(m_portId);
391  if (ret < 0)
392  {
393  rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", ret, m_portId);
394  }
395 
396  rte_eth_promiscuous_enable(m_portId);
397 
399 
400  NS_LOG_INFO("Launching core threads");
// `this` becomes the `arg` of LaunchCore. Lcores other than 1 return from
// LaunchCore immediately, so presumably this call does not block the caller -
// confirm against the EAL lcore configuration.
401  rte_eal_mp_remote_launch(LaunchCore, this, CALL_MASTER);
402 }
403 
// Allocates one mbuf from m_mempool and returns a pointer to its data area,
// or nullptr when the pool cannot supply an mbuf.
// NOTE(review): the qualified-name line (listing line 405; the tooltip shows
// `uint8_t * AllocateBuffer(size_t len) override`) is missing from this listing.
// No size argument is used in the visible body.
404 uint8_t*
406 {
407  struct rte_mbuf* pkt = rte_pktmbuf_alloc(m_mempool);
408  if (!pkt)
409  {
410  return nullptr;
411  }
// The caller gets the data pointer only; FreeBuffer and Write recover the mbuf from it.
412  uint8_t* buf = rte_pktmbuf_mtod(pkt, uint8_t*);
413  return buf;
414 }
415 
// Frees the mbuf that owns the given data pointer; a null pointer is ignored.
// NOTE(review): the qualified-name line (listing line 417; the tooltip shows
// `void FreeBuffer(uint8_t *buf) override`) is missing from this listing.
416 void
418 {
419  struct rte_mbuf* pkt;
420 
421  if (!buf)
422  {
423  return;
424  }
// Step back from the data area to the mbuf header. This assumes buf is exactly
// sizeof(rte_mbuf) + RTE_PKTMBUF_HEADROOM bytes past the header, i.e. an
// unmodified data offset - TODO confirm for received packets.
425  pkt = (struct rte_mbuf*)RTE_PTR_SUB(buf, sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
426 
427  rte_pktmbuf_free(pkt);
428 }
429 
430 ssize_t
431 DpdkNetDevice::Write(uint8_t* buffer, size_t length)
432 {
433  struct rte_mbuf** pkt = new struct rte_mbuf*[1];
434  int queueId = 0;
435 
436  if (!buffer || m_txBuffer->length == m_maxTxPktBurst)
437  {
438  NS_LOG_ERROR("Error allocating mbuf" << buffer);
439  return -1;
440  }
441 
442  pkt[0] = (struct rte_mbuf*)RTE_PTR_SUB(buffer, sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
443 
444  pkt[0]->pkt_len = length;
445  pkt[0]->data_len = length;
446  rte_eth_tx_buffer(m_portId, queueId, m_txBuffer, pkt[0]);
447 
448  if (m_txBuffer->length == 1)
449  {
450  // If this is a first packet in buffer, schedule a tx.
453  }
454 
455  return length;
456 }
457 
// Drains m_pendingQueue while holding m_pendingReadMutex, freeing the buffer of
// every queued entry through FreeBuffer.
// NOTE(review): the qualified-name line (listing line 459) is missing from this
// listing; presumably this is DoFinishStoppingDevice() (the only remaining
// `void ... override` in the tooltip list) - confirm.
458 void
460 {
461  std::unique_lock lock{m_pendingReadMutex};
462 
463  while (!m_pendingQueue.empty())
464  {
// first = buffer pointer, second = a length that is not needed for freeing.
465  std::pair<uint8_t*, ssize_t> next = m_pendingQueue.front();
466  m_pendingQueue.pop();
467 
468  FreeBuffer(next.first);
469  }
470 }
471 
472 } // namespace ns3
a NetDevice to read/write network traffic from/into a Dpdk enabled port.
static int LaunchCore(void *arg)
A function to handle rx & tx operations.
uint32_t m_maxRxPktBurst
Size of Rx burst.
void InitDpdk(int argc, char **argv, std::string dpdkDriver)
Initialize Dpdk.
void SetDeviceName(std::string deviceName)
Set device name.
void HandleTx()
Transmit packets in burst from the tx_buffer to the nic.
static void SignalHandler(int signum)
A signal handler for SIGINT and SIGTERM signals.
void FreeBuffer(uint8_t *buf) override
Free the given packet buffer.
struct rte_eth_dev_tx_buffer * m_txBuffer
Buffer to handle burst transmission.
struct rte_eth_dev_tx_buffer * m_rxBuffer
Buffer to handle burst reception.
uint32_t m_maxTxPktBurst
Size of Tx burst.
static TypeId GetTypeId()
Get the type ID.
EventId m_txEvent
Event for stale packet transmission.
ssize_t Write(uint8_t *buffer, size_t length) override
Write packet data to device.
std::string m_deviceName
The device name.
~DpdkNetDevice() override
Destructor for the DpdkNetDevice.
static volatile bool m_forceQuit
Condition variable for Dpdk to stop.
bool IsLinkUp() const override
Check the status of the link.
uint16_t m_nbTxDesc
Number of Tx descriptors.
uint16_t m_nbRxDesc
Number of Rx descriptors.
uint8_t * AllocateBuffer(size_t len) override
Allocate packet buffer.
struct rte_mempool * m_mempool
Packet memory pool.
void CheckAllPortsLinkStatus()
Check the link status of all ports in up to 9s and print them finally.
uint16_t m_portId
The port number of the device to be used.
void DoFinishStoppingDevice() override
Complete additional actions, if any, to tear down the device.
DpdkNetDevice()
Constructor for the DpdkNetDevice.
void HandleRx()
Receive packets in burst from the nic to the rx_buffer.
Time m_txTimeout
The time to wait before transmitting burst from Tx buffer.
uint32_t m_mempoolCacheSize
Mempool cache size.
a NetDevice to read/write network traffic from/into a file descriptor.
Definition: fd-net-device.h:84
std::mutex m_pendingReadMutex
Mutex to increase pending read counter.
std::queue< std::pair< uint8_t *, ssize_t > > m_pendingQueue
Number of packets that were received and scheduled for read but not yet read.
Callback< bool, Ptr< NetDevice >, Ptr< const Packet >, uint16_t, const Address & > ReceiveCallback
Definition: net-device.h:322
static EventId Schedule(const Time &delay, FUNC f, Ts &&... args)
Schedule an event to expire after delay.
Definition: simulator.h:571
static void Cancel(const EventId &id)
Set the cancel bit on this event: the event's associated function will not be invoked when it expires...
Definition: simulator.cc:285
a unique identifier for an interface.
Definition: type-id.h:59
TypeId SetParent(TypeId tid)
Set the parent TypeId.
Definition: type-id.cc:931
Hold an unsigned integer type.
Definition: uinteger.h:45
#define MAX_CHECK_TIME
#define CHECK_INTERVAL
#define NS_LOG_ERROR(msg)
Use NS_LOG to output a message of level LOG_ERROR.
Definition: log.h:254
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:202
#define NS_LOG_FUNCTION(parameters)
If log level LOG_FUNCTION is enabled, this macro will output all input parameters separated by ",...
#define NS_LOG_INFO(msg)
Use NS_LOG to output a message of level LOG_INFO.
Definition: log.h:275
#define NS_OBJECT_ENSURE_REGISTERED(type)
Register an Object subclass with the TypeId system.
Definition: object-base.h:46
Time MicroSeconds(uint64_t value)
Construct a Time in the indicated unit.
Definition: nstime.h:1350
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Ptr< const AttributeAccessor > MakeTimeAccessor(T1 a1)
Definition: nstime.h:1414
Ptr< const AttributeChecker > MakeTimeChecker(const Time min, const Time max)
Helper to make a Time checker with bounded range.
Definition: time.cc:533
Ptr< const AttributeAccessor > MakeUintegerAccessor(T1 a1)
Definition: uinteger.h:46