Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vmxnet3: add receive data ring support

The vmxnet3 driver preallocates buffers for receiving packets and posts the
buffers to the emulation. In order to deliver a received packet to the
guest, the emulation must map the buffer(s) and copy the packet into them.

To avoid this memory mapping overhead, this patch introduces the receive
data ring - a set of small-sized buffers that are always mapped by
the emulation. If a packet fits into a receive data ring buffer, the
emulation delivers the packet via the receive data ring and the guest
driver copies it out; otherwise the usual receive path is used.

The receive data ring buffer length is configurable via `ethtool -G ethX rx-mini`.

Signed-off-by: Shrikrishna Khare <skhare@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Shrikrishna Khare and committed by
David S. Miller
50a5ce3e 3c8b3efc

+193 -45
+12 -2
drivers/net/vmxnet3/vmxnet3_defs.h
··· 174 174 u8 data[VMXNET3_HDR_COPY_SIZE]; 175 175 }; 176 176 177 + typedef u8 Vmxnet3_RxDataDesc; 178 + 177 179 #define VMXNET3_TCD_GEN_SHIFT 31 178 180 #define VMXNET3_TCD_GEN_SIZE 1 179 181 #define VMXNET3_TCD_TXIDX_SHIFT 0 ··· 384 382 #define VMXNET3_TXDATA_DESC_SIZE_ALIGN 64 385 383 #define VMXNET3_TXDATA_DESC_SIZE_MASK (VMXNET3_TXDATA_DESC_SIZE_ALIGN - 1) 386 384 385 + /* Rx Data Ring buffer size must be a multiple of 64 */ 386 + #define VMXNET3_RXDATA_DESC_SIZE_ALIGN 64 387 + #define VMXNET3_RXDATA_DESC_SIZE_MASK (VMXNET3_RXDATA_DESC_SIZE_ALIGN - 1) 388 + 387 389 /* Max ring size */ 388 390 #define VMXNET3_TX_RING_MAX_SIZE 4096 389 391 #define VMXNET3_TC_RING_MAX_SIZE 4096 ··· 397 391 398 392 #define VMXNET3_TXDATA_DESC_MIN_SIZE 128 399 393 #define VMXNET3_TXDATA_DESC_MAX_SIZE 2048 394 + 395 + #define VMXNET3_RXDATA_DESC_MAX_SIZE 2048 400 396 401 397 /* a list of reasons for queue stop */ 402 398 ··· 496 488 __le64 rxRingBasePA[2]; 497 489 __le64 compRingBasePA; 498 490 __le64 ddPA; /* driver data */ 499 - __le64 reserved; 491 + __le64 rxDataRingBasePA; 500 492 __le32 rxRingSize[2]; /* # of rx desc */ 501 493 __le32 compRingSize; /* # of rx comp desc */ 502 494 __le32 ddLen; /* size of driver data */ 503 495 u8 intrIdx; 504 - u8 _pad[7]; 496 + u8 _pad1[1]; 497 + __le16 rxDataRingDescSize; /* size of rx data ring buffer */ 498 + u8 _pad2[4]; 505 499 }; 506 500 507 501
+122 -31
drivers/net/vmxnet3/vmxnet3_drv.c
··· 1284 1284 */ 1285 1285 break; 1286 1286 } 1287 - BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2); 1287 + BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 && 1288 + rcd->rqID != rq->dataRingQid); 1288 1289 idx = rcd->rxdIdx; 1289 - ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1; 1290 + ring_idx = VMXNET3_GET_RING_IDX(adapter, rcd->rqID); 1290 1291 ring = rq->rx_ring + ring_idx; 1291 1292 vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd, 1292 1293 &rxCmdDesc); ··· 1302 1301 } 1303 1302 1304 1303 if (rcd->sop) { /* first buf of the pkt */ 1304 + bool rxDataRingUsed; 1305 + u16 len; 1306 + 1305 1307 BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD || 1306 - rcd->rqID != rq->qid); 1308 + (rcd->rqID != rq->qid && 1309 + rcd->rqID != rq->dataRingQid)); 1307 1310 1308 1311 BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB); 1309 1312 BUG_ON(ctx->skb != NULL || rbi->skb == NULL); ··· 1323 1318 1324 1319 skip_page_frags = false; 1325 1320 ctx->skb = rbi->skb; 1321 + 1322 + rxDataRingUsed = 1323 + VMXNET3_RX_DATA_RING(adapter, rcd->rqID); 1324 + len = rxDataRingUsed ? rcd->len : rbi->len; 1326 1325 new_skb = netdev_alloc_skb_ip_align(adapter->netdev, 1327 - rbi->len); 1326 + len); 1328 1327 if (new_skb == NULL) { 1329 1328 /* Skb allocation failed, do not handover this 1330 1329 * skb to stack. Reuse it. Drop the existing pkt ··· 1339 1330 skip_page_frags = true; 1340 1331 goto rcd_done; 1341 1332 } 1342 - new_dma_addr = dma_map_single(&adapter->pdev->dev, 1343 - new_skb->data, rbi->len, 1344 - PCI_DMA_FROMDEVICE); 1345 - if (dma_mapping_error(&adapter->pdev->dev, 1346 - new_dma_addr)) { 1347 - dev_kfree_skb(new_skb); 1348 - /* Skb allocation failed, do not handover this 1349 - * skb to stack. Reuse it. 
Drop the existing pkt 1350 - */ 1351 - rq->stats.rx_buf_alloc_failure++; 1352 - ctx->skb = NULL; 1353 - rq->stats.drop_total++; 1354 - skip_page_frags = true; 1355 - goto rcd_done; 1356 - } 1357 1333 1358 - dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr, 1359 - rbi->len, 1360 - PCI_DMA_FROMDEVICE); 1334 + if (rxDataRingUsed) { 1335 + size_t sz; 1336 + 1337 + BUG_ON(rcd->len > rq->data_ring.desc_size); 1338 + 1339 + ctx->skb = new_skb; 1340 + sz = rcd->rxdIdx * rq->data_ring.desc_size; 1341 + memcpy(new_skb->data, 1342 + &rq->data_ring.base[sz], rcd->len); 1343 + } else { 1344 + ctx->skb = rbi->skb; 1345 + 1346 + new_dma_addr = 1347 + dma_map_single(&adapter->pdev->dev, 1348 + new_skb->data, rbi->len, 1349 + PCI_DMA_FROMDEVICE); 1350 + if (dma_mapping_error(&adapter->pdev->dev, 1351 + new_dma_addr)) { 1352 + dev_kfree_skb(new_skb); 1353 + /* Skb allocation failed, do not 1354 + * handover this skb to stack. Reuse 1355 + * it. Drop the existing pkt. 1356 + */ 1357 + rq->stats.rx_buf_alloc_failure++; 1358 + ctx->skb = NULL; 1359 + rq->stats.drop_total++; 1360 + skip_page_frags = true; 1361 + goto rcd_done; 1362 + } 1363 + 1364 + dma_unmap_single(&adapter->pdev->dev, 1365 + rbi->dma_addr, 1366 + rbi->len, 1367 + PCI_DMA_FROMDEVICE); 1368 + 1369 + /* Immediate refill */ 1370 + rbi->skb = new_skb; 1371 + rbi->dma_addr = new_dma_addr; 1372 + rxd->addr = cpu_to_le64(rbi->dma_addr); 1373 + rxd->len = rbi->len; 1374 + } 1361 1375 1362 1376 #ifdef VMXNET3_RSS 1363 1377 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && ··· 1391 1359 #endif 1392 1360 skb_put(ctx->skb, rcd->len); 1393 1361 1394 - /* Immediate refill */ 1395 - rbi->skb = new_skb; 1396 - rbi->dma_addr = new_dma_addr; 1397 - rxd->addr = cpu_to_le64(rbi->dma_addr); 1398 - rxd->len = rbi->len; 1399 1362 if (VMXNET3_VERSION_GE_2(adapter) && 1400 1363 rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { 1401 1364 struct Vmxnet3_RxCompDescExt *rcdlro; ··· 1617 1590 rq->buf_info[i] = NULL; 1618 1591 } 1619 1592 1593 + if 
(rq->data_ring.base) { 1594 + dma_free_coherent(&adapter->pdev->dev, 1595 + rq->rx_ring[0].size * rq->data_ring.desc_size, 1596 + rq->data_ring.base, rq->data_ring.basePA); 1597 + rq->data_ring.base = NULL; 1598 + } 1599 + 1620 1600 if (rq->comp_ring.base) { 1621 1601 dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size 1622 1602 * sizeof(struct Vmxnet3_RxCompDesc), ··· 1639 1605 } 1640 1606 } 1641 1607 1608 + void 1609 + vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter) 1610 + { 1611 + int i; 1612 + 1613 + for (i = 0; i < adapter->num_rx_queues; i++) { 1614 + struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; 1615 + 1616 + if (rq->data_ring.base) { 1617 + dma_free_coherent(&adapter->pdev->dev, 1618 + (rq->rx_ring[0].size * 1619 + rq->data_ring.desc_size), 1620 + rq->data_ring.base, 1621 + rq->data_ring.basePA); 1622 + rq->data_ring.base = NULL; 1623 + rq->data_ring.desc_size = 0; 1624 + } 1625 + } 1626 + } 1642 1627 1643 1628 static int 1644 1629 vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, ··· 1751 1698 } 1752 1699 } 1753 1700 1701 + if ((adapter->rxdataring_enabled) && (rq->data_ring.desc_size != 0)) { 1702 + sz = rq->rx_ring[0].size * rq->data_ring.desc_size; 1703 + rq->data_ring.base = 1704 + dma_alloc_coherent(&adapter->pdev->dev, sz, 1705 + &rq->data_ring.basePA, 1706 + GFP_KERNEL); 1707 + if (!rq->data_ring.base) { 1708 + netdev_err(adapter->netdev, 1709 + "rx data ring will be disabled\n"); 1710 + adapter->rxdataring_enabled = false; 1711 + } 1712 + } else { 1713 + rq->data_ring.base = NULL; 1714 + rq->data_ring.desc_size = 0; 1715 + } 1716 + 1754 1717 sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc); 1755 1718 rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz, 1756 1719 &rq->comp_ring.basePA, ··· 1799 1730 { 1800 1731 int i, err = 0; 1801 1732 1733 + adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter); 1734 + 1802 1735 for (i = 0; i < adapter->num_rx_queues; i++) { 1803 1736 err = 
vmxnet3_rq_create(&adapter->rx_queue[i], adapter); 1804 1737 if (unlikely(err)) { ··· 1810 1739 goto err_out; 1811 1740 } 1812 1741 } 1742 + 1743 + if (!adapter->rxdataring_enabled) 1744 + vmxnet3_rq_destroy_all_rxdataring(adapter); 1745 + 1813 1746 return err; 1814 1747 err_out: 1815 1748 vmxnet3_rq_destroy_all(adapter); ··· 2121 2046 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; 2122 2047 rq->qid = i; 2123 2048 rq->qid2 = i + adapter->num_rx_queues; 2049 + rq->dataRingQid = i + 2 * adapter->num_rx_queues; 2124 2050 } 2125 - 2126 - 2127 2051 2128 2052 /* init our intr settings */ 2129 2053 for (i = 0; i < intr->num_intrs; i++) ··· 2436 2362 (rqc->rxRingSize[0] + 2437 2363 rqc->rxRingSize[1])); 2438 2364 rqc->intrIdx = rq->comp_ring.intr_idx; 2365 + if (VMXNET3_VERSION_GE_3(adapter)) { 2366 + rqc->rxDataRingBasePA = 2367 + cpu_to_le64(rq->data_ring.basePA); 2368 + rqc->rxDataRingDescSize = 2369 + cpu_to_le16(rq->data_ring.desc_size); 2370 + } 2439 2371 } 2440 2372 2441 2373 #ifdef VMXNET3_RSS ··· 2772 2692 int 2773 2693 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, 2774 2694 u32 rx_ring_size, u32 rx_ring2_size, 2775 - u16 txdata_desc_size) 2695 + u16 txdata_desc_size, u16 rxdata_desc_size) 2776 2696 { 2777 2697 int err = 0, i; 2778 2698 ··· 2798 2718 adapter->rx_queue[0].rx_ring[0].size = rx_ring_size; 2799 2719 adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size; 2800 2720 vmxnet3_adjust_rx_ring_size(adapter); 2721 + 2722 + adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter); 2801 2723 for (i = 0; i < adapter->num_rx_queues; i++) { 2802 2724 struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; 2803 2725 /* qid and qid2 for rx queues will be assigned later when num 2804 2726 * of rx queues is finalized after allocating intrs */ 2805 2727 rq->shared = &adapter->rqd_start[i].ctrl; 2806 2728 rq->adapter = adapter; 2729 + rq->data_ring.desc_size = rxdata_desc_size; 2807 2730 err = vmxnet3_rq_create(rq, adapter); 2808 2731 if 
(err) { 2809 2732 if (i == 0) { ··· 2824 2741 } 2825 2742 } 2826 2743 } 2744 + 2745 + if (!adapter->rxdataring_enabled) 2746 + vmxnet3_rq_destroy_all_rxdataring(adapter); 2747 + 2827 2748 return err; 2828 2749 queue_err: 2829 2750 vmxnet3_tq_destroy_all(adapter); ··· 2872 2785 adapter->tx_ring_size, 2873 2786 adapter->rx_ring_size, 2874 2787 adapter->rx_ring2_size, 2875 - adapter->txdata_desc_size); 2788 + adapter->txdata_desc_size, 2789 + adapter->rxdata_desc_size); 2876 2790 if (err) 2877 2791 goto queue_err; 2878 2792 ··· 3347 3259 3348 3260 SET_NETDEV_DEV(netdev, &pdev->dev); 3349 3261 vmxnet3_declare_features(adapter, dma64); 3262 + 3263 + adapter->rxdata_desc_size = VMXNET3_VERSION_GE_3(adapter) ? 3264 + VMXNET3_DEF_RXDATA_DESC_SIZE : 0; 3350 3265 3351 3266 if (adapter->num_tx_queues == adapter->num_rx_queues) 3352 3267 adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
+37 -11
drivers/net/vmxnet3/vmxnet3_ethtool.c
··· 430 430 buf[j++] = rq->rx_ring[1].next2comp; 431 431 buf[j++] = rq->rx_ring[1].gen; 432 432 433 - /* receive data ring */ 434 - buf[j++] = 0; 435 - buf[j++] = 0; 436 - buf[j++] = 0; 437 - buf[j++] = 0; 433 + buf[j++] = VMXNET3_GET_ADDR_LO(rq->data_ring.basePA); 434 + buf[j++] = VMXNET3_GET_ADDR_HI(rq->data_ring.basePA); 435 + buf[j++] = rq->rx_ring[0].size; 436 + buf[j++] = rq->data_ring.desc_size; 438 437 439 438 buf[j++] = VMXNET3_GET_ADDR_LO(rq->comp_ring.basePA); 440 439 buf[j++] = VMXNET3_GET_ADDR_HI(rq->comp_ring.basePA); ··· 502 503 503 504 param->rx_max_pending = VMXNET3_RX_RING_MAX_SIZE; 504 505 param->tx_max_pending = VMXNET3_TX_RING_MAX_SIZE; 505 - param->rx_mini_max_pending = 0; 506 + param->rx_mini_max_pending = VMXNET3_VERSION_GE_3(adapter) ? 507 + VMXNET3_RXDATA_DESC_MAX_SIZE : 0; 506 508 param->rx_jumbo_max_pending = VMXNET3_RX_RING2_MAX_SIZE; 507 509 508 510 param->rx_pending = adapter->rx_ring_size; 509 511 param->tx_pending = adapter->tx_ring_size; 510 - param->rx_mini_pending = 0; 512 + param->rx_mini_pending = VMXNET3_VERSION_GE_3(adapter) ? 
513 + adapter->rxdata_desc_size : 0; 511 514 param->rx_jumbo_pending = adapter->rx_ring2_size; 512 515 } 513 516 ··· 520 519 { 521 520 struct vmxnet3_adapter *adapter = netdev_priv(netdev); 522 521 u32 new_tx_ring_size, new_rx_ring_size, new_rx_ring2_size; 522 + u16 new_rxdata_desc_size; 523 523 u32 sz; 524 524 int err = 0; 525 525 ··· 541 539 netdev_err(netdev, "adapter not completely initialized, " 542 540 "ring size cannot be changed yet\n"); 543 541 return -EOPNOTSUPP; 542 + } 543 + 544 + if (VMXNET3_VERSION_GE_3(adapter)) { 545 + if (param->rx_mini_pending < 0 || 546 + param->rx_mini_pending > VMXNET3_RXDATA_DESC_MAX_SIZE) { 547 + return -EINVAL; 548 + } 549 + } else if (param->rx_mini_pending != 0) { 550 + return -EINVAL; 544 551 } 545 552 546 553 /* round it up to a multiple of VMXNET3_RING_SIZE_ALIGN */ ··· 578 567 new_rx_ring2_size = min_t(u32, new_rx_ring2_size, 579 568 VMXNET3_RX_RING2_MAX_SIZE); 580 569 570 + /* rx data ring buffer size has to be a multiple of 571 + * VMXNET3_RXDATA_DESC_SIZE_ALIGN 572 + */ 573 + new_rxdata_desc_size = 574 + (param->rx_mini_pending + VMXNET3_RXDATA_DESC_SIZE_MASK) & 575 + ~VMXNET3_RXDATA_DESC_SIZE_MASK; 576 + new_rxdata_desc_size = min_t(u16, new_rxdata_desc_size, 577 + VMXNET3_RXDATA_DESC_MAX_SIZE); 578 + 581 579 if (new_tx_ring_size == adapter->tx_ring_size && 582 580 new_rx_ring_size == adapter->rx_ring_size && 583 - new_rx_ring2_size == adapter->rx_ring2_size) { 581 + new_rx_ring2_size == adapter->rx_ring2_size && 582 + new_rxdata_desc_size == adapter->rxdata_desc_size) { 584 583 return 0; 585 584 } 586 585 ··· 612 591 613 592 err = vmxnet3_create_queues(adapter, new_tx_ring_size, 614 593 new_rx_ring_size, new_rx_ring2_size, 615 - adapter->txdata_desc_size); 616 - 594 + adapter->txdata_desc_size, 595 + new_rxdata_desc_size); 617 596 if (err) { 618 597 /* failed, most likely because of OOM, try default 619 598 * size */ ··· 622 601 new_rx_ring_size = VMXNET3_DEF_RX_RING_SIZE; 623 602 new_rx_ring2_size = 
VMXNET3_DEF_RX_RING2_SIZE; 624 603 new_tx_ring_size = VMXNET3_DEF_TX_RING_SIZE; 604 + new_rxdata_desc_size = VMXNET3_VERSION_GE_3(adapter) ? 605 + VMXNET3_DEF_RXDATA_DESC_SIZE : 0; 606 + 625 607 err = vmxnet3_create_queues(adapter, 626 608 new_tx_ring_size, 627 609 new_rx_ring_size, 628 610 new_rx_ring2_size, 629 - adapter->txdata_desc_size); 611 + adapter->txdata_desc_size, 612 + new_rxdata_desc_size); 630 613 if (err) { 631 614 netdev_err(netdev, "failed to create queues " 632 615 "with default sizes. Closing it\n"); ··· 646 621 adapter->tx_ring_size = new_tx_ring_size; 647 622 adapter->rx_ring_size = new_rx_ring_size; 648 623 adapter->rx_ring2_size = new_rx_ring2_size; 624 + adapter->rxdata_desc_size = new_rxdata_desc_size; 649 625 650 626 out: 651 627 clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
+22 -1
drivers/net/vmxnet3/vmxnet3_int.h
··· 272 272 u64 rx_buf_alloc_failure; 273 273 }; 274 274 275 + struct vmxnet3_rx_data_ring { 276 + Vmxnet3_RxDataDesc *base; 277 + dma_addr_t basePA; 278 + u16 desc_size; 279 + }; 280 + 275 281 struct vmxnet3_rx_queue { 276 282 char name[IFNAMSIZ + 8]; /* To identify interrupt */ 277 283 struct vmxnet3_adapter *adapter; 278 284 struct napi_struct napi; 279 285 struct vmxnet3_cmd_ring rx_ring[2]; 286 + struct vmxnet3_rx_data_ring data_ring; 280 287 struct vmxnet3_comp_ring comp_ring; 281 288 struct vmxnet3_rx_ctx rx_ctx; 282 289 u32 qid; /* rqID in RCD for buffer from 1st ring */ 283 290 u32 qid2; /* rqID in RCD for buffer from 2nd ring */ 291 + u32 dataRingQid; /* rqID in RCD for buffer from data ring */ 284 292 struct vmxnet3_rx_buf_info *buf_info[2]; 285 293 dma_addr_t buf_info_pa; 286 294 struct Vmxnet3_RxQueueCtrl *shared; ··· 374 366 375 367 /* Size of buffer in the data ring */ 376 368 u16 txdata_desc_size; 369 + u16 rxdata_desc_size; 370 + 371 + bool rxdataring_enabled; 377 372 378 373 struct work_struct work; 379 374 ··· 416 405 #define VMXNET3_DEF_RX_RING_SIZE 256 417 406 #define VMXNET3_DEF_RX_RING2_SIZE 128 418 407 408 + #define VMXNET3_DEF_RXDATA_DESC_SIZE 128 409 + 419 410 #define VMXNET3_MAX_ETH_HDR_SIZE 22 420 411 #define VMXNET3_MAX_SKB_BUF_SIZE (3*1024) 412 + 413 + #define VMXNET3_GET_RING_IDX(adapter, rqID) \ 414 + ((rqID >= adapter->num_rx_queues && \ 415 + rqID < 2 * adapter->num_rx_queues) ? 1 : 0) \ 416 + 417 + #define VMXNET3_RX_DATA_RING(adapter, rqID) \ 418 + (rqID >= 2 * adapter->num_rx_queues && \ 419 + rqID < 3 * adapter->num_rx_queues) \ 421 420 422 421 int 423 422 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter); ··· 453 432 int 454 433 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, 455 434 u32 tx_ring_size, u32 rx_ring_size, u32 rx_ring2_size, 456 - u16 txdata_desc_size); 435 + u16 txdata_desc_size, u16 rxdata_desc_size); 457 436 458 437 void vmxnet3_set_ethtool_ops(struct net_device *netdev); 459 438