Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'fec-XDP_TX'

Wei Fang says:

====================
net: fec: add XDP_TX feature support

This patch set adds support for the XDP_TX feature of the FEC driver: the first
patch adds initial XDP_TX support, and the second patch improves the
performance of XDP_TX by not using xdp_convert_buff_to_frame(). Please
refer to the commit message of each patch for more details.
====================

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

+134 -63
+2 -4
drivers/net/ethernet/freescale/fec.h
··· 548 548 enum fec_txbuf_type { 549 549 FEC_TXBUF_T_SKB, 550 550 FEC_TXBUF_T_XDP_NDO, 551 + FEC_TXBUF_T_XDP_TX, 551 552 }; 552 553 553 554 struct fec_tx_buffer { 554 - union { 555 - struct sk_buff *skb; 556 - struct xdp_frame *xdp; 557 - }; 555 + void *buf_p; 558 556 enum fec_txbuf_type type; 559 557 }; 560 558
+132 -59
drivers/net/ethernet/freescale/fec_main.c
··· 69 69 #include <soc/imx/cpuidle.h> 70 70 #include <linux/filter.h> 71 71 #include <linux/bpf.h> 72 + #include <linux/bpf_trace.h> 72 73 73 74 #include <asm/cacheflush.h> 74 75 ··· 77 76 78 77 static void set_multicast_list(struct net_device *ndev); 79 78 static void fec_enet_itr_coal_set(struct net_device *ndev); 79 + static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep, 80 + int cpu, struct xdp_buff *xdp, 81 + u32 dma_sync_len); 80 82 81 83 #define DRIVER_NAME "fec" 82 84 ··· 400 396 fec16_to_cpu(bdp->cbd_sc), 401 397 fec32_to_cpu(bdp->cbd_bufaddr), 402 398 fec16_to_cpu(bdp->cbd_datlen), 403 - txq->tx_buf[index].skb); 399 + txq->tx_buf[index].buf_p); 404 400 bdp = fec_enet_get_nextdesc(bdp, &txq->bd); 405 401 index++; 406 402 } while (bdp != txq->bd.base); ··· 657 653 658 654 index = fec_enet_get_bd_index(last_bdp, &txq->bd); 659 655 /* Save skb pointer */ 660 - txq->tx_buf[index].skb = skb; 656 + txq->tx_buf[index].buf_p = skb; 661 657 662 658 /* Make sure the updates to rest of the descriptor are performed before 663 659 * transferring ownership. 
··· 863 859 } 864 860 865 861 /* Save skb pointer */ 866 - txq->tx_buf[index].skb = skb; 862 + txq->tx_buf[index].buf_p = skb; 867 863 868 864 skb_tx_timestamp(skb); 869 865 txq->bd.cur = bdp; ··· 960 956 fec32_to_cpu(bdp->cbd_bufaddr), 961 957 fec16_to_cpu(bdp->cbd_datlen), 962 958 DMA_TO_DEVICE); 963 - if (txq->tx_buf[i].skb) { 964 - dev_kfree_skb_any(txq->tx_buf[i].skb); 965 - txq->tx_buf[i].skb = NULL; 966 - } 967 - } else { 959 + if (txq->tx_buf[i].buf_p) 960 + dev_kfree_skb_any(txq->tx_buf[i].buf_p); 961 + } else if (txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO) { 968 962 if (bdp->cbd_bufaddr) 969 963 dma_unmap_single(&fep->pdev->dev, 970 964 fec32_to_cpu(bdp->cbd_bufaddr), 971 965 fec16_to_cpu(bdp->cbd_datlen), 972 966 DMA_TO_DEVICE); 973 967 974 - if (txq->tx_buf[i].xdp) { 975 - xdp_return_frame(txq->tx_buf[i].xdp); 976 - txq->tx_buf[i].xdp = NULL; 977 - } 968 + if (txq->tx_buf[i].buf_p) 969 + xdp_return_frame(txq->tx_buf[i].buf_p); 970 + } else { 971 + struct page *page = txq->tx_buf[i].buf_p; 978 972 979 - /* restore default tx buffer type: FEC_TXBUF_T_SKB */ 980 - txq->tx_buf[i].type = FEC_TXBUF_T_SKB; 973 + if (page) 974 + page_pool_put_page(page->pp, page, 0, false); 981 975 } 982 976 977 + txq->tx_buf[i].buf_p = NULL; 978 + /* restore default tx buffer type: FEC_TXBUF_T_SKB */ 979 + txq->tx_buf[i].type = FEC_TXBUF_T_SKB; 983 980 bdp->cbd_bufaddr = cpu_to_fec32(0); 984 981 bdp = fec_enet_get_nextdesc(bdp, &txq->bd); 985 982 } ··· 1387 1382 struct netdev_queue *nq; 1388 1383 int index = 0; 1389 1384 int entries_free; 1385 + struct page *page; 1386 + int frame_len; 1390 1387 1391 1388 fep = netdev_priv(ndev); 1392 1389 ··· 1410 1403 index = fec_enet_get_bd_index(bdp, &txq->bd); 1411 1404 1412 1405 if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { 1413 - skb = txq->tx_buf[index].skb; 1414 - txq->tx_buf[index].skb = NULL; 1406 + skb = txq->tx_buf[index].buf_p; 1415 1407 if (bdp->cbd_bufaddr && 1416 1408 !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) 
1417 1409 dma_unmap_single(&fep->pdev->dev, ··· 1429 1423 if (unlikely(!budget)) 1430 1424 break; 1431 1425 1432 - xdpf = txq->tx_buf[index].xdp; 1433 - if (bdp->cbd_bufaddr) 1434 - dma_unmap_single(&fep->pdev->dev, 1435 - fec32_to_cpu(bdp->cbd_bufaddr), 1436 - fec16_to_cpu(bdp->cbd_datlen), 1437 - DMA_TO_DEVICE); 1426 + if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) { 1427 + xdpf = txq->tx_buf[index].buf_p; 1428 + if (bdp->cbd_bufaddr) 1429 + dma_unmap_single(&fep->pdev->dev, 1430 + fec32_to_cpu(bdp->cbd_bufaddr), 1431 + fec16_to_cpu(bdp->cbd_datlen), 1432 + DMA_TO_DEVICE); 1433 + } else { 1434 + page = txq->tx_buf[index].buf_p; 1435 + } 1436 + 1438 1437 bdp->cbd_bufaddr = cpu_to_fec32(0); 1439 - if (!xdpf) { 1438 + if (unlikely(!txq->tx_buf[index].buf_p)) { 1440 1439 txq->tx_buf[index].type = FEC_TXBUF_T_SKB; 1441 1440 goto tx_buf_done; 1442 1441 } 1442 + 1443 + frame_len = fec16_to_cpu(bdp->cbd_datlen); 1443 1444 } 1444 1445 1445 1446 /* Check for errors. */ ··· 1470 1457 if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) 1471 1458 ndev->stats.tx_bytes += skb->len; 1472 1459 else 1473 - ndev->stats.tx_bytes += xdpf->len; 1460 + ndev->stats.tx_bytes += frame_len; 1474 1461 } 1475 1462 1476 1463 /* Deferred means some collisions occurred during transmit, ··· 1495 1482 1496 1483 /* Free the sk buffer associated with this last transmit */ 1497 1484 dev_kfree_skb_any(skb); 1498 - } else { 1499 - xdp_return_frame(xdpf); 1500 - 1501 - txq->tx_buf[index].xdp = NULL; 1502 - /* restore default tx buffer type: FEC_TXBUF_T_SKB */ 1503 - txq->tx_buf[index].type = FEC_TXBUF_T_SKB; 1485 + } else if (txq->tx_buf[index].type == FEC_TXBUF_T_XDP_NDO) { 1486 + xdp_return_frame_rx_napi(xdpf); 1487 + } else { /* recycle pages of XDP_TX frames */ 1488 + /* The dma_sync_size = 0 as XDP_TX has already synced DMA for_device */ 1489 + page_pool_put_page(page->pp, page, 0, true); 1504 1490 } 1491 + 1492 + txq->tx_buf[index].buf_p = NULL; 1493 + /* restore default tx buffer type: 
FEC_TXBUF_T_SKB */ 1494 + txq->tx_buf[index].type = FEC_TXBUF_T_SKB; 1505 1495 1506 1496 tx_buf_done: 1507 1497 /* Make sure the update to bdp and tx_buf are performed ··· 1558 1542 1559 1543 static u32 1560 1544 fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog, 1561 - struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int index) 1545 + struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu) 1562 1546 { 1563 1547 unsigned int sync, len = xdp->data_end - xdp->data; 1564 1548 u32 ret = FEC_ENET_XDP_PASS; ··· 1568 1552 1569 1553 act = bpf_prog_run_xdp(prog, xdp); 1570 1554 1571 - /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */ 1572 - sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM; 1555 + /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover 1556 + * max len CPU touch 1557 + */ 1558 + sync = xdp->data_end - xdp->data; 1573 1559 sync = max(sync, len); 1574 1560 1575 1561 switch (act) { ··· 1592 1574 } 1593 1575 break; 1594 1576 1595 - default: 1596 - bpf_warn_invalid_xdp_action(fep->netdev, prog, act); 1597 - fallthrough; 1598 - 1599 1577 case XDP_TX: 1578 + err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync); 1579 + if (unlikely(err)) { 1580 + ret = FEC_ENET_XDP_CONSUMED; 1581 + page = virt_to_head_page(xdp->data); 1582 + page_pool_put_page(rxq->page_pool, page, sync, true); 1583 + trace_xdp_exception(fep->netdev, prog, act); 1584 + } else { 1585 + ret = FEC_ENET_XDP_TX; 1586 + } 1587 + break; 1588 + 1589 + default: 1600 1590 bpf_warn_invalid_xdp_action(fep->netdev, prog, act); 1601 1591 fallthrough; 1602 1592 ··· 1646 1620 struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); 1647 1621 u32 ret, xdp_result = FEC_ENET_XDP_PASS; 1648 1622 u32 data_start = FEC_ENET_XDP_HEADROOM; 1623 + int cpu = smp_processor_id(); 1649 1624 struct xdp_buff xdp; 1650 1625 struct page *page; 1651 1626 u32 sub_len = 4; ··· 1725 1698 /* subtract 16bit shift and FCS */ 1726 1699 xdp_prepare_buff(&xdp, 
page_address(page), 1727 1700 data_start, pkt_len - sub_len, false); 1728 - ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, index); 1701 + ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu); 1729 1702 xdp_result |= ret; 1730 1703 if (ret != FEC_ENET_XDP_PASS) 1731 1704 goto rx_processing_done; ··· 3235 3208 { 3236 3209 struct fec_enet_private *fep = netdev_priv(ndev); 3237 3210 unsigned int i; 3238 - struct sk_buff *skb; 3239 3211 struct fec_enet_priv_tx_q *txq; 3240 3212 struct fec_enet_priv_rx_q *rxq; 3241 3213 unsigned int q; ··· 3259 3233 kfree(txq->tx_bounce[i]); 3260 3234 txq->tx_bounce[i] = NULL; 3261 3235 3262 - if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { 3263 - skb = txq->tx_buf[i].skb; 3264 - txq->tx_buf[i].skb = NULL; 3265 - dev_kfree_skb(skb); 3266 - } else { 3267 - if (txq->tx_buf[i].xdp) { 3268 - xdp_return_frame(txq->tx_buf[i].xdp); 3269 - txq->tx_buf[i].xdp = NULL; 3270 - } 3271 - 3236 + if (!txq->tx_buf[i].buf_p) { 3272 3237 txq->tx_buf[i].type = FEC_TXBUF_T_SKB; 3238 + continue; 3273 3239 } 3240 + 3241 + if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { 3242 + dev_kfree_skb(txq->tx_buf[i].buf_p); 3243 + } else if (txq->tx_buf[i].type == FEC_TXBUF_T_XDP_NDO) { 3244 + xdp_return_frame(txq->tx_buf[i].buf_p); 3245 + } else { 3246 + struct page *page = txq->tx_buf[i].buf_p; 3247 + 3248 + page_pool_put_page(page->pp, page, 0, false); 3249 + } 3250 + 3251 + txq->tx_buf[i].buf_p = NULL; 3252 + txq->tx_buf[i].type = FEC_TXBUF_T_SKB; 3274 3253 } 3275 3254 } 3276 3255 } ··· 3798 3767 3799 3768 static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, 3800 3769 struct fec_enet_priv_tx_q *txq, 3801 - struct xdp_frame *frame) 3770 + void *frame, u32 dma_sync_len, 3771 + bool ndo_xmit) 3802 3772 { 3803 3773 unsigned int index, status, estatus; 3804 3774 struct bufdesc *bdp; 3805 3775 dma_addr_t dma_addr; 3806 3776 int entries_free; 3777 + u16 frame_len; 3807 3778 3808 3779 entries_free = fec_enet_get_free_txdesc_num(txq); 3809 3780 if (entries_free < 
MAX_SKB_FRAGS + 1) { ··· 3820 3787 3821 3788 index = fec_enet_get_bd_index(bdp, &txq->bd); 3822 3789 3823 - dma_addr = dma_map_single(&fep->pdev->dev, frame->data, 3824 - frame->len, DMA_TO_DEVICE); 3825 - if (dma_mapping_error(&fep->pdev->dev, dma_addr)) 3826 - return -ENOMEM; 3790 + if (ndo_xmit) { 3791 + struct xdp_frame *xdpf = frame; 3792 + 3793 + dma_addr = dma_map_single(&fep->pdev->dev, xdpf->data, 3794 + xdpf->len, DMA_TO_DEVICE); 3795 + if (dma_mapping_error(&fep->pdev->dev, dma_addr)) 3796 + return -ENOMEM; 3797 + 3798 + frame_len = xdpf->len; 3799 + txq->tx_buf[index].buf_p = xdpf; 3800 + txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO; 3801 + } else { 3802 + struct xdp_buff *xdpb = frame; 3803 + struct page *page; 3804 + 3805 + page = virt_to_page(xdpb->data); 3806 + dma_addr = page_pool_get_dma_addr(page) + 3807 + (xdpb->data - xdpb->data_hard_start); 3808 + dma_sync_single_for_device(&fep->pdev->dev, dma_addr, 3809 + dma_sync_len, DMA_BIDIRECTIONAL); 3810 + frame_len = xdpb->data_end - xdpb->data; 3811 + txq->tx_buf[index].buf_p = page; 3812 + txq->tx_buf[index].type = FEC_TXBUF_T_XDP_TX; 3813 + } 3827 3814 3828 3815 status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); 3829 3816 if (fep->bufdesc_ex) 3830 3817 estatus = BD_ENET_TX_INT; 3831 3818 3832 3819 bdp->cbd_bufaddr = cpu_to_fec32(dma_addr); 3833 - bdp->cbd_datlen = cpu_to_fec16(frame->len); 3820 + bdp->cbd_datlen = cpu_to_fec16(frame_len); 3834 3821 3835 3822 if (fep->bufdesc_ex) { 3836 3823 struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; ··· 3861 3808 ebdp->cbd_bdu = 0; 3862 3809 ebdp->cbd_esc = cpu_to_fec32(estatus); 3863 3810 } 3864 - 3865 - txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO; 3866 - txq->tx_buf[index].xdp = frame; 3867 3811 3868 3812 /* Make sure the updates to rest of the descriptor are performed before 3869 3813 * transferring ownership. 
··· 3887 3837 return 0; 3888 3838 } 3889 3839 3840 + static int fec_enet_xdp_tx_xmit(struct fec_enet_private *fep, 3841 + int cpu, struct xdp_buff *xdp, 3842 + u32 dma_sync_len) 3843 + { 3844 + struct fec_enet_priv_tx_q *txq; 3845 + struct netdev_queue *nq; 3846 + int queue, ret; 3847 + 3848 + queue = fec_enet_xdp_get_tx_queue(fep, cpu); 3849 + txq = fep->tx_queue[queue]; 3850 + nq = netdev_get_tx_queue(fep->netdev, queue); 3851 + 3852 + __netif_tx_lock(nq, cpu); 3853 + 3854 + /* Avoid tx timeout as XDP shares the queue with kernel stack */ 3855 + txq_trans_cond_update(nq); 3856 + ret = fec_enet_txq_xmit_frame(fep, txq, xdp, dma_sync_len, false); 3857 + 3858 + __netif_tx_unlock(nq); 3859 + 3860 + return ret; 3861 + } 3862 + 3890 3863 static int fec_enet_xdp_xmit(struct net_device *dev, 3891 3864 int num_frames, 3892 3865 struct xdp_frame **frames, ··· 3932 3859 /* Avoid tx timeout as XDP shares the queue with kernel stack */ 3933 3860 txq_trans_cond_update(nq); 3934 3861 for (i = 0; i < num_frames; i++) { 3935 - if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0) 3862 + if (fec_enet_txq_xmit_frame(fep, txq, frames[i], 0, true) < 0) 3936 3863 break; 3937 3864 sent_frames++; 3938 3865 }