Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lan966x: Don't use xdp_frame when action is XDP_TX

When the action of an xdp program was XDP_TX, lan966x was creating
an xdp_frame and using it to send the frame back. But it is also
possible to send back the frame without needing an xdp_frame, because
it is possible to send it back using the page.
And then, once the frame is transmitted, it is possible to directly use
page_pool_recycle_direct, as lan966x is using page pools.
This saves some CPU usage on this path, which results in a higher
number of transmitted frames. Below are the statistics:
Frame size: Improvement:
64 ~8%
256 ~11%
512 ~8%
1000 ~0%
1500 ~0%

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20230422142344.3630602-1-horatiu.vultur@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Horatiu Vultur and committed by
Jakub Kicinski
700f11eb ee3392ed

+28 -23
+23 -12
drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
··· 390 390 static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) 391 391 { 392 392 struct lan966x_tx *tx = &lan966x->tx; 393 + struct lan966x_rx *rx = &lan966x->rx; 393 394 struct lan966x_tx_dcb_buf *dcb_buf; 394 395 struct xdp_frame_bulk bq; 395 396 struct lan966x_db *db; ··· 433 432 if (dcb_buf->xdp_ndo) 434 433 xdp_return_frame_bulk(dcb_buf->data.xdpf, &bq); 435 434 else 436 - xdp_return_frame_rx_napi(dcb_buf->data.xdpf); 435 + page_pool_recycle_direct(rx->page_pool, 436 + dcb_buf->data.page); 437 437 } 438 438 439 439 clear = true; ··· 701 699 tx->last_in_use = next_to_use; 702 700 } 703 701 704 - int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, 705 - struct xdp_frame *xdpf, 706 - struct page *page, 707 - bool dma_map) 702 + int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) 708 703 { 709 704 struct lan966x *lan966x = port->lan966x; 710 705 struct lan966x_tx_dcb_buf *next_dcb_buf; 711 706 struct lan966x_tx *tx = &lan966x->tx; 707 + struct xdp_frame *xdpf; 712 708 dma_addr_t dma_addr; 709 + struct page *page; 713 710 int next_to_use; 714 711 __be32 *ifh; 715 712 int ret = 0; ··· 723 722 goto out; 724 723 } 725 724 725 + /* Get the next buffer */ 726 + next_dcb_buf = &tx->dcbs_buf[next_to_use]; 727 + 726 728 /* Generate new IFH */ 727 - if (dma_map) { 729 + if (!len) { 730 + xdpf = ptr; 731 + 728 732 if (xdpf->headroom < IFH_LEN_BYTES) { 729 733 ret = NETDEV_TX_OK; 730 734 goto out; ··· 749 743 goto out; 750 744 } 751 745 746 + next_dcb_buf->data.xdpf = xdpf; 747 + next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES; 748 + 752 749 /* Setup next dcb */ 753 750 lan966x_fdma_tx_setup_dcb(tx, next_to_use, 754 751 xdpf->len + IFH_LEN_BYTES, 755 752 dma_addr); 756 753 } else { 754 + page = ptr; 755 + 757 756 ifh = page_address(page) + XDP_PACKET_HEADROOM; 758 757 memset(ifh, 0x0, sizeof(__be32) * IFH_LEN); 759 758 lan966x_ifh_set_bypass(ifh, 1); ··· 767 756 dma_addr = page_pool_get_dma_addr(page); 768 757 
dma_sync_single_for_device(lan966x->dev, 769 758 dma_addr + XDP_PACKET_HEADROOM, 770 - xdpf->len + IFH_LEN_BYTES, 759 + len + IFH_LEN_BYTES, 771 760 DMA_TO_DEVICE); 761 + 762 + next_dcb_buf->data.page = page; 763 + next_dcb_buf->len = len + IFH_LEN_BYTES; 772 764 773 765 /* Setup next dcb */ 774 766 lan966x_fdma_tx_setup_dcb(tx, next_to_use, 775 - xdpf->len + IFH_LEN_BYTES, 767 + len + IFH_LEN_BYTES, 776 768 dma_addr + XDP_PACKET_HEADROOM); 777 769 } 778 770 779 771 /* Fill up the buffer */ 780 - next_dcb_buf = &tx->dcbs_buf[next_to_use]; 781 772 next_dcb_buf->use_skb = false; 782 - next_dcb_buf->data.xdpf = xdpf; 783 - next_dcb_buf->xdp_ndo = dma_map; 784 - next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES; 773 + next_dcb_buf->xdp_ndo = !len; 785 774 next_dcb_buf->dma_addr = dma_addr; 786 775 next_dcb_buf->used = true; 787 776 next_dcb_buf->ptp = false;
+2 -4
drivers/net/ethernet/microchip/lan966x/lan966x_main.h
··· 243 243 union { 244 244 struct sk_buff *skb; 245 245 struct xdp_frame *xdpf; 246 + struct page *page; 246 247 } data; 247 248 u32 len; 248 249 u32 used : 1; ··· 542 541 int lan966x_ptp_del_traps(struct lan966x_port *port); 543 542 544 543 int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev); 545 - int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, 546 - struct xdp_frame *frame, 547 - struct page *page, 548 - bool dma_map); 544 + int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len); 549 545 int lan966x_fdma_change_mtu(struct lan966x *lan966x); 550 546 void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev); 551 547 void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
+3 -7
drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
··· 62 62 struct xdp_frame *xdpf = frames[i]; 63 63 int err; 64 64 65 - err = lan966x_fdma_xmit_xdpf(port, xdpf, NULL, true); 65 + err = lan966x_fdma_xmit_xdpf(port, xdpf, 0); 66 66 if (err) 67 67 break; 68 68 ··· 76 76 { 77 77 struct bpf_prog *xdp_prog = port->xdp_prog; 78 78 struct lan966x *lan966x = port->lan966x; 79 - struct xdp_frame *xdpf; 80 79 struct xdp_buff xdp; 81 80 u32 act; 82 81 ··· 89 90 case XDP_PASS: 90 91 return FDMA_PASS; 91 92 case XDP_TX: 92 - xdpf = xdp_convert_buff_to_frame(&xdp); 93 - if (!xdpf) 94 - return FDMA_DROP; 95 - 96 - return lan966x_fdma_xmit_xdpf(port, xdpf, page, false) ? 93 + return lan966x_fdma_xmit_xdpf(port, page, 94 + data_len - IFH_LEN_BYTES) ? 97 95 FDMA_DROP : FDMA_TX; 98 96 case XDP_REDIRECT: 99 97 if (xdp_do_redirect(port->dev, &xdp, xdp_prog))