Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'add-af_xdp-zero-copy-support'

Meghana Malladi says:

====================
Add AF_XDP zero copy support

This series adds AF_XDP zero copy support to the icssg driver.

Tests were performed on AM64x-EVM with xdpsock application [1].

A clear improvement is seen in transmit (txonly) and receive (rxdrop)
for 64 byte packets. The 1500 byte test seems to be limited by line
rate (1G link), so no improvement is seen there in packet rate.

Having some issue with l2fwd, as the benchmarking numbers show 0
for 64 byte packets after forwarding the first batch of packets, and I am
currently looking into it.

AF_XDP performance using 64 byte packets in Kpps.
Benchmark: XDP-SKB XDP-Native XDP-Native(ZeroCopy)
rxdrop 253 473 656
txonly 350 354 855
l2fwd 178 240 0

AF_XDP performance using 1500 byte packets in Kpps.
Benchmark: XDP-SKB XDP-Native XDP-Native(ZeroCopy)
rxdrop 82 82 82
txonly 81 82 82
l2fwd 81 82 82

[1]: https://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-example
v5: https://lore.kernel.org/all/20251111101523.3160680-1-m-malladi@ti.com/
====================

Link: https://patch.msgid.link/20251118135542.380574-1-m-malladi@ti.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+738 -148
+396 -71
drivers/net/ethernet/ti/icssg/icssg_common.c
··· 93 93 } 94 94 EXPORT_SYMBOL_GPL(prueth_ndev_del_tx_napi); 95 95 96 + static int emac_xsk_xmit_zc(struct prueth_emac *emac, 97 + unsigned int q_idx) 98 + { 99 + struct prueth_tx_chn *tx_chn = &emac->tx_chns[q_idx]; 100 + struct xsk_buff_pool *pool = tx_chn->xsk_pool; 101 + struct net_device *ndev = emac->ndev; 102 + struct cppi5_host_desc_t *host_desc; 103 + dma_addr_t dma_desc, dma_buf; 104 + struct prueth_swdata *swdata; 105 + struct xdp_desc xdp_desc; 106 + int num_tx = 0, pkt_len; 107 + int descs_avail, ret; 108 + u32 *epib; 109 + int i; 110 + 111 + descs_avail = k3_cppi_desc_pool_avail(tx_chn->desc_pool); 112 + /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS 113 + * will be available for normal TX path and queue is stopped there if 114 + * necessary 115 + */ 116 + if (descs_avail <= MAX_SKB_FRAGS) 117 + return 0; 118 + 119 + descs_avail -= MAX_SKB_FRAGS; 120 + 121 + for (i = 0; i < descs_avail; i++) { 122 + if (!xsk_tx_peek_desc(pool, &xdp_desc)) 123 + break; 124 + 125 + dma_buf = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 126 + pkt_len = xdp_desc.len; 127 + xsk_buff_raw_dma_sync_for_device(pool, dma_buf, pkt_len); 128 + 129 + host_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); 130 + if (unlikely(!host_desc)) 131 + break; 132 + 133 + cppi5_hdesc_init(host_desc, CPPI5_INFO0_HDESC_EPIB_PRESENT, 134 + PRUETH_NAV_PS_DATA_SIZE); 135 + cppi5_hdesc_set_pkttype(host_desc, 0); 136 + epib = host_desc->epib; 137 + epib[0] = 0; 138 + epib[1] = 0; 139 + cppi5_hdesc_set_pktlen(host_desc, pkt_len); 140 + cppi5_desc_set_tags_ids(&host_desc->hdr, 0, 141 + (emac->port_id | (q_idx << 8))); 142 + 143 + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &dma_buf); 144 + cppi5_hdesc_attach_buf(host_desc, dma_buf, pkt_len, dma_buf, 145 + pkt_len); 146 + 147 + swdata = cppi5_hdesc_get_swdata(host_desc); 148 + swdata->type = PRUETH_SWDATA_XSK; 149 + 150 + dma_desc = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, 151 + host_desc); 152 + ret = 
k3_udma_glue_push_tx_chn(tx_chn->tx_chn, 153 + host_desc, dma_desc); 154 + 155 + if (ret) { 156 + ndev->stats.tx_errors++; 157 + k3_cppi_desc_pool_free(tx_chn->desc_pool, host_desc); 158 + break; 159 + } 160 + 161 + num_tx++; 162 + } 163 + 164 + xsk_tx_release(tx_chn->xsk_pool); 165 + return num_tx; 166 + } 167 + 96 168 void prueth_xmit_free(struct prueth_tx_chn *tx_chn, 97 169 struct cppi5_host_desc_t *desc) 98 170 { 99 171 struct cppi5_host_desc_t *first_desc, *next_desc; 100 172 dma_addr_t buf_dma, next_desc_dma; 173 + struct prueth_swdata *swdata; 101 174 u32 buf_dma_len; 102 175 103 176 first_desc = desc; 104 177 next_desc = first_desc; 178 + swdata = cppi5_hdesc_get_swdata(first_desc); 179 + if (swdata->type == PRUETH_SWDATA_XSK) 180 + goto free_pool; 105 181 106 182 cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); 107 183 k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); ··· 202 126 k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); 203 127 } 204 128 129 + free_pool: 205 130 k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); 206 131 } 207 132 EXPORT_SYMBOL_GPL(prueth_xmit_free); ··· 216 139 struct prueth_swdata *swdata; 217 140 struct prueth_tx_chn *tx_chn; 218 141 unsigned int total_bytes = 0; 142 + int xsk_frames_done = 0; 219 143 struct xdp_frame *xdpf; 144 + unsigned int pkt_len; 220 145 struct sk_buff *skb; 221 146 dma_addr_t desc_dma; 222 147 int res, num_tx = 0; ··· 255 176 total_bytes += xdpf->len; 256 177 xdp_return_frame(xdpf); 257 178 break; 179 + case PRUETH_SWDATA_XSK: 180 + pkt_len = cppi5_hdesc_get_pktlen(desc_tx); 181 + dev_sw_netstats_tx_add(ndev, 1, pkt_len); 182 + xsk_frames_done++; 183 + break; 258 184 default: 259 185 prueth_xmit_free(tx_chn, desc_tx); 260 186 ndev->stats.tx_dropped++; ··· 288 204 __netif_tx_unlock(netif_txq); 289 205 } 290 206 207 + if (tx_chn->xsk_pool) { 208 + if (xsk_frames_done) 209 + xsk_tx_completed(tx_chn->xsk_pool, xsk_frames_done); 210 + 211 + if 
(xsk_uses_need_wakeup(tx_chn->xsk_pool)) 212 + xsk_set_tx_need_wakeup(tx_chn->xsk_pool); 213 + 214 + netif_txq = netdev_get_tx_queue(ndev, chn); 215 + txq_trans_cond_update(netif_txq); 216 + emac_xsk_xmit_zc(emac, chn); 217 + } 218 + 291 219 return num_tx; 292 220 } 293 221 ··· 308 212 struct prueth_tx_chn *tx_chns = 309 213 container_of(timer, struct prueth_tx_chn, tx_hrtimer); 310 214 311 - enable_irq(tx_chns->irq); 215 + if (tx_chns->irq_disabled) { 216 + tx_chns->irq_disabled = false; 217 + enable_irq(tx_chns->irq); 218 + } 312 219 return HRTIMER_NORESTART; 313 220 } 314 221 ··· 334 235 ns_to_ktime(tx_chn->tx_pace_timeout_ns), 335 236 HRTIMER_MODE_REL_PINNED); 336 237 } else { 337 - enable_irq(tx_chn->irq); 238 + if (tx_chn->irq_disabled) { 239 + tx_chn->irq_disabled = false; 240 + enable_irq(tx_chn->irq); 241 + } 338 242 } 339 243 } 340 244 ··· 348 246 { 349 247 struct prueth_tx_chn *tx_chn = dev_id; 350 248 249 + tx_chn->irq_disabled = true; 351 250 disable_irq_nosync(irq); 352 251 napi_schedule(&tx_chn->napi_tx); 353 252 ··· 465 362 } 466 363 EXPORT_SYMBOL_GPL(prueth_init_tx_chns); 467 364 365 + static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, 366 + struct device *dma_dev, 367 + int size) 368 + { 369 + struct page_pool_params pp_params = { 0 }; 370 + struct page_pool *pool; 371 + 372 + pp_params.order = 0; 373 + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 374 + pp_params.pool_size = size; 375 + pp_params.nid = dev_to_node(emac->prueth->dev); 376 + pp_params.dma_dir = DMA_BIDIRECTIONAL; 377 + pp_params.dev = dma_dev; 378 + pp_params.napi = &emac->napi_rx; 379 + pp_params.max_len = PAGE_SIZE; 380 + 381 + pool = page_pool_create(&pp_params); 382 + if (IS_ERR(pool)) 383 + netdev_err(emac->ndev, "cannot create rx page pool\n"); 384 + 385 + return pool; 386 + } 387 + 468 388 int prueth_init_rx_chns(struct prueth_emac *emac, 469 389 struct prueth_rx_chn *rx_chn, 470 390 char *name, u32 max_rflows, ··· 497 371 struct device 
*dev = emac->prueth->dev; 498 372 struct net_device *ndev = emac->ndev; 499 373 u32 fdqring_id, hdesc_size; 374 + struct page_pool *pool; 500 375 int i, ret = 0, slice; 501 376 int flow_id_base; 502 377 ··· 539 412 netdev_err(ndev, "Failed to create rx pool: %d\n", ret); 540 413 goto fail; 541 414 } 415 + 416 + pool = prueth_create_page_pool(emac, rx_chn->dma_dev, rx_chn->descs_num); 417 + if (IS_ERR(pool)) { 418 + ret = PTR_ERR(pool); 419 + goto fail; 420 + } 421 + 422 + rx_chn->pg_pool = pool; 542 423 543 424 flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); 544 425 if (emac->is_sr1 && !strcmp(name, "rxmgm")) { ··· 679 544 * emac_xmit_xdp_frame - transmits an XDP frame 680 545 * @emac: emac device 681 546 * @xdpf: data to transmit 682 - * @page: page from page pool if already DMA mapped 683 547 * @q_idx: queue id 548 + * @buff_type: Type of buffer to be transmitted 684 549 * 685 550 * Return: XDP state 686 551 */ 687 552 u32 emac_xmit_xdp_frame(struct prueth_emac *emac, 688 553 struct xdp_frame *xdpf, 689 - struct page *page, 690 - unsigned int q_idx) 554 + unsigned int q_idx, 555 + enum prueth_tx_buff_type buff_type) 691 556 { 692 557 struct cppi5_host_desc_t *first_desc; 693 558 struct net_device *ndev = emac->ndev; ··· 695 560 struct prueth_tx_chn *tx_chn; 696 561 dma_addr_t desc_dma, buf_dma; 697 562 struct prueth_swdata *swdata; 563 + struct page *page; 698 564 u32 *epib; 699 565 int ret; 700 566 ··· 712 576 return ICSSG_XDP_CONSUMED; /* drop */ 713 577 } 714 578 715 - if (page) { /* already DMA mapped by page_pool */ 579 + if (buff_type == PRUETH_TX_BUFF_TYPE_XDP_TX) { /* already DMA mapped by page_pool */ 580 + page = virt_to_head_page(xdpf->data); 581 + if (unlikely(!page)) { 582 + netdev_err(ndev, "xdp tx: failed to get page from xdpf\n"); 583 + goto drop_free_descs; 584 + } 716 585 buf_dma = page_pool_get_dma_addr(page); 717 586 buf_dma += xdpf->headroom + sizeof(struct xdp_frame); 718 587 } else { /* Map the linear buffer */ ··· 772 631 
* emac_run_xdp - run an XDP program 773 632 * @emac: emac device 774 633 * @xdp: XDP buffer containing the frame 775 - * @page: page with RX data if already DMA mapped 776 634 * @len: Rx descriptor packet length 777 635 * 778 636 * Return: XDP state 779 637 */ 780 - static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, 781 - struct page *page, u32 *len) 638 + static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *len) 782 639 { 783 640 struct net_device *ndev = emac->ndev; 784 641 struct netdev_queue *netif_txq; ··· 803 664 q_idx = cpu % emac->tx_ch_num; 804 665 netif_txq = netdev_get_tx_queue(ndev, q_idx); 805 666 __netif_tx_lock(netif_txq, cpu); 806 - result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); 667 + result = emac_xmit_xdp_frame(emac, xdpf, q_idx, 668 + PRUETH_TX_BUFF_TYPE_XDP_TX); 807 669 __netif_tx_unlock(netif_txq); 808 670 if (result == ICSSG_XDP_CONSUMED) { 809 671 ndev->stats.tx_dropped++; ··· 829 689 fallthrough; /* handle aborts by dropping packet */ 830 690 case XDP_DROP: 831 691 ndev->stats.rx_dropped++; 832 - page_pool_recycle_direct(emac->rx_chns.pg_pool, page); 833 692 return ICSSG_XDP_CONSUMED; 834 693 } 694 + } 695 + 696 + static int prueth_dma_rx_push_mapped_zc(struct prueth_emac *emac, 697 + struct prueth_rx_chn *rx_chn, 698 + struct xdp_buff *xdp) 699 + { 700 + struct net_device *ndev = emac->ndev; 701 + struct cppi5_host_desc_t *desc_rx; 702 + struct prueth_swdata *swdata; 703 + dma_addr_t desc_dma; 704 + dma_addr_t buf_dma; 705 + int buf_len; 706 + 707 + buf_dma = xsk_buff_xdp_get_dma(xdp); 708 + desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool); 709 + if (!desc_rx) { 710 + netdev_err(ndev, "rx push: failed to allocate descriptor\n"); 711 + return -ENOMEM; 712 + } 713 + desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); 714 + 715 + cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, 716 + PRUETH_NAV_PS_DATA_SIZE); 717 + k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, 
&buf_dma); 718 + buf_len = xsk_pool_get_rx_frame_size(rx_chn->xsk_pool); 719 + cppi5_hdesc_attach_buf(desc_rx, buf_dma, buf_len, buf_dma, buf_len); 720 + swdata = cppi5_hdesc_get_swdata(desc_rx); 721 + swdata->type = PRUETH_SWDATA_XSK; 722 + swdata->data.xdp = xdp; 723 + 724 + return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, PRUETH_RX_FLOW_DATA, 725 + desc_rx, desc_dma); 726 + } 727 + 728 + static int prueth_rx_alloc_zc(struct prueth_emac *emac, int budget) 729 + { 730 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 731 + struct xdp_buff *xdp; 732 + int i, ret; 733 + 734 + for (i = 0; i < budget; i++) { 735 + xdp = xsk_buff_alloc(rx_chn->xsk_pool); 736 + if (!xdp) 737 + break; 738 + 739 + ret = prueth_dma_rx_push_mapped_zc(emac, rx_chn, xdp); 740 + if (ret) { 741 + netdev_err(emac->ndev, "rx alloc: failed to map descriptors to xdp buff\n"); 742 + xsk_buff_free(xdp); 743 + break; 744 + } 745 + } 746 + 747 + return i; 748 + } 749 + 750 + static void emac_dispatch_skb_zc(struct prueth_emac *emac, struct xdp_buff *xdp, u32 *psdata) 751 + { 752 + unsigned int headroom = xdp->data - xdp->data_hard_start; 753 + unsigned int pkt_len = xdp->data_end - xdp->data; 754 + struct net_device *ndev = emac->ndev; 755 + struct sk_buff *skb; 756 + 757 + skb = napi_alloc_skb(&emac->napi_rx, xdp->data_end - xdp->data_hard_start); 758 + if (unlikely(!skb)) { 759 + ndev->stats.rx_dropped++; 760 + return; 761 + } 762 + 763 + skb_reserve(skb, headroom); 764 + skb_put(skb, pkt_len); 765 + skb->dev = ndev; 766 + 767 + /* RX HW timestamp */ 768 + if (emac->rx_ts_enabled) 769 + emac_rx_timestamp(emac, skb, psdata); 770 + 771 + if (emac->prueth->is_switch_mode) 772 + skb->offload_fwd_mark = emac->offload_fwd_mark; 773 + skb->protocol = eth_type_trans(skb, ndev); 774 + 775 + skb_mark_for_recycle(skb); 776 + napi_gro_receive(&emac->napi_rx, skb); 777 + ndev->stats.rx_bytes += pkt_len; 778 + ndev->stats.rx_packets++; 779 + } 780 + 781 + static int emac_rx_packet_zc(struct prueth_emac *emac, u32 
flow_id, 782 + int budget) 783 + { 784 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 785 + u32 buf_dma_len, pkt_len, port_id = 0; 786 + struct net_device *ndev = emac->ndev; 787 + struct cppi5_host_desc_t *desc_rx; 788 + struct prueth_swdata *swdata; 789 + dma_addr_t desc_dma, buf_dma; 790 + struct xdp_buff *xdp; 791 + int xdp_status = 0; 792 + int count = 0; 793 + u32 *psdata; 794 + int ret; 795 + 796 + while (count < budget) { 797 + ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma); 798 + if (ret) { 799 + if (ret != -ENODATA) 800 + netdev_err(ndev, "rx pop: failed: %d\n", ret); 801 + break; 802 + } 803 + 804 + if (cppi5_desc_is_tdcm(desc_dma)) { 805 + complete(&emac->tdown_complete); 806 + break; 807 + } 808 + 809 + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); 810 + swdata = cppi5_hdesc_get_swdata(desc_rx); 811 + if (swdata->type != PRUETH_SWDATA_XSK) { 812 + netdev_err(ndev, "rx_pkt: invalid swdata->type %d\n", swdata->type); 813 + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); 814 + break; 815 + } 816 + 817 + xdp = swdata->data.xdp; 818 + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); 819 + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); 820 + pkt_len = cppi5_hdesc_get_pktlen(desc_rx); 821 + /* firmware adds 4 CRC bytes, strip them */ 822 + pkt_len -= 4; 823 + cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); 824 + psdata = cppi5_hdesc_get_psdata(desc_rx); 825 + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); 826 + count++; 827 + xsk_buff_set_size(xdp, pkt_len); 828 + xsk_buff_dma_sync_for_cpu(xdp); 829 + 830 + if (prueth_xdp_is_enabled(emac)) { 831 + ret = emac_run_xdp(emac, xdp, &pkt_len); 832 + switch (ret) { 833 + case ICSSG_XDP_PASS: 834 + /* prepare skb and send to n/w stack */ 835 + emac_dispatch_skb_zc(emac, xdp, psdata); 836 + xsk_buff_free(xdp); 837 + break; 838 + case ICSSG_XDP_CONSUMED: 839 + xsk_buff_free(xdp); 840 + break; 841 + case ICSSG_XDP_TX: 842 + case 
ICSSG_XDP_REDIR: 843 + xdp_status |= ret; 844 + break; 845 + } 846 + } else { 847 + /* prepare skb and send to n/w stack */ 848 + emac_dispatch_skb_zc(emac, xdp, psdata); 849 + xsk_buff_free(xdp); 850 + } 851 + } 852 + 853 + if (xdp_status & ICSSG_XDP_REDIR) 854 + xdp_do_flush(); 855 + 856 + /* Allocate xsk buffers from the pool for the "count" number of 857 + * packets processed in order to be able to receive more packets. 858 + */ 859 + ret = prueth_rx_alloc_zc(emac, count); 860 + 861 + if (xsk_uses_need_wakeup(rx_chn->xsk_pool)) { 862 + /* If the user space doesn't provide enough buffers then it must 863 + * explicitly wake up the kernel when new buffers are available 864 + */ 865 + if (ret < count) 866 + xsk_set_rx_need_wakeup(rx_chn->xsk_pool); 867 + else 868 + xsk_clear_rx_need_wakeup(rx_chn->xsk_pool); 869 + } 870 + 871 + return count; 835 872 } 836 873 837 874 static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id, u32 *xdp_state) ··· 1036 719 return ret; 1037 720 } 1038 721 1039 - if (cppi5_desc_is_tdcm(desc_dma)) /* Teardown ? 
*/ 722 + if (cppi5_desc_is_tdcm(desc_dma)) { 723 + complete(&emac->tdown_complete); 1040 724 return 0; 725 + } 1041 726 1042 727 desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); 1043 728 swdata = cppi5_hdesc_get_swdata(desc_rx); ··· 1057 738 /* firmware adds 4 CRC bytes, strip them */ 1058 739 pkt_len -= 4; 1059 740 cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); 1060 - 1061 741 k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); 1062 742 1063 743 /* if allocation fails we drop the packet but push the ··· 1070 752 } 1071 753 1072 754 pa = page_address(page); 1073 - if (emac->xdp_prog) { 755 + if (prueth_xdp_is_enabled(emac)) { 1074 756 xdp_init_buff(&xdp, PAGE_SIZE, &rx_chn->xdp_rxq); 1075 757 xdp_prepare_buff(&xdp, pa, PRUETH_HEADROOM, pkt_len, false); 1076 758 1077 - *xdp_state = emac_run_xdp(emac, &xdp, page, &pkt_len); 759 + *xdp_state = emac_run_xdp(emac, &xdp, &pkt_len); 1078 760 if (*xdp_state != ICSSG_XDP_PASS) 1079 761 goto requeue; 1080 762 headroom = xdp.data - xdp.data_hard_start; ··· 1122 804 return ret; 1123 805 } 1124 806 1125 - static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) 807 + void prueth_rx_cleanup(void *data, dma_addr_t desc_dma) 1126 808 { 1127 809 struct prueth_rx_chn *rx_chn = data; 1128 810 struct cppi5_host_desc_t *desc_rx; 1129 811 struct prueth_swdata *swdata; 1130 812 struct page_pool *pool; 813 + struct xdp_buff *xdp; 1131 814 struct page *page; 1132 815 1133 816 pool = rx_chn->pg_pool; 1134 817 desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); 1135 818 swdata = cppi5_hdesc_get_swdata(desc_rx); 1136 - if (swdata->type == PRUETH_SWDATA_PAGE) { 819 + if (rx_chn->xsk_pool) { 820 + xdp = swdata->data.xdp; 821 + xsk_buff_free(xdp); 822 + } else { 1137 823 page = swdata->data.page; 1138 824 page_pool_recycle_direct(pool, page); 1139 825 } 1140 826 1141 827 k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); 1142 828 } 829 + EXPORT_SYMBOL_GPL(prueth_rx_cleanup); 1143 830 1144 831 
static int prueth_tx_ts_cookie_get(struct prueth_emac *emac) 1145 832 { ··· 1348 1025 } 1349 1026 EXPORT_SYMBOL_GPL(icssg_ndo_start_xmit); 1350 1027 1351 - static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) 1028 + void prueth_tx_cleanup(void *data, dma_addr_t desc_dma) 1352 1029 { 1353 1030 struct prueth_tx_chn *tx_chn = data; 1354 1031 struct cppi5_host_desc_t *desc_tx; 1032 + struct xsk_buff_pool *xsk_pool; 1355 1033 struct prueth_swdata *swdata; 1356 1034 struct xdp_frame *xdpf; 1357 1035 struct sk_buff *skb; ··· 1369 1045 xdpf = swdata->data.xdpf; 1370 1046 xdp_return_frame(xdpf); 1371 1047 break; 1048 + case PRUETH_SWDATA_XSK: 1049 + xsk_pool = tx_chn->xsk_pool; 1050 + xsk_tx_completed(xsk_pool, 1); 1051 + break; 1372 1052 default: 1373 1053 break; 1374 1054 } 1375 1055 1376 1056 prueth_xmit_free(tx_chn, desc_tx); 1377 1057 } 1058 + EXPORT_SYMBOL_GPL(prueth_tx_cleanup); 1378 1059 1379 1060 irqreturn_t prueth_rx_irq(int irq, void *dev_id) 1380 1061 { 1381 1062 struct prueth_emac *emac = dev_id; 1382 1063 1064 + emac->rx_chns.irq_disabled = true; 1383 1065 disable_irq_nosync(irq); 1384 1066 napi_schedule(&emac->napi_rx); 1385 1067 ··· 1413 1083 PRUETH_RX_FLOW_DATA_SR1 : PRUETH_RX_FLOW_DATA; 1414 1084 int flow = emac->is_sr1 ? 
1415 1085 PRUETH_MAX_RX_FLOWS_SR1 : PRUETH_MAX_RX_FLOWS; 1086 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 1416 1087 int xdp_state_or = 0; 1417 1088 int num_rx = 0; 1418 1089 int cur_budget; ··· 1421 1090 int ret; 1422 1091 1423 1092 while (flow--) { 1424 - cur_budget = budget - num_rx; 1093 + if (rx_chn->xsk_pool) { 1094 + num_rx = emac_rx_packet_zc(emac, flow, budget); 1095 + } else { 1096 + cur_budget = budget - num_rx; 1425 1097 1426 - while (cur_budget--) { 1427 - ret = emac_rx_packet(emac, flow, &xdp_state); 1428 - xdp_state_or |= xdp_state; 1429 - if (ret) 1430 - break; 1431 - num_rx++; 1098 + while (cur_budget--) { 1099 + ret = emac_rx_packet(emac, flow, &xdp_state); 1100 + xdp_state_or |= xdp_state; 1101 + if (ret) 1102 + break; 1103 + num_rx++; 1104 + } 1432 1105 } 1433 1106 1434 1107 if (num_rx >= budget) ··· 1448 1113 ns_to_ktime(emac->rx_pace_timeout_ns), 1449 1114 HRTIMER_MODE_REL_PINNED); 1450 1115 } else { 1451 - enable_irq(emac->rx_chns.irq[rx_flow]); 1116 + if (emac->rx_chns.irq_disabled) { 1117 + /* re-enable the RX IRQ */ 1118 + emac->rx_chns.irq_disabled = false; 1119 + enable_irq(emac->rx_chns.irq[rx_flow]); 1120 + } 1452 1121 } 1453 1122 } 1454 1123 ··· 1460 1121 } 1461 1122 EXPORT_SYMBOL_GPL(icssg_napi_rx_poll); 1462 1123 1463 - static struct page_pool *prueth_create_page_pool(struct prueth_emac *emac, 1464 - struct device *dma_dev, 1465 - int size) 1466 - { 1467 - struct page_pool_params pp_params = { 0 }; 1468 - struct page_pool *pool; 1469 - 1470 - pp_params.order = 0; 1471 - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; 1472 - pp_params.pool_size = size; 1473 - pp_params.nid = dev_to_node(emac->prueth->dev); 1474 - pp_params.dma_dir = DMA_BIDIRECTIONAL; 1475 - pp_params.dev = dma_dev; 1476 - pp_params.napi = &emac->napi_rx; 1477 - pp_params.max_len = PAGE_SIZE; 1478 - 1479 - pool = page_pool_create(&pp_params); 1480 - if (IS_ERR(pool)) 1481 - netdev_err(emac->ndev, "cannot create rx page pool\n"); 1482 - 1483 - return 
pool; 1484 - } 1485 - 1486 1124 int prueth_prepare_rx_chan(struct prueth_emac *emac, 1487 1125 struct prueth_rx_chn *chn, 1488 1126 int buf_size) 1489 1127 { 1490 - struct page_pool *pool; 1491 1128 struct page *page; 1129 + int desc_avail; 1492 1130 int i, ret; 1493 1131 1494 - pool = prueth_create_page_pool(emac, chn->dma_dev, chn->descs_num); 1495 - if (IS_ERR(pool)) 1496 - return PTR_ERR(pool); 1132 + desc_avail = k3_cppi_desc_pool_avail(chn->desc_pool); 1133 + if (desc_avail < chn->descs_num) 1134 + netdev_warn(emac->ndev, 1135 + "not enough RX descriptors available %d < %d\n", 1136 + desc_avail, chn->descs_num); 1497 1137 1498 - chn->pg_pool = pool; 1499 - 1500 - for (i = 0; i < chn->descs_num; i++) { 1501 - /* NOTE: we're not using memory efficiently here. 1502 - * 1 full page (4KB?) used here instead of 1503 - * PRUETH_MAX_PKT_SIZE (~1.5KB?) 1138 + if (chn->xsk_pool) { 1139 + /* get pages from xsk_pool and push to RX ring 1140 + * queue as much as possible 1504 1141 */ 1505 - page = page_pool_dev_alloc_pages(pool); 1506 - if (!page) { 1507 - netdev_err(emac->ndev, "couldn't allocate rx page\n"); 1508 - ret = -ENOMEM; 1142 + ret = prueth_rx_alloc_zc(emac, desc_avail); 1143 + if (!ret) 1509 1144 goto recycle_alloc_pg; 1510 - } 1145 + } else { 1146 + for (i = 0; i < desc_avail; i++) { 1147 + /* NOTE: we're not using memory efficiently here. 1148 + * 1 full page (4KB?) used here instead of 1149 + * PRUETH_MAX_PKT_SIZE (~1.5KB?) 
1150 + */ 1151 + page = page_pool_dev_alloc_pages(chn->pg_pool); 1152 + if (!page) { 1153 + netdev_err(emac->ndev, "couldn't allocate rx page\n"); 1154 + ret = -ENOMEM; 1155 + goto recycle_alloc_pg; 1156 + } 1511 1157 1512 - ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); 1513 - if (ret < 0) { 1514 - netdev_err(emac->ndev, 1515 - "cannot submit page for rx chan %s ret %d\n", 1516 - chn->name, ret); 1517 - page_pool_recycle_direct(pool, page); 1518 - goto recycle_alloc_pg; 1158 + ret = prueth_dma_rx_push_mapped(emac, chn, page, buf_size); 1159 + if (ret < 0) { 1160 + netdev_err(emac->ndev, 1161 + "cannot submit page for rx chan %s ret %d\n", 1162 + chn->name, ret); 1163 + page_pool_recycle_direct(chn->pg_pool, page); 1164 + goto recycle_alloc_pg; 1165 + } 1519 1166 } 1520 1167 } 1521 1168
+319 -75
drivers/net/ethernet/ti/icssg/icssg_prueth.c
··· 47 47 NETIF_F_HW_HSR_TAG_INS | \ 48 48 NETIF_F_HW_HSR_TAG_RM) 49 49 50 + #define PRUETH_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC |\ 51 + DMA_ATTR_WEAK_ORDERING) 52 + 50 53 /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ 51 54 #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) 52 55 ··· 395 392 container_of(timer, struct prueth_emac, rx_hrtimer); 396 393 int rx_flow = PRUETH_RX_FLOW_DATA; 397 394 398 - enable_irq(emac->rx_chns.irq[rx_flow]); 395 + if (emac->rx_chns.irq_disabled) { 396 + /* re-enable the RX IRQ */ 397 + emac->rx_chns.irq_disabled = false; 398 + enable_irq(emac->rx_chns.irq[rx_flow]); 399 + } 399 400 return HRTIMER_NORESTART; 400 401 } 401 402 ··· 573 566 .perout_enable = prueth_perout_enable, 574 567 }; 575 568 569 + static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) 570 + { 571 + struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; 572 + 573 + if (xdp_rxq_info_is_reg(rxq)) 574 + xdp_rxq_info_unreg(rxq); 575 + } 576 + 576 577 static int prueth_create_xdp_rxqs(struct prueth_emac *emac) 577 578 { 578 579 struct xdp_rxq_info *rxq = &emac->rx_chns.xdp_rxq; 579 580 struct page_pool *pool = emac->rx_chns.pg_pool; 581 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 580 582 int ret; 581 583 582 584 ret = xdp_rxq_info_reg(rxq, emac->ndev, 0, emac->napi_rx.napi_id); 583 585 if (ret) 584 586 return ret; 585 587 586 - ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); 587 - if (ret) 588 - xdp_rxq_info_unreg(rxq); 588 + if (rx_chn->xsk_pool) { 589 + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); 590 + if (ret) 591 + goto xdp_unreg; 592 + xsk_pool_set_rxq_info(rx_chn->xsk_pool, rxq); 593 + } else { 594 + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); 595 + if (ret) 596 + goto xdp_unreg; 597 + } 589 598 599 + return 0; 600 + 601 + xdp_unreg: 602 + prueth_destroy_xdp_rxqs(emac); 590 603 return ret; 591 - } 592 - 593 - static void prueth_destroy_xdp_rxqs(struct prueth_emac *emac) 594 - { 595 - struct xdp_rxq_info *rxq 
= &emac->rx_chns.xdp_rxq; 596 - 597 - if (!xdp_rxq_info_is_reg(rxq)) 598 - return; 599 - 600 - xdp_rxq_info_unreg(rxq); 601 604 } 602 605 603 606 static int icssg_prueth_add_mcast(struct net_device *ndev, const u8 *addr) ··· 752 735 return 0; 753 736 } 754 737 738 + static void prueth_set_xsk_pool(struct prueth_emac *emac, u16 queue_id) 739 + { 740 + struct prueth_tx_chn *tx_chn = &emac->tx_chns[queue_id]; 741 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 742 + 743 + if (emac->xsk_qid != queue_id) { 744 + rx_chn->xsk_pool = NULL; 745 + tx_chn->xsk_pool = NULL; 746 + } else { 747 + rx_chn->xsk_pool = xsk_get_pool_from_qid(emac->ndev, queue_id); 748 + tx_chn->xsk_pool = xsk_get_pool_from_qid(emac->ndev, queue_id); 749 + } 750 + } 751 + 752 + static void prueth_destroy_txq(struct prueth_emac *emac) 753 + { 754 + int ret, i; 755 + 756 + atomic_set(&emac->tdown_cnt, emac->tx_ch_num); 757 + /* ensure new tdown_cnt value is visible */ 758 + smp_mb__after_atomic(); 759 + /* tear down and disable UDMA channels */ 760 + reinit_completion(&emac->tdown_complete); 761 + for (i = 0; i < emac->tx_ch_num; i++) 762 + k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); 763 + 764 + ret = wait_for_completion_timeout(&emac->tdown_complete, 765 + msecs_to_jiffies(1000)); 766 + if (!ret) 767 + netdev_err(emac->ndev, "tx teardown timeout\n"); 768 + 769 + for (i = 0; i < emac->tx_ch_num; i++) { 770 + napi_disable(&emac->tx_chns[i].napi_tx); 771 + hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); 772 + k3_udma_glue_reset_tx_chn(emac->tx_chns[i].tx_chn, 773 + &emac->tx_chns[i], 774 + prueth_tx_cleanup); 775 + k3_udma_glue_disable_tx_chn(emac->tx_chns[i].tx_chn); 776 + } 777 + } 778 + 779 + static void prueth_destroy_rxq(struct prueth_emac *emac) 780 + { 781 + int i, ret; 782 + 783 + /* tear down and disable UDMA channels */ 784 + reinit_completion(&emac->tdown_complete); 785 + k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); 786 + 787 + /* When RX DMA Channel Teardown is 
initiated, it will result in an 788 + * interrupt and a Teardown Completion Marker (TDCM) is queued into 789 + * the RX Completion queue. Acknowledging the interrupt involves 790 + * popping the TDCM descriptor from the RX Completion queue via the 791 + * RX NAPI Handler. To avoid timing out when waiting for the TDCM to 792 + * be popped, schedule the RX NAPI handler to run immediately. 793 + */ 794 + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) { 795 + if (napi_schedule_prep(&emac->napi_rx)) 796 + __napi_schedule(&emac->napi_rx); 797 + } 798 + 799 + ret = wait_for_completion_timeout(&emac->tdown_complete, 800 + msecs_to_jiffies(1000)); 801 + if (!ret) 802 + netdev_err(emac->ndev, "rx teardown timeout\n"); 803 + 804 + for (i = 0; i < PRUETH_MAX_RX_FLOWS; i++) { 805 + napi_disable(&emac->napi_rx); 806 + hrtimer_cancel(&emac->rx_hrtimer); 807 + k3_udma_glue_reset_rx_chn(emac->rx_chns.rx_chn, i, 808 + &emac->rx_chns, 809 + prueth_rx_cleanup); 810 + } 811 + 812 + prueth_destroy_xdp_rxqs(emac); 813 + k3_udma_glue_disable_rx_chn(emac->rx_chns.rx_chn); 814 + } 815 + 816 + static int prueth_create_txq(struct prueth_emac *emac) 817 + { 818 + int ret, i; 819 + 820 + for (i = 0; i < emac->tx_ch_num; i++) { 821 + ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); 822 + if (ret) 823 + goto reset_tx_chan; 824 + napi_enable(&emac->tx_chns[i].napi_tx); 825 + } 826 + return 0; 827 + 828 + reset_tx_chan: 829 + /* Since interface is not yet up, there is wouldn't be 830 + * any SKB for completion. 
So set false to free_skb 831 + */ 832 + prueth_reset_tx_chan(emac, i, false); 833 + return ret; 834 + } 835 + 836 + static int prueth_create_rxq(struct prueth_emac *emac) 837 + { 838 + int ret; 839 + 840 + ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); 841 + if (ret) 842 + return ret; 843 + 844 + ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); 845 + if (ret) 846 + goto reset_rx_chn; 847 + 848 + ret = prueth_create_xdp_rxqs(emac); 849 + if (ret) 850 + goto reset_rx_chn; 851 + 852 + napi_enable(&emac->napi_rx); 853 + return 0; 854 + 855 + reset_rx_chn: 856 + prueth_reset_rx_chan(&emac->rx_chns, PRUETH_MAX_RX_FLOWS, false); 857 + return ret; 858 + } 859 + 755 860 /** 756 861 * emac_ndo_open - EMAC device open 757 862 * @ndev: network adapter device ··· 885 746 static int emac_ndo_open(struct net_device *ndev) 886 747 { 887 748 struct prueth_emac *emac = netdev_priv(ndev); 888 - int ret, i, num_data_chn = emac->tx_ch_num; 749 + int ret, num_data_chn = emac->tx_ch_num; 889 750 struct icssg_flow_cfg __iomem *flow_cfg; 890 751 struct prueth *prueth = emac->prueth; 891 752 int slice = prueth_emac_slice(emac); ··· 906 767 return ret; 907 768 } 908 769 770 + emac->xsk_qid = -EINVAL; 909 771 init_completion(&emac->cmd_complete); 910 772 ret = prueth_init_tx_chns(emac); 911 773 if (ret) { ··· 959 819 goto stop; 960 820 961 821 /* Prepare RX */ 962 - ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE); 822 + ret = prueth_create_rxq(emac); 963 823 if (ret) 964 824 goto free_tx_ts_irq; 965 825 966 - ret = prueth_create_xdp_rxqs(emac); 826 + ret = prueth_create_txq(emac); 967 827 if (ret) 968 - goto reset_rx_chn; 969 - 970 - ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn); 971 - if (ret) 972 - goto destroy_xdp_rxqs; 973 - 974 - for (i = 0; i < emac->tx_ch_num; i++) { 975 - ret = k3_udma_glue_enable_tx_chn(emac->tx_chns[i].tx_chn); 976 - if (ret) 977 - goto reset_tx_chan; 978 - } 979 - 980 - /* Enable NAPI in Tx and Rx 
direction */ 981 - for (i = 0; i < emac->tx_ch_num; i++) 982 - napi_enable(&emac->tx_chns[i].napi_tx); 983 - napi_enable(&emac->napi_rx); 828 + goto destroy_rxq; 984 829 985 830 /* start PHY */ 986 831 phy_start(ndev->phydev); ··· 976 851 977 852 return 0; 978 853 979 - reset_tx_chan: 980 - /* Since interface is not yet up, there is wouldn't be 981 - * any SKB for completion. So set false to free_skb 982 - */ 983 - prueth_reset_tx_chan(emac, i, false); 984 - destroy_xdp_rxqs: 985 - prueth_destroy_xdp_rxqs(emac); 986 - reset_rx_chn: 987 - prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false); 854 + destroy_rxq: 855 + prueth_destroy_rxq(emac); 988 856 free_tx_ts_irq: 989 857 free_irq(emac->tx_ts_irq, emac); 990 858 stop: ··· 1007 889 { 1008 890 struct prueth_emac *emac = netdev_priv(ndev); 1009 891 struct prueth *prueth = emac->prueth; 1010 - int rx_flow = PRUETH_RX_FLOW_DATA; 1011 - int max_rx_flows; 1012 - int ret, i; 1013 892 1014 893 /* inform the upper layers. */ 1015 894 netif_tx_stop_all_queues(ndev); ··· 1020 905 else 1021 906 __dev_mc_unsync(ndev, icssg_prueth_del_mcast); 1022 907 1023 - atomic_set(&emac->tdown_cnt, emac->tx_ch_num); 1024 - /* ensure new tdown_cnt value is visible */ 1025 - smp_mb__after_atomic(); 1026 - /* tear down and disable UDMA channels */ 1027 - reinit_completion(&emac->tdown_complete); 1028 - for (i = 0; i < emac->tx_ch_num; i++) 1029 - k3_udma_glue_tdown_tx_chn(emac->tx_chns[i].tx_chn, false); 1030 - 1031 - ret = wait_for_completion_timeout(&emac->tdown_complete, 1032 - msecs_to_jiffies(1000)); 1033 - if (!ret) 1034 - netdev_err(ndev, "tx teardown timeout\n"); 1035 - 1036 - prueth_reset_tx_chan(emac, emac->tx_ch_num, true); 1037 - for (i = 0; i < emac->tx_ch_num; i++) { 1038 - napi_disable(&emac->tx_chns[i].napi_tx); 1039 - hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer); 1040 - } 1041 - 1042 - max_rx_flows = PRUETH_MAX_RX_FLOWS; 1043 - k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true); 1044 - 1045 - 
prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true); 1046 - prueth_destroy_xdp_rxqs(emac); 1047 - napi_disable(&emac->napi_rx); 1048 - hrtimer_cancel(&emac->rx_hrtimer); 908 + prueth_destroy_txq(emac); 909 + prueth_destroy_rxq(emac); 1049 910 1050 911 cancel_work_sync(&emac->rx_mode_work); 1051 912 ··· 1034 943 1035 944 free_irq(emac->tx_ts_irq, emac); 1036 945 1037 - free_irq(emac->rx_chns.irq[rx_flow], emac); 946 + free_irq(emac->rx_chns.irq[PRUETH_RX_FLOW_DATA], emac); 1038 947 prueth_ndev_del_tx_napi(emac, emac->tx_ch_num); 1039 948 1040 - prueth_cleanup_rx_chns(emac, &emac->rx_chns, max_rx_flows); 949 + prueth_cleanup_rx_chns(emac, &emac->rx_chns, PRUETH_MAX_RX_FLOWS); 1041 950 prueth_cleanup_tx_chns(emac); 1042 951 1043 952 prueth->emacs_initialized--; ··· 1199 1108 __netif_tx_lock(netif_txq, cpu); 1200 1109 for (i = 0; i < n; i++) { 1201 1110 xdpf = frames[i]; 1202 - err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); 1111 + err = emac_xmit_xdp_frame(emac, xdpf, q_idx, 1112 + PRUETH_TX_BUFF_TYPE_XDP_NDO); 1203 1113 if (err != ICSSG_XDP_TX) { 1204 1114 ndev->stats.tx_dropped++; 1205 1115 break; ··· 1233 1141 return 0; 1234 1142 } 1235 1143 1144 + static int prueth_xsk_pool_enable(struct prueth_emac *emac, 1145 + struct xsk_buff_pool *pool, u16 queue_id) 1146 + { 1147 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 1148 + u32 frame_size; 1149 + int ret; 1150 + 1151 + if (queue_id >= PRUETH_MAX_RX_FLOWS || 1152 + queue_id >= emac->tx_ch_num) { 1153 + netdev_err(emac->ndev, "Invalid XSK queue ID %d\n", queue_id); 1154 + return -EINVAL; 1155 + } 1156 + 1157 + frame_size = xsk_pool_get_rx_frame_size(pool); 1158 + if (frame_size < PRUETH_MAX_PKT_SIZE) 1159 + return -EOPNOTSUPP; 1160 + 1161 + ret = xsk_pool_dma_map(pool, rx_chn->dma_dev, PRUETH_RX_DMA_ATTR); 1162 + if (ret) { 1163 + netdev_err(emac->ndev, "Failed to map XSK pool: %d\n", ret); 1164 + return ret; 1165 + } 1166 + 1167 + if (netif_running(emac->ndev)) { 1168 + /* stop packets from wire for 
graceful teardown */ 1169 + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_DISABLE); 1170 + if (ret) 1171 + return ret; 1172 + prueth_destroy_rxq(emac); 1173 + } 1174 + 1175 + emac->xsk_qid = queue_id; 1176 + prueth_set_xsk_pool(emac, queue_id); 1177 + 1178 + if (netif_running(emac->ndev)) { 1179 + ret = prueth_create_rxq(emac); 1180 + if (ret) { 1181 + netdev_err(emac->ndev, "Failed to create RX queue: %d\n", ret); 1182 + return ret; 1183 + } 1184 + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_FORWARD); 1185 + if (ret) { 1186 + prueth_destroy_rxq(emac); 1187 + return ret; 1188 + } 1189 + ret = prueth_xsk_wakeup(emac->ndev, queue_id, XDP_WAKEUP_RX); 1190 + if (ret) 1191 + return ret; 1192 + } 1193 + 1194 + return 0; 1195 + } 1196 + 1197 + static int prueth_xsk_pool_disable(struct prueth_emac *emac, u16 queue_id) 1198 + { 1199 + struct xsk_buff_pool *pool; 1200 + int ret; 1201 + 1202 + if (queue_id >= PRUETH_MAX_RX_FLOWS || 1203 + queue_id >= emac->tx_ch_num) { 1204 + netdev_err(emac->ndev, "Invalid XSK queue ID %d\n", queue_id); 1205 + return -EINVAL; 1206 + } 1207 + 1208 + if (emac->xsk_qid != queue_id) { 1209 + netdev_err(emac->ndev, "XSK queue ID %d not registered\n", queue_id); 1210 + return -EINVAL; 1211 + } 1212 + 1213 + pool = xsk_get_pool_from_qid(emac->ndev, queue_id); 1214 + if (!pool) { 1215 + netdev_err(emac->ndev, "No XSK pool registered for queue %d\n", queue_id); 1216 + return -EINVAL; 1217 + } 1218 + 1219 + if (netif_running(emac->ndev)) { 1220 + /* stop packets from wire for graceful teardown */ 1221 + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_DISABLE); 1222 + if (ret) 1223 + return ret; 1224 + prueth_destroy_rxq(emac); 1225 + } 1226 + 1227 + xsk_pool_dma_unmap(pool, PRUETH_RX_DMA_ATTR); 1228 + emac->xsk_qid = -EINVAL; 1229 + prueth_set_xsk_pool(emac, queue_id); 1230 + 1231 + if (netif_running(emac->ndev)) { 1232 + ret = prueth_create_rxq(emac); 1233 + if (ret) { 1234 + netdev_err(emac->ndev, "Failed to create RX queue: %d\n", ret); 
1235 + return ret; 1236 + } 1237 + ret = icssg_set_port_state(emac, ICSSG_EMAC_PORT_FORWARD); 1238 + if (ret) { 1239 + prueth_destroy_rxq(emac); 1240 + return ret; 1241 + } 1242 + } 1243 + 1244 + return 0; 1245 + } 1246 + 1236 1247 /** 1237 1248 * emac_ndo_bpf - implements ndo_bpf for icssg_prueth 1238 1249 * @ndev: network adapter device ··· 1350 1155 switch (bpf->command) { 1351 1156 case XDP_SETUP_PROG: 1352 1157 return emac_xdp_setup(emac, bpf); 1158 + case XDP_SETUP_XSK_POOL: 1159 + return bpf->xsk.pool ? 1160 + prueth_xsk_pool_enable(emac, bpf->xsk.pool, bpf->xsk.queue_id) : 1161 + prueth_xsk_pool_disable(emac, bpf->xsk.queue_id); 1353 1162 default: 1354 1163 return -EINVAL; 1355 1164 } 1165 + } 1166 + 1167 + int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags) 1168 + { 1169 + struct prueth_emac *emac = netdev_priv(ndev); 1170 + struct prueth_tx_chn *tx_chn = &emac->tx_chns[qid]; 1171 + struct prueth_rx_chn *rx_chn = &emac->rx_chns; 1172 + 1173 + if (emac->xsk_qid != qid) { 1174 + netdev_err(ndev, "XSK queue %d not registered\n", qid); 1175 + return -EINVAL; 1176 + } 1177 + 1178 + if (qid >= PRUETH_MAX_RX_FLOWS || qid >= emac->tx_ch_num) { 1179 + netdev_err(ndev, "Invalid XSK queue ID %d\n", qid); 1180 + return -EINVAL; 1181 + } 1182 + 1183 + if (!tx_chn->xsk_pool) { 1184 + netdev_err(ndev, "XSK pool not registered for queue %d\n", qid); 1185 + return -EINVAL; 1186 + } 1187 + 1188 + if (!rx_chn->xsk_pool) { 1189 + netdev_err(ndev, "XSK pool not registered for RX queue %d\n", qid); 1190 + return -EINVAL; 1191 + } 1192 + 1193 + if (flags & XDP_WAKEUP_TX) { 1194 + if (!napi_if_scheduled_mark_missed(&tx_chn->napi_tx)) { 1195 + if (likely(napi_schedule_prep(&tx_chn->napi_tx))) 1196 + __napi_schedule(&tx_chn->napi_tx); 1197 + } 1198 + } 1199 + 1200 + if (flags & XDP_WAKEUP_RX) { 1201 + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) { 1202 + if (likely(napi_schedule_prep(&emac->napi_rx))) 1203 + __napi_schedule(&emac->napi_rx); 1204 + } 1205 + } 
1206 + 1207 + return 0; 1356 1208 } 1357 1209 1358 1210 static const struct net_device_ops emac_netdev_ops = { ··· 1420 1178 .ndo_xdp_xmit = emac_xdp_xmit, 1421 1179 .ndo_hwtstamp_get = icssg_ndo_get_ts_config, 1422 1180 .ndo_hwtstamp_set = icssg_ndo_set_ts_config, 1181 + .ndo_xsk_wakeup = prueth_xsk_wakeup, 1423 1182 }; 1424 1183 1425 1184 static int prueth_netdev_init(struct prueth *prueth, ··· 1554 1311 xdp_set_features_flag(ndev, 1555 1312 NETDEV_XDP_ACT_BASIC | 1556 1313 NETDEV_XDP_ACT_REDIRECT | 1557 - NETDEV_XDP_ACT_NDO_XMIT); 1314 + NETDEV_XDP_ACT_NDO_XMIT | 1315 + NETDEV_XDP_ACT_XSK_ZEROCOPY); 1558 1316 1559 1317 netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); 1560 1318 hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC,
+23 -2
drivers/net/ethernet/ti/icssg/icssg_prueth.h
··· 38 38 #include <net/devlink.h> 39 39 #include <net/xdp.h> 40 40 #include <net/page_pool/helpers.h> 41 + #include <net/xsk_buff_pool.h> 42 + #include <net/xdp_sock_drv.h> 41 43 42 44 #include "icssg_config.h" 43 45 #include "icss_iep.h" ··· 128 126 char name[32]; 129 127 struct hrtimer tx_hrtimer; 130 128 unsigned long tx_pace_timeout_ns; 129 + struct xsk_buff_pool *xsk_pool; 130 + bool irq_disabled; 131 131 }; 132 132 133 133 struct prueth_rx_chn { ··· 142 138 char name[32]; 143 139 struct page_pool *pg_pool; 144 140 struct xdp_rxq_info xdp_rxq; 141 + struct xsk_buff_pool *xsk_pool; 142 + bool irq_disabled; 145 143 }; 146 144 147 145 enum prueth_swdata_type { ··· 152 146 PRUETH_SWDATA_PAGE, 153 147 PRUETH_SWDATA_CMD, 154 148 PRUETH_SWDATA_XDPF, 149 + PRUETH_SWDATA_XSK, 150 + }; 151 + 152 + enum prueth_tx_buff_type { 153 + PRUETH_TX_BUFF_TYPE_XDP_TX, 154 + PRUETH_TX_BUFF_TYPE_XDP_NDO, 155 155 }; 156 156 157 157 struct prueth_swdata { ··· 167 155 struct page *page; 168 156 u32 cmd; 169 157 struct xdp_frame *xdpf; 158 + struct xdp_buff *xdp; 170 159 } data; 171 160 }; 172 161 ··· 254 241 struct netdev_hw_addr_list vlan_mcast_list[MAX_VLAN_ID]; 255 242 struct bpf_prog *xdp_prog; 256 243 struct xdp_attachment_info xdpi; 244 + int xsk_qid; 257 245 }; 258 246 259 247 /* The buf includes headroom compatible with both skb and xdpf */ ··· 513 499 u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns); 514 500 u32 emac_xmit_xdp_frame(struct prueth_emac *emac, 515 501 struct xdp_frame *xdpf, 516 - struct page *page, 517 - unsigned int q_idx); 502 + unsigned int q_idx, 503 + enum prueth_tx_buff_type buff_type); 504 + void prueth_rx_cleanup(void *data, dma_addr_t desc_dma); 505 + void prueth_tx_cleanup(void *data, dma_addr_t desc_dma); 506 + int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags); 507 + static inline bool prueth_xdp_is_enabled(struct prueth_emac *emac) 508 + { 509 + return !!READ_ONCE(emac->xdp_prog); 510 + } 518 511 519 512 #endif /* 
__NET_TI_ICSSG_PRUETH_H */