
Merge branch 'dpaa2-eth-AF_XDP-zc'

Ioana Ciornei says:

====================
net: dpaa2-eth: AF_XDP zero-copy support

This patch set adds support for AF_XDP zero-copy in the dpaa2-eth
driver. The support is available on the LX2160A SoC and its variants,
and only on interfaces (DPNIs) with a maximum of 8 queues (a HW
limitation).
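
For reference, the limit is enforced at XSK pool enable time; a
condensed sketch of the guard added in dpaa2-xsk.c:

    if (priv->dpni_attrs.num_queues > 8) {
        netdev_err(dev, "AF_XDP zero-copy not supported on DPNI with more than 8 queues\n");
        return -EOPNOTSUPP;
    }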

We first implement the .get_channels() callback, since this is a
dependency for further work.
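
A condensed sketch of the new ethtool callback (the full version in
dpaa2-ethtool.c also accounts for the Tx confirmation and Rx error
queues in the 'other' and 'combined' counts):

    static void dpaa2_eth_get_channels(struct net_device *net_dev,
                                       struct ethtool_channels *channels)
    {
        struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
        int queue_count = dpaa2_eth_queue_count(priv);

        channels->max_rx = queue_count;
        channels->max_tx = queue_count;
        channels->rx_count = queue_count;
        channels->tx_count = queue_count;
    }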

Patches 2-3 make the necessary changes for supporting multiple buffer
pools on a single interface. By default, without an AF_XDP socket
attached, a single buffer pool is used and shared between all the
queues. These patches rework the affected functions, while the actual
allocation and setup of a new BP is done in patch #10.
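
Conceptually, each channel now holds a pointer to its buffer pool, and
by default all channels share the DPBP at index 0. A minimal sketch
following the new dpaa2_eth_setup_default_dpbp():

    struct dpaa2_eth_bp *bp;
    int i;

    bp = dpaa2_eth_allocate_dpbp(priv);
    if (IS_ERR(bp))
        return PTR_ERR(bp);

    priv->bp[DPAA2_ETH_DEFAULT_BP_IDX] = bp;
    priv->num_bps++;

    /* every channel starts out on the default pool */
    for (i = 0; i < priv->num_channels; i++)
        priv->channel[i]->bp = bp;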

Patches 4-5 improve the information exposed in debugfs. A new file
shows which buffer pool is used by which channels and how many buffers
it currently holds.
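
The file is a standard seq_file; assuming the usual dpaa2-eth debugfs
directory layout, it can then be read with something like
'cat /sys/kernel/debug/dpaa2-eth/<netdev>/bp_stats'. Its registration,
as added in dpaa2-eth-debugfs.c:

    DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_bp);

    /* per buffer pool stats file */
    debugfs_create_file("bp_stats", 0444, dir, priv, &dpaa2_dbg_bp_fops);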

The 6th patch updates the dpni_set_pools() firmware API so that a
different buffer pool can be set up per queue in later patches.
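
With the extended API, a pool entry is associated to queues through a
QDBIN mask. A condensed sketch of the setup done by the new
dpaa2_xsk_set_bp_per_qdbin():

    pools_params->pool_options = DPNI_POOL_ASSOC_QDBIN;
    for (i = 0; i < priv->num_bps; i++) {
        for (j = 0; j < priv->num_channels; j++)
            if (priv->bp[i] == priv->channel[j]->bp)
                pools_params->pools[curr_bp].priority_mask |= (1 << j);
        if (!pools_params->pools[curr_bp].priority_mask)
            continue;

        pools_params->pools[curr_bp].dpbp_id = priv->bp[i]->bpid;
        pools_params->pools[curr_bp].buffer_size = priv->rx_buf_size;
        pools_params->pools[curr_bp++].backup_pool = 0;
    }
    pools_params->num_dpbp = curr_bp;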

In the 7th patch the generic dev_open/dev_close APIs are used instead
of the dpaa2-eth internal ones.
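
Going through the generic helpers keeps netdev state and notifiers
consistent while the interface is reconfigured. The pattern used when
(un)binding an XSK pool:

    up = netif_running(dev);
    if (up)
        dev_close(dev);

    /* ... reconfigure buffer pools / XSK state ... */

    if (up) {
        err = dev_open(dev, NULL);
        if (err)
            return err;
    }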

Patches 8-9 rearrange the existing code in dpaa2-eth.c in order to
create new functions which will be used by the XSK implementation in
dpaa2-xsk.c.

Finally, the last 3 patches add the actual support for both the Rx and
Tx paths of AF_XDP zero-copy, together with some associated
tracepoints. Details on the implementation can be found in the patches
themselves.
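
Both paths hang off the driver's ndo_bpf handler, which now also
dispatches XSK pool setup requests (as added in dpaa2-eth.c):

    static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
    {
        switch (xdp->command) {
        case XDP_SETUP_PROG:
            return dpaa2_eth_setup_xdp(dev, xdp->prog);
        case XDP_SETUP_XSK_POOL:
            return dpaa2_xsk_setup_pool(dev, xdp->xsk.pool,
                                        xdp->xsk.queue_id);
        default:
            return -EINVAL;
        }
    }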

Changes in v2:
- 3/12: Export dpaa2_eth_allocate_dpbp/dpaa2_eth_free_dpbp in this
patch to avoid a build warning. The functions will be used in later
patches.
- 6/12: Use __le16 instead of u16 for the dpbp_id field (see the
endianness sketch after this list).
- 12/12: Use xdp_buff->data_hard_start when tracing the BP seeding.
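
On the 6/12 change: values exchanged with firmware are little-endian on
the wire, so such fields are declared with sparse-checked __le16/__le32
types and converted explicitly. A generic, hypothetical illustration
(not the actual dpni command layout):

    struct fw_cmd_example {            /* hypothetical command struct */
        __le16 dpbp_id;                /* little-endian on the wire */
    };

    static void fw_cmd_fill(struct fw_cmd_example *cmd, u16 bpid)
    {
        /* explicit CPU -> LE conversion; sparse flags a plain store */
        cmd->dpbp_id = cpu_to_le16(bpid);
    }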

Changes in v3:
- 3/12: fix leaking of bp on error path
====================

Acked-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

+1089 -237
+1
MAINTAINERS
··· 6326 6326 F: drivers/net/ethernet/freescale/dpaa2/Makefile 6327 6327 F: drivers/net/ethernet/freescale/dpaa2/dpaa2-eth* 6328 6328 F: drivers/net/ethernet/freescale/dpaa2/dpaa2-mac* 6329 + F: drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk* 6329 6330 F: drivers/net/ethernet/freescale/dpaa2/dpkg.h 6330 6331 F: drivers/net/ethernet/freescale/dpaa2/dpmac* 6331 6332 F: drivers/net/ethernet/freescale/dpaa2/dpni*
+1 -1
drivers/net/ethernet/freescale/dpaa2/Makefile
··· 7 7 obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o 8 8 obj-$(CONFIG_FSL_DPAA2_SWITCH) += fsl-dpaa2-switch.o 9 9 10 - fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o 10 + fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o dpaa2-xsk.o 11 11 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o 12 12 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o 13 13 fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o
+53 -4
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
··· 98 98 int i; 99 99 100 100 seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name); 101 - seq_printf(file, "%s%16s%16s%16s%16s%16s%16s\n", 102 - "CHID", "CPU", "Deq busy", "Frames", "CDANs", 101 + seq_printf(file, "%s %5s%16s%16s%16s%16s%16s%16s\n", 102 + "IDX", "CHID", "CPU", "Deq busy", "Frames", "CDANs", 103 103 "Avg Frm/CDAN", "Buf count"); 104 104 105 105 for (i = 0; i < priv->num_channels; i++) { 106 106 ch = priv->channel[i]; 107 - seq_printf(file, "%4d%16d%16llu%16llu%16llu%16llu%16d\n", 108 - ch->ch_id, 107 + seq_printf(file, "%3s%d%6d%16d%16llu%16llu%16llu%16llu%16d\n", 108 + "CH#", i, ch->ch_id, 109 109 ch->nctx.desired_cpu, 110 110 ch->stats.dequeue_portal_busy, 111 111 ch->stats.frames, ··· 118 118 } 119 119 120 120 DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_ch); 121 + 122 + static int dpaa2_dbg_bp_show(struct seq_file *file, void *offset) 123 + { 124 + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; 125 + int i, j, num_queues, buf_cnt; 126 + struct dpaa2_eth_bp *bp; 127 + char ch_name[10]; 128 + int err; 129 + 130 + /* Print out the header */ 131 + seq_printf(file, "Buffer pool info for %s:\n", priv->net_dev->name); 132 + seq_printf(file, "%s %10s%15s", "IDX", "BPID", "Buf count"); 133 + num_queues = dpaa2_eth_queue_count(priv); 134 + for (i = 0; i < num_queues; i++) { 135 + snprintf(ch_name, sizeof(ch_name), "CH#%d", i); 136 + seq_printf(file, "%10s", ch_name); 137 + } 138 + seq_printf(file, "\n"); 139 + 140 + /* For each buffer pool, print out its BPID, the number of buffers in 141 + * that buffer pool and the channels which are using it. 142 + */ 143 + for (i = 0; i < priv->num_bps; i++) { 144 + bp = priv->bp[i]; 145 + 146 + err = dpaa2_io_query_bp_count(NULL, bp->bpid, &buf_cnt); 147 + if (err) { 148 + netdev_warn(priv->net_dev, "Buffer count query error %d\n", err); 149 + return err; 150 + } 151 + 152 + seq_printf(file, "%3s%d%10d%15d", "BP#", i, bp->bpid, buf_cnt); 153 + for (j = 0; j < num_queues; j++) { 154 + if (priv->channel[j]->bp == bp) 155 + seq_printf(file, "%10s", "x"); 156 + else 157 + seq_printf(file, "%10s", ""); 158 + } 159 + seq_printf(file, "\n"); 160 + } 161 + 162 + return 0; 163 + } 164 + 165 + DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_bp); 121 166 122 167 void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) 123 168 { ··· 184 139 185 140 /* per-fq stats file */ 186 141 debugfs_create_file("ch_stats", 0444, dir, priv, &dpaa2_dbg_ch_fops); 142 + 143 + /* per buffer pool stats file */ 144 + debugfs_create_file("bp_stats", 0444, dir, priv, &dpaa2_dbg_bp_fops); 145 + 187 146 } 188 147 189 148 void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv)
+87 -47
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h
··· 73 73 TP_ARGS(netdev, fd) 74 74 ); 75 75 76 + /* Tx (egress) XSK fd */ 77 + DEFINE_EVENT(dpaa2_eth_fd, dpaa2_tx_xsk_fd, 78 + TP_PROTO(struct net_device *netdev, 79 + const struct dpaa2_fd *fd), 80 + 81 + TP_ARGS(netdev, fd) 82 + ); 83 + 76 84 /* Rx fd */ 77 85 DEFINE_EVENT(dpaa2_eth_fd, dpaa2_rx_fd, 86 + TP_PROTO(struct net_device *netdev, 87 + const struct dpaa2_fd *fd), 88 + 89 + TP_ARGS(netdev, fd) 90 + ); 91 + 92 + /* Rx XSK fd */ 93 + DEFINE_EVENT(dpaa2_eth_fd, dpaa2_rx_xsk_fd, 78 94 TP_PROTO(struct net_device *netdev, 79 95 const struct dpaa2_fd *fd), 80 96 ··· 106 90 ); 107 91 108 92 /* Log data about raw buffers. Useful for tracing DPBP content. */ 109 - TRACE_EVENT(dpaa2_eth_buf_seed, 110 - /* Trace function prototype */ 111 - TP_PROTO(struct net_device *netdev, 112 - /* virtual address and size */ 113 - void *vaddr, 114 - size_t size, 115 - /* dma map address and size */ 116 - dma_addr_t dma_addr, 117 - size_t map_size, 118 - /* buffer pool id, if relevant */ 119 - u16 bpid), 93 + DECLARE_EVENT_CLASS(dpaa2_eth_buf, 94 + /* Trace function prototype */ 95 + TP_PROTO(struct net_device *netdev, 96 + /* virtual address and size */ 97 + void *vaddr, 98 + size_t size, 99 + /* dma map address and size */ 100 + dma_addr_t dma_addr, 101 + size_t map_size, 102 + /* buffer pool id, if relevant */ 103 + u16 bpid), 120 104 121 - /* Repeat argument list here */ 122 - TP_ARGS(netdev, vaddr, size, dma_addr, map_size, bpid), 105 + /* Repeat argument list here */ 106 + TP_ARGS(netdev, vaddr, size, dma_addr, map_size, bpid), 123 107 124 - /* A structure containing the relevant information we want 125 - * to record. Declare name and type for each normal element, 126 - * name, type and size for arrays. Use __string for variable 127 - * length strings. 128 - */ 129 - TP_STRUCT__entry( 130 - __field(void *, vaddr) 131 - __field(size_t, size) 132 - __field(dma_addr_t, dma_addr) 133 - __field(size_t, map_size) 134 - __field(u16, bpid) 135 - __string(name, netdev->name) 136 - ), 108 + /* A structure containing the relevant information we want 109 + * to record. Declare name and type for each normal element, 110 + * name, type and size for arrays. Use __string for variable 111 + * length strings. 112 + */ 113 + TP_STRUCT__entry( 114 + __field(void *, vaddr) 115 + __field(size_t, size) 116 + __field(dma_addr_t, dma_addr) 117 + __field(size_t, map_size) 118 + __field(u16, bpid) 119 + __string(name, netdev->name) 120 + ), 137 121 138 - /* The function that assigns values to the above declared 139 - * fields 140 - */ 141 - TP_fast_assign( 142 - __entry->vaddr = vaddr; 143 - __entry->size = size; 144 - __entry->dma_addr = dma_addr; 145 - __entry->map_size = map_size; 146 - __entry->bpid = bpid; 147 - __assign_str(name, netdev->name); 148 - ), 122 + /* The function that assigns values to the above declared 123 + * fields 124 + */ 125 + TP_fast_assign( 126 + __entry->vaddr = vaddr; 127 + __entry->size = size; 128 + __entry->dma_addr = dma_addr; 129 + __entry->map_size = map_size; 130 + __entry->bpid = bpid; 131 + __assign_str(name, netdev->name); 132 + ), 149 133 150 - /* This is what gets printed when the trace event is 151 - * triggered. 152 - */ 153 - TP_printk(TR_BUF_FMT, 154 - __get_str(name), 155 - __entry->vaddr, 156 - __entry->size, 157 - &__entry->dma_addr, 158 - __entry->map_size, 159 - __entry->bpid) 134 + /* This is what gets printed when the trace event is 135 + * triggered. 
136 + */ 137 + TP_printk(TR_BUF_FMT, 138 + __get_str(name), 139 + __entry->vaddr, 140 + __entry->size, 141 + &__entry->dma_addr, 142 + __entry->map_size, 143 + __entry->bpid) 144 + ); 145 + 146 + /* Main memory buff seeding */ 147 + DEFINE_EVENT(dpaa2_eth_buf, dpaa2_eth_buf_seed, 148 + TP_PROTO(struct net_device *netdev, 149 + void *vaddr, 150 + size_t size, 151 + dma_addr_t dma_addr, 152 + size_t map_size, 153 + u16 bpid), 154 + 155 + TP_ARGS(netdev, vaddr, size, dma_addr, map_size, bpid) 156 + ); 157 + 158 + /* UMEM buff seeding on AF_XDP fast path */ 159 + DEFINE_EVENT(dpaa2_eth_buf, dpaa2_xsk_buf_seed, 160 + TP_PROTO(struct net_device *netdev, 161 + void *vaddr, 162 + size_t size, 163 + dma_addr_t dma_addr, 164 + size_t map_size, 165 + u16 bpid), 166 + 167 + TP_ARGS(netdev, vaddr, size, dma_addr, map_size, bpid) 160 168 ); 161 169 162 170 /* If only one event of a certain type needs to be declared, use TRACE_EVENT().
+332 -153
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
··· 1 1 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) 2 2 /* Copyright 2014-2016 Freescale Semiconductor Inc. 3 - * Copyright 2016-2020 NXP 3 + * Copyright 2016-2022 NXP 4 4 */ 5 5 #include <linux/init.h> 6 6 #include <linux/module.h> ··· 19 19 #include <net/pkt_cls.h> 20 20 #include <net/sock.h> 21 21 #include <net/tso.h> 22 + #include <net/xdp_sock_drv.h> 22 23 23 24 #include "dpaa2-eth.h" 24 25 ··· 105 104 priv->dpaa2_set_onestep_params_cb = dpaa2_update_ptp_onestep_direct; 106 105 } 107 106 108 - static void *dpaa2_iova_to_virt(struct iommu_domain *domain, 109 - dma_addr_t iova_addr) 107 + void *dpaa2_iova_to_virt(struct iommu_domain *domain, 108 + dma_addr_t iova_addr) 110 109 { 111 110 phys_addr_t phys_addr; 112 111 ··· 280 279 * be released in the pool 281 280 */ 282 281 static void dpaa2_eth_free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, 283 - int count) 282 + int count, bool xsk_zc) 284 283 { 285 284 struct device *dev = priv->net_dev->dev.parent; 285 + struct dpaa2_eth_swa *swa; 286 + struct xdp_buff *xdp_buff; 286 287 void *vaddr; 287 288 int i; 288 289 289 290 for (i = 0; i < count; i++) { 290 291 vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]); 291 - dma_unmap_page(dev, buf_array[i], priv->rx_buf_size, 292 - DMA_BIDIRECTIONAL); 293 - free_pages((unsigned long)vaddr, 0); 292 + 293 + if (!xsk_zc) { 294 + dma_unmap_page(dev, buf_array[i], priv->rx_buf_size, 295 + DMA_BIDIRECTIONAL); 296 + free_pages((unsigned long)vaddr, 0); 297 + } else { 298 + swa = (struct dpaa2_eth_swa *) 299 + (vaddr + DPAA2_ETH_RX_HWA_SIZE); 300 + xdp_buff = swa->xsk.xdp_buff; 301 + xsk_buff_free(xdp_buff); 302 + } 294 303 } 295 304 } 296 305 297 - static void dpaa2_eth_recycle_buf(struct dpaa2_eth_priv *priv, 298 - struct dpaa2_eth_channel *ch, 299 - dma_addr_t addr) 306 + void dpaa2_eth_recycle_buf(struct dpaa2_eth_priv *priv, 307 + struct dpaa2_eth_channel *ch, 308 + dma_addr_t addr) 300 309 { 301 310 int retries = 0; 302 311 int err; ··· 315 304 if (ch->recycled_bufs_cnt < DPAA2_ETH_BUFS_PER_CMD) 316 305 return; 317 306 318 - while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid, 307 + while ((err = dpaa2_io_service_release(ch->dpio, ch->bp->bpid, 319 308 ch->recycled_bufs, 320 309 ch->recycled_bufs_cnt)) == -EBUSY) { 321 310 if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) ··· 324 313 } 325 314 326 315 if (err) { 327 - dpaa2_eth_free_bufs(priv, ch->recycled_bufs, ch->recycled_bufs_cnt); 316 + dpaa2_eth_free_bufs(priv, ch->recycled_bufs, 317 + ch->recycled_bufs_cnt, ch->xsk_zc); 328 318 ch->buf_count -= ch->recycled_bufs_cnt; 329 319 } 330 320 ··· 389 377 fq->xdp_tx_fds.num = 0; 390 378 } 391 379 392 - static void dpaa2_eth_xdp_enqueue(struct dpaa2_eth_priv *priv, 393 - struct dpaa2_eth_channel *ch, 394 - struct dpaa2_fd *fd, 395 - void *buf_start, u16 queue_id) 380 + void dpaa2_eth_xdp_enqueue(struct dpaa2_eth_priv *priv, 381 + struct dpaa2_eth_channel *ch, 382 + struct dpaa2_fd *fd, 383 + void *buf_start, u16 queue_id) 396 384 { 397 385 struct dpaa2_faead *faead; 398 386 struct dpaa2_fd *dest_fd; ··· 497 485 return xdp_act; 498 486 } 499 487 500 - static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch, 501 - const struct dpaa2_fd *fd, 502 - void *fd_vaddr) 488 + struct sk_buff *dpaa2_eth_alloc_skb(struct dpaa2_eth_priv *priv, 489 + struct dpaa2_eth_channel *ch, 490 + const struct dpaa2_fd *fd, u32 fd_length, 491 + void *fd_vaddr) 503 492 { 504 493 u16 fd_offset = dpaa2_fd_get_offset(fd); 505 - struct dpaa2_eth_priv *priv = ch->priv; 506 - u32 fd_length 
= dpaa2_fd_get_len(fd); 507 494 struct sk_buff *skb = NULL; 508 495 unsigned int skb_len; 509 - 510 - if (fd_length > priv->rx_copybreak) 511 - return NULL; 512 496 513 497 skb_len = fd_length + dpaa2_eth_needed_headroom(NULL); 514 498 ··· 522 514 return skb; 523 515 } 524 516 517 + static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch, 518 + const struct dpaa2_fd *fd, 519 + void *fd_vaddr) 520 + { 521 + struct dpaa2_eth_priv *priv = ch->priv; 522 + u32 fd_length = dpaa2_fd_get_len(fd); 523 + 524 + if (fd_length > priv->rx_copybreak) 525 + return NULL; 526 + 527 + return dpaa2_eth_alloc_skb(priv, ch, fd, fd_length, fd_vaddr); 528 + } 529 + 530 + void dpaa2_eth_receive_skb(struct dpaa2_eth_priv *priv, 531 + struct dpaa2_eth_channel *ch, 532 + const struct dpaa2_fd *fd, void *vaddr, 533 + struct dpaa2_eth_fq *fq, 534 + struct rtnl_link_stats64 *percpu_stats, 535 + struct sk_buff *skb) 536 + { 537 + struct dpaa2_fas *fas; 538 + u32 status = 0; 539 + 540 + fas = dpaa2_get_fas(vaddr, false); 541 + prefetch(fas); 542 + prefetch(skb->data); 543 + 544 + /* Get the timestamp value */ 545 + if (priv->rx_tstamp) { 546 + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); 547 + __le64 *ts = dpaa2_get_ts(vaddr, false); 548 + u64 ns; 549 + 550 + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); 551 + 552 + ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); 553 + shhwtstamps->hwtstamp = ns_to_ktime(ns); 554 + } 555 + 556 + /* Check if we need to validate the L4 csum */ 557 + if (likely(dpaa2_fd_get_frc(fd) & DPAA2_FD_FRC_FASV)) { 558 + status = le32_to_cpu(fas->status); 559 + dpaa2_eth_validate_rx_csum(priv, status, skb); 560 + } 561 + 562 + skb->protocol = eth_type_trans(skb, priv->net_dev); 563 + skb_record_rx_queue(skb, fq->flowid); 564 + 565 + percpu_stats->rx_packets++; 566 + percpu_stats->rx_bytes += dpaa2_fd_get_len(fd); 567 + ch->stats.bytes_per_cdan += dpaa2_fd_get_len(fd); 568 + 569 + list_add_tail(&skb->list, ch->rx_list); 570 + } 571 + 525 572 /* Main Rx frame processing routine */ 526 - static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, 527 - struct dpaa2_eth_channel *ch, 528 - const struct dpaa2_fd *fd, 529 - struct dpaa2_eth_fq *fq) 573 + void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, 574 + struct dpaa2_eth_channel *ch, 575 + const struct dpaa2_fd *fd, 576 + struct dpaa2_eth_fq *fq) 530 577 { 531 578 dma_addr_t addr = dpaa2_fd_get_addr(fd); 532 579 u8 fd_format = dpaa2_fd_get_format(fd); ··· 590 527 struct rtnl_link_stats64 *percpu_stats; 591 528 struct dpaa2_eth_drv_stats *percpu_extras; 592 529 struct device *dev = priv->net_dev->dev.parent; 593 - struct dpaa2_fas *fas; 594 530 void *buf_data; 595 - u32 status = 0; 596 531 u32 xdp_act; 597 532 598 533 /* Tracing point */ ··· 600 539 dma_sync_single_for_cpu(dev, addr, priv->rx_buf_size, 601 540 DMA_BIDIRECTIONAL); 602 541 603 - fas = dpaa2_get_fas(vaddr, false); 604 - prefetch(fas); 605 542 buf_data = vaddr + dpaa2_fd_get_offset(fd); 606 543 prefetch(buf_data); 607 544 ··· 637 578 if (unlikely(!skb)) 638 579 goto err_build_skb; 639 580 640 - prefetch(skb->data); 641 - 642 - /* Get the timestamp value */ 643 - if (priv->rx_tstamp) { 644 - struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); 645 - __le64 *ts = dpaa2_get_ts(vaddr, false); 646 - u64 ns; 647 - 648 - memset(shhwtstamps, 0, sizeof(*shhwtstamps)); 649 - 650 - ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); 651 - shhwtstamps->hwtstamp = ns_to_ktime(ns); 652 - } 653 - 654 - /* Check if we need to validate the L4 csum */ 655 - if 
(likely(dpaa2_fd_get_frc(fd) & DPAA2_FD_FRC_FASV)) { 656 - status = le32_to_cpu(fas->status); 657 - dpaa2_eth_validate_rx_csum(priv, status, skb); 658 - } 659 - 660 - skb->protocol = eth_type_trans(skb, priv->net_dev); 661 - skb_record_rx_queue(skb, fq->flowid); 662 - 663 - percpu_stats->rx_packets++; 664 - percpu_stats->rx_bytes += dpaa2_fd_get_len(fd); 665 - ch->stats.bytes_per_cdan += dpaa2_fd_get_len(fd); 666 - 667 - list_add_tail(&skb->list, ch->rx_list); 668 - 581 + dpaa2_eth_receive_skb(priv, ch, fd, vaddr, fq, percpu_stats, skb); 669 582 return; 670 583 671 584 err_build_skb: ··· 858 827 } 859 828 } 860 829 861 - static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv) 830 + void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv) 862 831 { 863 832 struct dpaa2_eth_sgt_cache *sgt_cache; 864 833 void *sgt_buf = NULL; ··· 880 849 return sgt_buf; 881 850 } 882 851 883 - static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf) 852 + void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf) 884 853 { 885 854 struct dpaa2_eth_sgt_cache *sgt_cache; 886 855 ··· 1115 1084 * This can be called either from dpaa2_eth_tx_conf() or on the error path of 1116 1085 * dpaa2_eth_tx(). 1117 1086 */ 1118 - static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, 1119 - struct dpaa2_eth_fq *fq, 1120 - const struct dpaa2_fd *fd, bool in_napi) 1087 + void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, 1088 + struct dpaa2_eth_channel *ch, 1089 + struct dpaa2_eth_fq *fq, 1090 + const struct dpaa2_fd *fd, bool in_napi) 1121 1091 { 1122 1092 struct device *dev = priv->net_dev->dev.parent; 1123 1093 dma_addr_t fd_addr, sg_addr; ··· 1185 1153 1186 1154 if (!swa->tso.is_last_fd) 1187 1155 should_free_skb = 0; 1156 + } else if (swa->type == DPAA2_ETH_SWA_XSK) { 1157 + /* Unmap the SGT Buffer */ 1158 + dma_unmap_single(dev, fd_addr, swa->xsk.sgt_size, 1159 + DMA_BIDIRECTIONAL); 1188 1160 } else { 1189 1161 skb = swa->single.skb; 1190 1162 ··· 1203 1167 } 1204 1168 } else { 1205 1169 netdev_dbg(priv->net_dev, "Invalid FD format\n"); 1170 + return; 1171 + } 1172 + 1173 + if (swa->type == DPAA2_ETH_SWA_XSK) { 1174 + ch->xsk_tx_pkts_sent++; 1175 + dpaa2_eth_sgt_recycle(priv, buffer_start); 1206 1176 return; 1207 1177 } 1208 1178 ··· 1386 1344 err_sgt_get: 1387 1345 /* Free all the other FDs that were already fully created */ 1388 1346 for (i = 0; i < index; i++) 1389 - dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false); 1347 + dpaa2_eth_free_tx_fd(priv, NULL, NULL, &fd_start[i], false); 1390 1348 1391 1349 return err; 1392 1350 } ··· 1502 1460 if (unlikely(err < 0)) { 1503 1461 percpu_stats->tx_errors++; 1504 1462 /* Clean up everything, including freeing the skb */ 1505 - dpaa2_eth_free_tx_fd(priv, fq, fd, false); 1463 + dpaa2_eth_free_tx_fd(priv, NULL, fq, fd, false); 1506 1464 netdev_tx_completed_queue(nq, 1, fd_len); 1507 1465 } else { 1508 1466 percpu_stats->tx_packets += total_enqueued; ··· 1595 1553 1596 1554 /* Check frame errors in the FD field */ 1597 1555 fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK; 1598 - dpaa2_eth_free_tx_fd(priv, fq, fd, true); 1556 + dpaa2_eth_free_tx_fd(priv, ch, fq, fd, true); 1599 1557 1600 1558 if (likely(!fd_errors)) 1601 1559 return; ··· 1673 1631 * to the specified buffer pool 1674 1632 */ 1675 1633 static int dpaa2_eth_add_bufs(struct dpaa2_eth_priv *priv, 1676 - struct dpaa2_eth_channel *ch, u16 bpid) 1634 + struct dpaa2_eth_channel *ch) 1677 1635 { 1636 + struct xdp_buff *xdp_buffs[DPAA2_ETH_BUFS_PER_CMD]; 1678 1637 
struct device *dev = priv->net_dev->dev.parent; 1679 1638 u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; 1639 + struct dpaa2_eth_swa *swa; 1680 1640 struct page *page; 1681 1641 dma_addr_t addr; 1682 1642 int retries = 0; 1683 - int i, err; 1643 + int i = 0, err; 1644 + u32 batch; 1684 1645 1685 - for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) { 1686 - /* Allocate buffer visible to WRIOP + skb shared info + 1687 - * alignment padding 1646 + /* Allocate buffers visible to WRIOP */ 1647 + if (!ch->xsk_zc) { 1648 + for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) { 1649 + /* Also allocate skb shared info and alignment padding. 1650 + * There is one page for each Rx buffer. WRIOP sees 1651 + * the entire page except for a tailroom reserved for 1652 + * skb shared info 1653 + */ 1654 + page = dev_alloc_pages(0); 1655 + if (!page) 1656 + goto err_alloc; 1657 + 1658 + addr = dma_map_page(dev, page, 0, priv->rx_buf_size, 1659 + DMA_BIDIRECTIONAL); 1660 + if (unlikely(dma_mapping_error(dev, addr))) 1661 + goto err_map; 1662 + 1663 + buf_array[i] = addr; 1664 + 1665 + /* tracing point */ 1666 + trace_dpaa2_eth_buf_seed(priv->net_dev, 1667 + page_address(page), 1668 + DPAA2_ETH_RX_BUF_RAW_SIZE, 1669 + addr, priv->rx_buf_size, 1670 + ch->bp->bpid); 1671 + } 1672 + } else if (xsk_buff_can_alloc(ch->xsk_pool, DPAA2_ETH_BUFS_PER_CMD)) { 1673 + /* Allocate XSK buffers for AF_XDP fast path in batches 1674 + * of DPAA2_ETH_BUFS_PER_CMD. Bail out if the UMEM cannot 1675 + * provide enough buffers at the moment 1688 1676 */ 1689 - /* allocate one page for each Rx buffer. WRIOP sees 1690 - * the entire page except for a tailroom reserved for 1691 - * skb shared info 1692 - */ 1693 - page = dev_alloc_pages(0); 1694 - if (!page) 1677 + batch = xsk_buff_alloc_batch(ch->xsk_pool, xdp_buffs, 1678 + DPAA2_ETH_BUFS_PER_CMD); 1679 + if (!batch) 1695 1680 goto err_alloc; 1696 1681 1697 - addr = dma_map_page(dev, page, 0, priv->rx_buf_size, 1698 - DMA_BIDIRECTIONAL); 1699 - if (unlikely(dma_mapping_error(dev, addr))) 1700 - goto err_map; 1682 + for (i = 0; i < batch; i++) { 1683 + swa = (struct dpaa2_eth_swa *)(xdp_buffs[i]->data_hard_start + 1684 + DPAA2_ETH_RX_HWA_SIZE); 1685 + swa->xsk.xdp_buff = xdp_buffs[i]; 1701 1686 1702 - buf_array[i] = addr; 1687 + addr = xsk_buff_xdp_get_frame_dma(xdp_buffs[i]); 1688 + if (unlikely(dma_mapping_error(dev, addr))) 1689 + goto err_map; 1703 1690 1704 - /* tracing point */ 1705 - trace_dpaa2_eth_buf_seed(priv->net_dev, page_address(page), 1706 - DPAA2_ETH_RX_BUF_RAW_SIZE, 1707 - addr, priv->rx_buf_size, 1708 - bpid); 1691 + buf_array[i] = addr; 1692 + 1693 + trace_dpaa2_xsk_buf_seed(priv->net_dev, 1694 + xdp_buffs[i]->data_hard_start, 1695 + DPAA2_ETH_RX_BUF_RAW_SIZE, 1696 + addr, priv->rx_buf_size, 1697 + ch->bp->bpid); 1698 + } 1709 1699 } 1710 1700 1711 1701 release_bufs: 1712 1702 /* In case the portal is busy, retry until successful */ 1713 - while ((err = dpaa2_io_service_release(ch->dpio, bpid, 1703 + while ((err = dpaa2_io_service_release(ch->dpio, ch->bp->bpid, 1714 1704 buf_array, i)) == -EBUSY) { 1715 1705 if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) 1716 1706 break; ··· 1753 1679 * not much else we can do about it 1754 1680 */ 1755 1681 if (err) { 1756 - dpaa2_eth_free_bufs(priv, buf_array, i); 1682 + dpaa2_eth_free_bufs(priv, buf_array, i, ch->xsk_zc); 1757 1683 return 0; 1758 1684 } 1759 1685 1760 1686 return i; 1761 1687 1762 1688 err_map: 1763 - __free_pages(page, 0); 1689 + if (!ch->xsk_zc) { 1690 + __free_pages(page, 0); 1691 + } else { 1692 + for (; i < batch; i++) 1693 + 
xsk_buff_free(xdp_buffs[i]); 1694 + } 1764 1695 err_alloc: 1765 1696 /* If we managed to allocate at least some buffers, 1766 1697 * release them to hardware ··· 1776 1697 return 0; 1777 1698 } 1778 1699 1779 - static int dpaa2_eth_seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) 1700 + static int dpaa2_eth_seed_pool(struct dpaa2_eth_priv *priv, 1701 + struct dpaa2_eth_channel *ch) 1780 1702 { 1781 - int i, j; 1703 + int i; 1782 1704 int new_count; 1783 1705 1784 - for (j = 0; j < priv->num_channels; j++) { 1785 - for (i = 0; i < DPAA2_ETH_NUM_BUFS; 1786 - i += DPAA2_ETH_BUFS_PER_CMD) { 1787 - new_count = dpaa2_eth_add_bufs(priv, priv->channel[j], bpid); 1788 - priv->channel[j]->buf_count += new_count; 1706 + for (i = 0; i < DPAA2_ETH_NUM_BUFS; i += DPAA2_ETH_BUFS_PER_CMD) { 1707 + new_count = dpaa2_eth_add_bufs(priv, ch); 1708 + ch->buf_count += new_count; 1789 1709 1790 - if (new_count < DPAA2_ETH_BUFS_PER_CMD) { 1791 - return -ENOMEM; 1792 - } 1793 - } 1710 + if (new_count < DPAA2_ETH_BUFS_PER_CMD) 1711 + return -ENOMEM; 1794 1712 } 1795 1713 1796 1714 return 0; 1797 1715 } 1798 1716 1717 + static void dpaa2_eth_seed_pools(struct dpaa2_eth_priv *priv) 1718 + { 1719 + struct net_device *net_dev = priv->net_dev; 1720 + struct dpaa2_eth_channel *channel; 1721 + int i, err = 0; 1722 + 1723 + for (i = 0; i < priv->num_channels; i++) { 1724 + channel = priv->channel[i]; 1725 + 1726 + err = dpaa2_eth_seed_pool(priv, channel); 1727 + 1728 + /* Not much to do; the buffer pool, though not filled up, 1729 + * may still contain some buffers which would enable us 1730 + * to limp on. 1731 + */ 1732 + if (err) 1733 + netdev_err(net_dev, "Buffer seeding failed for DPBP %d (bpid=%d)\n", 1734 + channel->bp->dev->obj_desc.id, 1735 + channel->bp->bpid); 1736 + } 1737 + } 1738 + 1799 1739 /* 1800 - * Drain the specified number of buffers from the DPNI's private buffer pool. 1740 + * Drain the specified number of buffers from one of the DPNI's private buffer 1741 + * pools. 
1801 1742 * @count must not exceeed DPAA2_ETH_BUFS_PER_CMD 1802 1743 */ 1803 - static void dpaa2_eth_drain_bufs(struct dpaa2_eth_priv *priv, int count) 1744 + static void dpaa2_eth_drain_bufs(struct dpaa2_eth_priv *priv, int bpid, 1745 + int count) 1804 1746 { 1805 1747 u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; 1748 + bool xsk_zc = false; 1806 1749 int retries = 0; 1807 - int ret; 1750 + int i, ret; 1751 + 1752 + for (i = 0; i < priv->num_channels; i++) 1753 + if (priv->channel[i]->bp->bpid == bpid) 1754 + xsk_zc = priv->channel[i]->xsk_zc; 1808 1755 1809 1756 do { 1810 - ret = dpaa2_io_service_acquire(NULL, priv->bpid, 1811 - buf_array, count); 1757 + ret = dpaa2_io_service_acquire(NULL, bpid, buf_array, count); 1812 1758 if (ret < 0) { 1813 1759 if (ret == -EBUSY && 1814 1760 retries++ < DPAA2_ETH_SWP_BUSY_RETRIES) ··· 1841 1737 netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n"); 1842 1738 return; 1843 1739 } 1844 - dpaa2_eth_free_bufs(priv, buf_array, ret); 1740 + dpaa2_eth_free_bufs(priv, buf_array, ret, xsk_zc); 1845 1741 retries = 0; 1846 1742 } while (ret); 1847 1743 } 1848 1744 1849 - static void dpaa2_eth_drain_pool(struct dpaa2_eth_priv *priv) 1745 + static void dpaa2_eth_drain_pool(struct dpaa2_eth_priv *priv, int bpid) 1850 1746 { 1851 1747 int i; 1852 1748 1853 - dpaa2_eth_drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD); 1854 - dpaa2_eth_drain_bufs(priv, 1); 1749 + /* Drain the buffer pool */ 1750 + dpaa2_eth_drain_bufs(priv, bpid, DPAA2_ETH_BUFS_PER_CMD); 1751 + dpaa2_eth_drain_bufs(priv, bpid, 1); 1855 1752 1753 + /* Setup to zero the buffer count of all channels which were 1754 + * using this buffer pool. 1755 + */ 1856 1756 for (i = 0; i < priv->num_channels; i++) 1857 - priv->channel[i]->buf_count = 0; 1757 + if (priv->channel[i]->bp->bpid == bpid) 1758 + priv->channel[i]->buf_count = 0; 1759 + } 1760 + 1761 + static void dpaa2_eth_drain_pools(struct dpaa2_eth_priv *priv) 1762 + { 1763 + int i; 1764 + 1765 + for (i = 0; i < priv->num_bps; i++) 1766 + dpaa2_eth_drain_pool(priv, priv->bp[i]->bpid); 1858 1767 } 1859 1768 1860 1769 /* Function is called from softirq context only, so we don't need to guard 1861 1770 * the access to percpu count 1862 1771 */ 1863 1772 static int dpaa2_eth_refill_pool(struct dpaa2_eth_priv *priv, 1864 - struct dpaa2_eth_channel *ch, 1865 - u16 bpid) 1773 + struct dpaa2_eth_channel *ch) 1866 1774 { 1867 1775 int new_count; 1868 1776 ··· 1882 1766 return 0; 1883 1767 1884 1768 do { 1885 - new_count = dpaa2_eth_add_bufs(priv, ch, bpid); 1769 + new_count = dpaa2_eth_add_bufs(priv, ch); 1886 1770 if (unlikely(!new_count)) { 1887 1771 /* Out of memory; abort for now, we'll try later on */ 1888 1772 break; ··· 1946 1830 struct dpaa2_eth_fq *fq, *txc_fq = NULL; 1947 1831 struct netdev_queue *nq; 1948 1832 int store_cleaned, work_done; 1833 + bool work_done_zc = false; 1949 1834 struct list_head rx_list; 1950 1835 int retries = 0; 1951 1836 u16 flowid; ··· 1959 1842 INIT_LIST_HEAD(&rx_list); 1960 1843 ch->rx_list = &rx_list; 1961 1844 1845 + if (ch->xsk_zc) { 1846 + work_done_zc = dpaa2_xsk_tx(priv, ch); 1847 + /* If we reached the XSK Tx per NAPI threshold, we're done */ 1848 + if (work_done_zc) { 1849 + work_done = budget; 1850 + goto out; 1851 + } 1852 + } 1853 + 1962 1854 do { 1963 1855 err = dpaa2_eth_pull_channel(ch); 1964 1856 if (unlikely(err)) 1965 1857 break; 1966 1858 1967 1859 /* Refill pool if appropriate */ 1968 - dpaa2_eth_refill_pool(priv, ch, priv->bpid); 1860 + dpaa2_eth_refill_pool(priv, ch); 1969 1861 1970 1862 store_cleaned = 
dpaa2_eth_consume_frames(ch, &fq); 1971 1863 if (store_cleaned <= 0) ··· 2019 1893 2020 1894 out: 2021 1895 netif_receive_skb_list(ch->rx_list); 1896 + 1897 + if (ch->xsk_tx_pkts_sent) { 1898 + xsk_tx_completed(ch->xsk_pool, ch->xsk_tx_pkts_sent); 1899 + ch->xsk_tx_pkts_sent = 0; 1900 + } 2022 1901 2023 1902 if (txc_fq && txc_fq->dq_frames) { 2024 1903 nq = netdev_get_tx_queue(priv->net_dev, txc_fq->flowid); ··· 2178 2047 struct dpaa2_eth_priv *priv = netdev_priv(net_dev); 2179 2048 int err; 2180 2049 2181 - err = dpaa2_eth_seed_pool(priv, priv->bpid); 2182 - if (err) { 2183 - /* Not much to do; the buffer pool, though not filled up, 2184 - * may still contain some buffers which would enable us 2185 - * to limp on. 2186 - */ 2187 - netdev_err(net_dev, "Buffer seeding failed for DPBP %d (bpid=%d)\n", 2188 - priv->dpbp_dev->obj_desc.id, priv->bpid); 2189 - } 2050 + dpaa2_eth_seed_pools(priv); 2190 2051 2191 2052 if (!dpaa2_eth_is_type_phy(priv)) { 2192 2053 /* We'll only start the txqs when the link is actually ready; ··· 2211 2088 2212 2089 enable_err: 2213 2090 dpaa2_eth_disable_ch_napi(priv); 2214 - dpaa2_eth_drain_pool(priv); 2091 + dpaa2_eth_drain_pools(priv); 2215 2092 return err; 2216 2093 } 2217 2094 ··· 2316 2193 dpaa2_eth_disable_ch_napi(priv); 2317 2194 2318 2195 /* Empty the buffer pool */ 2319 - dpaa2_eth_drain_pool(priv); 2196 + dpaa2_eth_drain_pools(priv); 2320 2197 2321 2198 /* Empty the Scatter-Gather Buffer cache */ 2322 2199 dpaa2_eth_sgt_cache_drain(priv); ··· 2725 2602 need_update = (!!priv->xdp_prog != !!prog); 2726 2603 2727 2604 if (up) 2728 - dpaa2_eth_stop(dev); 2605 + dev_close(dev); 2729 2606 2730 2607 /* While in xdp mode, enforce a maximum Rx frame size based on MTU. 2731 2608 * Also, when switching between xdp/non-xdp modes we need to reconfigure ··· 2753 2630 } 2754 2631 2755 2632 if (up) { 2756 - err = dpaa2_eth_open(dev); 2633 + err = dev_open(dev, NULL); 2757 2634 if (err) 2758 2635 return err; 2759 2636 } ··· 2764 2641 if (prog) 2765 2642 bpf_prog_sub(prog, priv->num_channels); 2766 2643 if (up) 2767 - dpaa2_eth_open(dev); 2644 + dev_open(dev, NULL); 2768 2645 2769 2646 return err; 2770 2647 } ··· 2774 2651 switch (xdp->command) { 2775 2652 case XDP_SETUP_PROG: 2776 2653 return dpaa2_eth_setup_xdp(dev, xdp->prog); 2654 + case XDP_SETUP_XSK_POOL: 2655 + return dpaa2_xsk_setup_pool(dev, xdp->xsk.pool, xdp->xsk.queue_id); 2777 2656 default: 2778 2657 return -EINVAL; 2779 2658 } ··· 3006 2881 .ndo_change_mtu = dpaa2_eth_change_mtu, 3007 2882 .ndo_bpf = dpaa2_eth_xdp, 3008 2883 .ndo_xdp_xmit = dpaa2_eth_xdp_xmit, 2884 + .ndo_xsk_wakeup = dpaa2_xsk_wakeup, 3009 2885 .ndo_setup_tc = dpaa2_eth_setup_tc, 3010 2886 .ndo_vlan_rx_add_vid = dpaa2_eth_rx_add_vid, 3011 2887 .ndo_vlan_rx_kill_vid = dpaa2_eth_rx_kill_vid ··· 3021 2895 /* Update NAPI statistics */ 3022 2896 ch->stats.cdan++; 3023 2897 3024 - napi_schedule(&ch->napi); 2898 + /* NAPI can also be scheduled from the AF_XDP Tx path. Mark a missed 2899 + * so that it can be rescheduled again. 
2900 + */ 2901 + if (!napi_if_scheduled_mark_missed(&ch->napi)) 2902 + napi_schedule(&ch->napi); 3025 2903 } 3026 2904 3027 2905 /* Allocate and configure a DPCON object */ ··· 3334 3204 dpaa2_eth_set_fq_affinity(priv); 3335 3205 } 3336 3206 3337 - /* Allocate and configure one buffer pool for each interface */ 3338 - static int dpaa2_eth_setup_dpbp(struct dpaa2_eth_priv *priv) 3207 + /* Allocate and configure a buffer pool */ 3208 + struct dpaa2_eth_bp *dpaa2_eth_allocate_dpbp(struct dpaa2_eth_priv *priv) 3339 3209 { 3340 - int err; 3341 - struct fsl_mc_device *dpbp_dev; 3342 3210 struct device *dev = priv->net_dev->dev.parent; 3211 + struct fsl_mc_device *dpbp_dev; 3343 3212 struct dpbp_attr dpbp_attrs; 3213 + struct dpaa2_eth_bp *bp; 3214 + int err; 3344 3215 3345 3216 err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP, 3346 3217 &dpbp_dev); ··· 3350 3219 err = -EPROBE_DEFER; 3351 3220 else 3352 3221 dev_err(dev, "DPBP device allocation failed\n"); 3353 - return err; 3222 + return ERR_PTR(err); 3354 3223 } 3355 3224 3356 - priv->dpbp_dev = dpbp_dev; 3225 + bp = kzalloc(sizeof(*bp), GFP_KERNEL); 3226 + if (!bp) { 3227 + err = -ENOMEM; 3228 + goto err_alloc; 3229 + } 3357 3230 3358 - err = dpbp_open(priv->mc_io, 0, priv->dpbp_dev->obj_desc.id, 3231 + err = dpbp_open(priv->mc_io, 0, dpbp_dev->obj_desc.id, 3359 3232 &dpbp_dev->mc_handle); 3360 3233 if (err) { 3361 3234 dev_err(dev, "dpbp_open() failed\n"); ··· 3384 3249 dev_err(dev, "dpbp_get_attributes() failed\n"); 3385 3250 goto err_get_attr; 3386 3251 } 3387 - priv->bpid = dpbp_attrs.bpid; 3388 3252 3389 - return 0; 3253 + bp->dev = dpbp_dev; 3254 + bp->bpid = dpbp_attrs.bpid; 3255 + 3256 + return bp; 3390 3257 3391 3258 err_get_attr: 3392 3259 dpbp_disable(priv->mc_io, 0, dpbp_dev->mc_handle); ··· 3396 3259 err_reset: 3397 3260 dpbp_close(priv->mc_io, 0, dpbp_dev->mc_handle); 3398 3261 err_open: 3262 + kfree(bp); 3263 + err_alloc: 3399 3264 fsl_mc_object_free(dpbp_dev); 3400 3265 3401 - return err; 3266 + return ERR_PTR(err); 3402 3267 } 3403 3268 3404 - static void dpaa2_eth_free_dpbp(struct dpaa2_eth_priv *priv) 3269 + static int dpaa2_eth_setup_default_dpbp(struct dpaa2_eth_priv *priv) 3405 3270 { 3406 - dpaa2_eth_drain_pool(priv); 3407 - dpbp_disable(priv->mc_io, 0, priv->dpbp_dev->mc_handle); 3408 - dpbp_close(priv->mc_io, 0, priv->dpbp_dev->mc_handle); 3409 - fsl_mc_object_free(priv->dpbp_dev); 3271 + struct dpaa2_eth_bp *bp; 3272 + int i; 3273 + 3274 + bp = dpaa2_eth_allocate_dpbp(priv); 3275 + if (IS_ERR(bp)) 3276 + return PTR_ERR(bp); 3277 + 3278 + priv->bp[DPAA2_ETH_DEFAULT_BP_IDX] = bp; 3279 + priv->num_bps++; 3280 + 3281 + for (i = 0; i < priv->num_channels; i++) 3282 + priv->channel[i]->bp = bp; 3283 + 3284 + return 0; 3285 + } 3286 + 3287 + void dpaa2_eth_free_dpbp(struct dpaa2_eth_priv *priv, struct dpaa2_eth_bp *bp) 3288 + { 3289 + int idx_bp; 3290 + 3291 + /* Find the index at which this BP is stored */ 3292 + for (idx_bp = 0; idx_bp < priv->num_bps; idx_bp++) 3293 + if (priv->bp[idx_bp] == bp) 3294 + break; 3295 + 3296 + /* Drain the pool and disable the associated MC object */ 3297 + dpaa2_eth_drain_pool(priv, bp->bpid); 3298 + dpbp_disable(priv->mc_io, 0, bp->dev->mc_handle); 3299 + dpbp_close(priv->mc_io, 0, bp->dev->mc_handle); 3300 + fsl_mc_object_free(bp->dev); 3301 + kfree(bp); 3302 + 3303 + /* Move the last in use DPBP over in this position */ 3304 + priv->bp[idx_bp] = priv->bp[priv->num_bps - 1]; 3305 + priv->num_bps--; 3306 + } 3307 + 3308 + static void dpaa2_eth_free_dpbps(struct 
dpaa2_eth_priv *priv) 3309 + { 3310 + int i; 3311 + 3312 + for (i = 0; i < priv->num_bps; i++) 3313 + dpaa2_eth_free_dpbp(priv, priv->bp[i]); 3410 3314 } 3411 3315 3412 3316 static int dpaa2_eth_set_buffer_layout(struct dpaa2_eth_priv *priv) ··· 4332 4154 */ 4333 4155 static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv) 4334 4156 { 4157 + struct dpaa2_eth_bp *bp = priv->bp[DPAA2_ETH_DEFAULT_BP_IDX]; 4335 4158 struct net_device *net_dev = priv->net_dev; 4159 + struct dpni_pools_cfg pools_params = { 0 }; 4336 4160 struct device *dev = net_dev->dev.parent; 4337 - struct dpni_pools_cfg pools_params; 4338 4161 struct dpni_error_cfg err_cfg; 4339 4162 int err = 0; 4340 4163 int i; 4341 4164 4342 4165 pools_params.num_dpbp = 1; 4343 - pools_params.pools[0].dpbp_id = priv->dpbp_dev->obj_desc.id; 4166 + pools_params.pools[0].dpbp_id = bp->dev->obj_desc.id; 4344 4167 pools_params.pools[0].backup_pool = 0; 4345 4168 pools_params.pools[0].buffer_size = priv->rx_buf_size; 4346 4169 err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &pools_params); ··· 4820 4641 4821 4642 dpaa2_eth_setup_fqs(priv); 4822 4643 4823 - err = dpaa2_eth_setup_dpbp(priv); 4644 + err = dpaa2_eth_setup_default_dpbp(priv); 4824 4645 if (err) 4825 4646 goto err_dpbp_setup; 4826 4647 ··· 4956 4777 err_alloc_percpu_stats: 4957 4778 dpaa2_eth_del_ch_napi(priv); 4958 4779 err_bind: 4959 - dpaa2_eth_free_dpbp(priv); 4780 + dpaa2_eth_free_dpbps(priv); 4960 4781 err_dpbp_setup: 4961 4782 dpaa2_eth_free_dpio(priv); 4962 4783 err_dpio_setup: ··· 5009 4830 free_percpu(priv->percpu_extras); 5010 4831 5011 4832 dpaa2_eth_del_ch_napi(priv); 5012 - dpaa2_eth_free_dpbp(priv); 4833 + dpaa2_eth_free_dpbps(priv); 5013 4834 dpaa2_eth_free_dpio(priv); 5014 4835 dpaa2_eth_free_dpni(priv); 5015 4836 if (priv->onestep_reg_base)
+93 -8
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
··· 1 1 /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ 2 2 /* Copyright 2014-2016 Freescale Semiconductor Inc. 3 - * Copyright 2016-2020 NXP 3 + * Copyright 2016-2022 NXP 4 4 */ 5 5 6 6 #ifndef __DPAA2_ETH_H ··· 52 52 * in a single NAPI call 53 53 */ 54 54 #define DPAA2_ETH_TXCONF_PER_NAPI 256 55 + 56 + /* Maximum number of Tx frames to be processed in a single NAPI 57 + * call when AF_XDP is running. Bind it to DPAA2_ETH_TXCONF_PER_NAPI 58 + * to maximize the throughput. 59 + */ 60 + #define DPAA2_ETH_TX_ZC_PER_NAPI DPAA2_ETH_TXCONF_PER_NAPI 55 61 56 62 /* Buffer qouta per channel. We want to keep in check number of ingress frames 57 63 * in flight: for small sized frames, congestion group taildrop may kick in ··· 115 109 #define DPAA2_ETH_RX_BUF_ALIGN_REV1 256 116 110 #define DPAA2_ETH_RX_BUF_ALIGN 64 117 111 112 + /* The firmware allows assigning multiple buffer pools to a single DPNI - 113 + * maximum 8 DPBP objects. By default, only the first DPBP (idx 0) is used for 114 + * all queues. Thus, when enabling AF_XDP we must accommodate up to 9 DPBPs 115 + * object: the default and 8 other distinct buffer pools, one for each queue. 116 + */ 117 + #define DPAA2_ETH_DEFAULT_BP_IDX 0 118 + #define DPAA2_ETH_MAX_BPS 9 119 + 118 120 /* We are accommodating a skb backpointer and some S/G info 119 121 * in the frame's software annotation. The hardware 120 122 * options are either 0 or 64, so we choose the latter. ··· 136 122 DPAA2_ETH_SWA_SINGLE, 137 123 DPAA2_ETH_SWA_SG, 138 124 DPAA2_ETH_SWA_XDP, 125 + DPAA2_ETH_SWA_XSK, 139 126 DPAA2_ETH_SWA_SW_TSO, 140 127 }; 141 128 ··· 158 143 int dma_size; 159 144 struct xdp_frame *xdpf; 160 145 } xdp; 146 + struct { 147 + struct xdp_buff *xdp_buff; 148 + int sgt_size; 149 + } xsk; 161 150 struct { 162 151 struct sk_buff *skb; 163 152 int num_sg; ··· 440 421 }; 441 422 442 423 struct dpaa2_eth_priv; 424 + struct dpaa2_eth_channel; 425 + struct dpaa2_eth_fq; 443 426 444 427 struct dpaa2_eth_xdp_fds { 445 428 struct dpaa2_fd fds[DEV_MAP_BULK_SIZE]; 446 429 ssize_t num; 447 430 }; 431 + 432 + typedef void dpaa2_eth_consume_cb_t(struct dpaa2_eth_priv *priv, 433 + struct dpaa2_eth_channel *ch, 434 + const struct dpaa2_fd *fd, 435 + struct dpaa2_eth_fq *fq); 448 436 449 437 struct dpaa2_eth_fq { 450 438 u32 fqid; ··· 465 439 struct dpaa2_eth_channel *channel; 466 440 enum dpaa2_eth_fq_type type; 467 441 468 - void (*consume)(struct dpaa2_eth_priv *priv, 469 - struct dpaa2_eth_channel *ch, 470 - const struct dpaa2_fd *fd, 471 - struct dpaa2_eth_fq *fq); 442 + dpaa2_eth_consume_cb_t *consume; 472 443 struct dpaa2_eth_fq_stats stats; 473 444 474 445 struct dpaa2_eth_xdp_fds xdp_redirect_fds; ··· 475 452 struct dpaa2_eth_ch_xdp { 476 453 struct bpf_prog *prog; 477 454 unsigned int res; 455 + }; 456 + 457 + struct dpaa2_eth_bp { 458 + struct fsl_mc_device *dev; 459 + int bpid; 478 460 }; 479 461 480 462 struct dpaa2_eth_channel { ··· 500 472 /* Buffers to be recycled back in the buffer pool */ 501 473 u64 recycled_bufs[DPAA2_ETH_BUFS_PER_CMD]; 502 474 int recycled_bufs_cnt; 475 + 476 + bool xsk_zc; 477 + int xsk_tx_pkts_sent; 478 + struct xsk_buff_pool *xsk_pool; 479 + struct dpaa2_eth_bp *bp; 503 480 }; 504 481 505 482 struct dpaa2_eth_dist_fields { ··· 539 506 540 507 #define DPAA2_ETH_DEFAULT_COPYBREAK 512 541 508 542 - #define DPAA2_ETH_ENQUEUE_MAX_FDS 200 509 + #define DPAA2_ETH_ENQUEUE_MAX_FDS 256 543 510 struct dpaa2_eth_fds { 544 511 struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS]; 545 512 }; ··· 568 535 u8 ptp_correction_off; 569 536 void 
(*dpaa2_set_onestep_params_cb)(struct dpaa2_eth_priv *priv, 570 537 u32 offset, u8 udp); 571 - struct fsl_mc_device *dpbp_dev; 572 538 u16 rx_buf_size; 573 - u16 bpid; 574 539 struct iommu_domain *iommu_domain; 575 540 576 541 enum hwtstamp_tx_types tx_tstamp_type; /* Tx timestamping type */ 577 542 bool rx_tstamp; /* Rx timestamping enabled */ 543 + 544 + /* Buffer pool management */ 545 + struct dpaa2_eth_bp *bp[DPAA2_ETH_MAX_BPS]; 546 + int num_bps; 578 547 579 548 u16 tx_qdid; 580 549 struct fsl_mc_io *mc_io; ··· 806 771 807 772 struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv, 808 773 struct dpaa2_fapr *fapr); 774 + 775 + struct dpaa2_eth_bp *dpaa2_eth_allocate_dpbp(struct dpaa2_eth_priv *priv); 776 + void dpaa2_eth_free_dpbp(struct dpaa2_eth_priv *priv, struct dpaa2_eth_bp *bp); 777 + 778 + struct sk_buff *dpaa2_eth_alloc_skb(struct dpaa2_eth_priv *priv, 779 + struct dpaa2_eth_channel *ch, 780 + const struct dpaa2_fd *fd, u32 fd_length, 781 + void *fd_vaddr); 782 + 783 + void dpaa2_eth_receive_skb(struct dpaa2_eth_priv *priv, 784 + struct dpaa2_eth_channel *ch, 785 + const struct dpaa2_fd *fd, void *vaddr, 786 + struct dpaa2_eth_fq *fq, 787 + struct rtnl_link_stats64 *percpu_stats, 788 + struct sk_buff *skb); 789 + 790 + void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, 791 + struct dpaa2_eth_channel *ch, 792 + const struct dpaa2_fd *fd, 793 + struct dpaa2_eth_fq *fq); 794 + 795 + struct dpaa2_eth_bp *dpaa2_eth_allocate_dpbp(struct dpaa2_eth_priv *priv); 796 + void dpaa2_eth_free_dpbp(struct dpaa2_eth_priv *priv, 797 + struct dpaa2_eth_bp *bp); 798 + 799 + void *dpaa2_iova_to_virt(struct iommu_domain *domain, dma_addr_t iova_addr); 800 + void dpaa2_eth_recycle_buf(struct dpaa2_eth_priv *priv, 801 + struct dpaa2_eth_channel *ch, 802 + dma_addr_t addr); 803 + 804 + void dpaa2_eth_xdp_enqueue(struct dpaa2_eth_priv *priv, 805 + struct dpaa2_eth_channel *ch, 806 + struct dpaa2_fd *fd, 807 + void *buf_start, u16 queue_id); 808 + 809 + int dpaa2_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); 810 + int dpaa2_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid); 811 + 812 + void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, 813 + struct dpaa2_eth_channel *ch, 814 + struct dpaa2_eth_fq *fq, 815 + const struct dpaa2_fd *fd, bool in_napi); 816 + bool dpaa2_xsk_tx(struct dpaa2_eth_priv *priv, 817 + struct dpaa2_eth_channel *ch); 818 + 819 + /* SGT (Scatter-Gather Table) cache management */ 820 + void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv); 821 + 822 + void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf); 823 + 809 824 #endif /* __DPAA2_H */
+41 -17
drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
··· 1 1 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) 2 2 /* Copyright 2014-2016 Freescale Semiconductor Inc. 3 - * Copyright 2016 NXP 4 - * Copyright 2020 NXP 3 + * Copyright 2016-2022 NXP 5 4 */ 6 5 7 6 #include <linux/net_tstamp.h> ··· 226 227 struct ethtool_stats *stats, 227 228 u64 *data) 228 229 { 229 - int i = 0; 230 - int j, k, err; 231 - int num_cnt; 232 - union dpni_statistics dpni_stats; 233 - u32 fcnt, bcnt; 234 - u32 fcnt_rx_total = 0, fcnt_tx_total = 0; 235 - u32 bcnt_rx_total = 0, bcnt_tx_total = 0; 236 - u32 buf_cnt; 237 230 struct dpaa2_eth_priv *priv = netdev_priv(net_dev); 238 - struct dpaa2_eth_drv_stats *extras; 239 - struct dpaa2_eth_ch_stats *ch_stats; 231 + union dpni_statistics dpni_stats; 240 232 int dpni_stats_page_size[DPNI_STATISTICS_CNT] = { 241 233 sizeof(dpni_stats.page_0), 242 234 sizeof(dpni_stats.page_1), ··· 237 247 sizeof(dpni_stats.page_5), 238 248 sizeof(dpni_stats.page_6), 239 249 }; 250 + u32 fcnt_rx_total = 0, fcnt_tx_total = 0; 251 + u32 bcnt_rx_total = 0, bcnt_tx_total = 0; 252 + struct dpaa2_eth_ch_stats *ch_stats; 253 + struct dpaa2_eth_drv_stats *extras; 254 + u32 buf_cnt, buf_cnt_total = 0; 255 + int j, k, err, num_cnt, i = 0; 256 + u32 fcnt, bcnt; 240 257 241 258 memset(data, 0, 242 259 sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS)); ··· 305 308 *(data + i++) = fcnt_tx_total; 306 309 *(data + i++) = bcnt_tx_total; 307 310 308 - err = dpaa2_io_query_bp_count(NULL, priv->bpid, &buf_cnt); 309 - if (err) { 310 - netdev_warn(net_dev, "Buffer count query error %d\n", err); 311 - return; 311 + for (j = 0; j < priv->num_bps; j++) { 312 + err = dpaa2_io_query_bp_count(NULL, priv->bp[j]->bpid, &buf_cnt); 313 + if (err) { 314 + netdev_warn(net_dev, "Buffer count query error %d\n", err); 315 + return; 316 + } 317 + buf_cnt_total += buf_cnt; 312 318 } 313 - *(data + i++) = buf_cnt; 319 + *(data + i++) = buf_cnt_total; 314 320 315 321 if (dpaa2_eth_has_mac(priv)) 316 322 dpaa2_mac_get_ethtool_stats(priv->mac, data + i); ··· 876 876 return err; 877 877 } 878 878 879 + static void dpaa2_eth_get_channels(struct net_device *net_dev, 880 + struct ethtool_channels *channels) 881 + { 882 + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); 883 + int queue_count = dpaa2_eth_queue_count(priv); 884 + 885 + channels->max_rx = queue_count; 886 + channels->max_tx = queue_count; 887 + channels->rx_count = queue_count; 888 + channels->tx_count = queue_count; 889 + 890 + /* Tx confirmation and Rx error */ 891 + channels->max_other = queue_count + 1; 892 + channels->max_combined = channels->max_rx + 893 + channels->max_tx + 894 + channels->max_other; 895 + /* Tx conf and Rx err */ 896 + channels->other_count = queue_count + 1; 897 + channels->combined_count = channels->rx_count + 898 + channels->tx_count + 899 + channels->other_count; 900 + } 901 + 879 902 const struct ethtool_ops dpaa2_ethtool_ops = { 880 903 .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | 881 904 ETHTOOL_COALESCE_USE_ADAPTIVE_RX, ··· 919 896 .set_tunable = dpaa2_eth_set_tunable, 920 897 .get_coalesce = dpaa2_eth_get_coalesce, 921 898 .set_coalesce = dpaa2_eth_set_coalesce, 899 + .get_channels = dpaa2_eth_get_channels, 922 900 };
+454
drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) 2 + /* Copyright 2022 NXP 3 + */ 4 + #include <linux/filter.h> 5 + #include <linux/compiler.h> 6 + #include <linux/bpf_trace.h> 7 + #include <net/xdp.h> 8 + #include <net/xdp_sock_drv.h> 9 + 10 + #include "dpaa2-eth.h" 11 + 12 + static void dpaa2_eth_setup_consume_func(struct dpaa2_eth_priv *priv, 13 + struct dpaa2_eth_channel *ch, 14 + enum dpaa2_eth_fq_type type, 15 + dpaa2_eth_consume_cb_t *consume) 16 + { 17 + struct dpaa2_eth_fq *fq; 18 + int i; 19 + 20 + for (i = 0; i < priv->num_fqs; i++) { 21 + fq = &priv->fq[i]; 22 + 23 + if (fq->type != type) 24 + continue; 25 + if (fq->channel != ch) 26 + continue; 27 + 28 + fq->consume = consume; 29 + } 30 + } 31 + 32 + static u32 dpaa2_xsk_run_xdp(struct dpaa2_eth_priv *priv, 33 + struct dpaa2_eth_channel *ch, 34 + struct dpaa2_eth_fq *rx_fq, 35 + struct dpaa2_fd *fd, void *vaddr) 36 + { 37 + dma_addr_t addr = dpaa2_fd_get_addr(fd); 38 + struct bpf_prog *xdp_prog; 39 + struct xdp_buff *xdp_buff; 40 + struct dpaa2_eth_swa *swa; 41 + u32 xdp_act = XDP_PASS; 42 + int err; 43 + 44 + xdp_prog = READ_ONCE(ch->xdp.prog); 45 + if (!xdp_prog) 46 + goto out; 47 + 48 + swa = (struct dpaa2_eth_swa *)(vaddr + DPAA2_ETH_RX_HWA_SIZE + 49 + ch->xsk_pool->umem->headroom); 50 + xdp_buff = swa->xsk.xdp_buff; 51 + 52 + xdp_buff->data_hard_start = vaddr; 53 + xdp_buff->data = vaddr + dpaa2_fd_get_offset(fd); 54 + xdp_buff->data_end = xdp_buff->data + dpaa2_fd_get_len(fd); 55 + xdp_set_data_meta_invalid(xdp_buff); 56 + xdp_buff->rxq = &ch->xdp_rxq; 57 + 58 + xsk_buff_dma_sync_for_cpu(xdp_buff, ch->xsk_pool); 59 + xdp_act = bpf_prog_run_xdp(xdp_prog, xdp_buff); 60 + 61 + /* xdp.data pointer may have changed */ 62 + dpaa2_fd_set_offset(fd, xdp_buff->data - vaddr); 63 + dpaa2_fd_set_len(fd, xdp_buff->data_end - xdp_buff->data); 64 + 65 + if (likely(xdp_act == XDP_REDIRECT)) { 66 + err = xdp_do_redirect(priv->net_dev, xdp_buff, xdp_prog); 67 + if (unlikely(err)) { 68 + ch->stats.xdp_drop++; 69 + dpaa2_eth_recycle_buf(priv, ch, addr); 70 + } else { 71 + ch->buf_count--; 72 + ch->stats.xdp_redirect++; 73 + } 74 + 75 + goto xdp_redir; 76 + } 77 + 78 + switch (xdp_act) { 79 + case XDP_PASS: 80 + break; 81 + case XDP_TX: 82 + dpaa2_eth_xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid); 83 + break; 84 + default: 85 + bpf_warn_invalid_xdp_action(priv->net_dev, xdp_prog, xdp_act); 86 + fallthrough; 87 + case XDP_ABORTED: 88 + trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act); 89 + fallthrough; 90 + case XDP_DROP: 91 + dpaa2_eth_recycle_buf(priv, ch, addr); 92 + ch->stats.xdp_drop++; 93 + break; 94 + } 95 + 96 + xdp_redir: 97 + ch->xdp.res |= xdp_act; 98 + out: 99 + return xdp_act; 100 + } 101 + 102 + /* Rx frame processing routine for the AF_XDP fast path */ 103 + static void dpaa2_xsk_rx(struct dpaa2_eth_priv *priv, 104 + struct dpaa2_eth_channel *ch, 105 + const struct dpaa2_fd *fd, 106 + struct dpaa2_eth_fq *fq) 107 + { 108 + dma_addr_t addr = dpaa2_fd_get_addr(fd); 109 + u8 fd_format = dpaa2_fd_get_format(fd); 110 + struct rtnl_link_stats64 *percpu_stats; 111 + u32 fd_length = dpaa2_fd_get_len(fd); 112 + struct sk_buff *skb; 113 + void *vaddr; 114 + u32 xdp_act; 115 + 116 + trace_dpaa2_rx_xsk_fd(priv->net_dev, fd); 117 + 118 + vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr); 119 + percpu_stats = this_cpu_ptr(priv->percpu_stats); 120 + 121 + if (fd_format != dpaa2_fd_single) { 122 + WARN_ON(priv->xdp_prog); 123 + /* AF_XDP doesn't support any other formats */ 124 + goto err_frame_format; 125 + } 126 + 127 
+ xdp_act = dpaa2_xsk_run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr); 128 + if (xdp_act != XDP_PASS) { 129 + percpu_stats->rx_packets++; 130 + percpu_stats->rx_bytes += dpaa2_fd_get_len(fd); 131 + return; 132 + } 133 + 134 + /* Build skb */ 135 + skb = dpaa2_eth_alloc_skb(priv, ch, fd, fd_length, vaddr); 136 + if (!skb) 137 + /* Nothing else we can do, recycle the buffer and 138 + * drop the frame. 139 + */ 140 + goto err_alloc_skb; 141 + 142 + /* Send the skb to the Linux networking stack */ 143 + dpaa2_eth_receive_skb(priv, ch, fd, vaddr, fq, percpu_stats, skb); 144 + 145 + return; 146 + 147 + err_alloc_skb: 148 + dpaa2_eth_recycle_buf(priv, ch, addr); 149 + err_frame_format: 150 + percpu_stats->rx_dropped++; 151 + } 152 + 153 + static void dpaa2_xsk_set_bp_per_qdbin(struct dpaa2_eth_priv *priv, 154 + struct dpni_pools_cfg *pools_params) 155 + { 156 + int curr_bp = 0, i, j; 157 + 158 + pools_params->pool_options = DPNI_POOL_ASSOC_QDBIN; 159 + for (i = 0; i < priv->num_bps; i++) { 160 + for (j = 0; j < priv->num_channels; j++) 161 + if (priv->bp[i] == priv->channel[j]->bp) 162 + pools_params->pools[curr_bp].priority_mask |= (1 << j); 163 + if (!pools_params->pools[curr_bp].priority_mask) 164 + continue; 165 + 166 + pools_params->pools[curr_bp].dpbp_id = priv->bp[i]->bpid; 167 + pools_params->pools[curr_bp].buffer_size = priv->rx_buf_size; 168 + pools_params->pools[curr_bp++].backup_pool = 0; 169 + } 170 + pools_params->num_dpbp = curr_bp; 171 + } 172 + 173 + static int dpaa2_xsk_disable_pool(struct net_device *dev, u16 qid) 174 + { 175 + struct xsk_buff_pool *pool = xsk_get_pool_from_qid(dev, qid); 176 + struct dpaa2_eth_priv *priv = netdev_priv(dev); 177 + struct dpni_pools_cfg pools_params = { 0 }; 178 + struct dpaa2_eth_channel *ch; 179 + int err; 180 + bool up; 181 + 182 + ch = priv->channel[qid]; 183 + if (!ch->xsk_pool) 184 + return -EINVAL; 185 + 186 + up = netif_running(dev); 187 + if (up) 188 + dev_close(dev); 189 + 190 + xsk_pool_dma_unmap(pool, 0); 191 + err = xdp_rxq_info_reg_mem_model(&ch->xdp_rxq, 192 + MEM_TYPE_PAGE_ORDER0, NULL); 193 + if (err) 194 + netdev_err(dev, "xsk_rxq_info_reg_mem_model() failed (err = %d)\n", 195 + err); 196 + 197 + dpaa2_eth_free_dpbp(priv, ch->bp); 198 + 199 + ch->xsk_zc = false; 200 + ch->xsk_pool = NULL; 201 + ch->xsk_tx_pkts_sent = 0; 202 + ch->bp = priv->bp[DPAA2_ETH_DEFAULT_BP_IDX]; 203 + 204 + dpaa2_eth_setup_consume_func(priv, ch, DPAA2_RX_FQ, dpaa2_eth_rx); 205 + 206 + dpaa2_xsk_set_bp_per_qdbin(priv, &pools_params); 207 + err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &pools_params); 208 + if (err) 209 + netdev_err(dev, "dpni_set_pools() failed\n"); 210 + 211 + if (up) { 212 + err = dev_open(dev, NULL); 213 + if (err) 214 + return err; 215 + } 216 + 217 + return 0; 218 + } 219 + 220 + static int dpaa2_xsk_enable_pool(struct net_device *dev, 221 + struct xsk_buff_pool *pool, 222 + u16 qid) 223 + { 224 + struct dpaa2_eth_priv *priv = netdev_priv(dev); 225 + struct dpni_pools_cfg pools_params = { 0 }; 226 + struct dpaa2_eth_channel *ch; 227 + int err, err2; 228 + bool up; 229 + 230 + if (priv->dpni_attrs.wriop_version < DPAA2_WRIOP_VERSION(3, 0, 0)) { 231 + netdev_err(dev, "AF_XDP zero-copy not supported on devices <= WRIOP(3, 0, 0)\n"); 232 + return -EOPNOTSUPP; 233 + } 234 + 235 + if (priv->dpni_attrs.num_queues > 8) { 236 + netdev_err(dev, "AF_XDP zero-copy not supported on DPNI with more then 8 queues\n"); 237 + return -EOPNOTSUPP; 238 + } 239 + 240 + up = netif_running(dev); 241 + if (up) 242 + dev_close(dev); 243 + 244 + 
err = xsk_pool_dma_map(pool, priv->net_dev->dev.parent, 0); 245 + if (err) { 246 + netdev_err(dev, "xsk_pool_dma_map() failed (err = %d)\n", 247 + err); 248 + goto err_dma_unmap; 249 + } 250 + 251 + ch = priv->channel[qid]; 252 + err = xdp_rxq_info_reg_mem_model(&ch->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); 253 + if (err) { 254 + netdev_err(dev, "xdp_rxq_info_reg_mem_model() failed (err = %d)\n", err); 255 + goto err_mem_model; 256 + } 257 + xsk_pool_set_rxq_info(pool, &ch->xdp_rxq); 258 + 259 + priv->bp[priv->num_bps] = dpaa2_eth_allocate_dpbp(priv); 260 + if (IS_ERR(priv->bp[priv->num_bps])) { 261 + err = PTR_ERR(priv->bp[priv->num_bps]); 262 + goto err_bp_alloc; 263 + } 264 + ch->xsk_zc = true; 265 + ch->xsk_pool = pool; 266 + ch->bp = priv->bp[priv->num_bps++]; 267 + 268 + dpaa2_eth_setup_consume_func(priv, ch, DPAA2_RX_FQ, dpaa2_xsk_rx); 269 + 270 + dpaa2_xsk_set_bp_per_qdbin(priv, &pools_params); 271 + err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &pools_params); 272 + if (err) { 273 + netdev_err(dev, "dpni_set_pools() failed\n"); 274 + goto err_set_pools; 275 + } 276 + 277 + if (up) { 278 + err = dev_open(dev, NULL); 279 + if (err) 280 + return err; 281 + } 282 + 283 + return 0; 284 + 285 + err_set_pools: 286 + err2 = dpaa2_xsk_disable_pool(dev, qid); 287 + if (err2) 288 + netdev_err(dev, "dpaa2_xsk_disable_pool() failed %d\n", err2); 289 + err_bp_alloc: 290 + err2 = xdp_rxq_info_reg_mem_model(&priv->channel[qid]->xdp_rxq, 291 + MEM_TYPE_PAGE_ORDER0, NULL); 292 + if (err2) 293 + netdev_err(dev, "xsk_rxq_info_reg_mem_model() failed with %d)\n", err2); 294 + err_mem_model: 295 + xsk_pool_dma_unmap(pool, 0); 296 + err_dma_unmap: 297 + if (up) 298 + dev_open(dev, NULL); 299 + 300 + return err; 301 + } 302 + 303 + int dpaa2_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) 304 + { 305 + return pool ? dpaa2_xsk_enable_pool(dev, pool, qid) : 306 + dpaa2_xsk_disable_pool(dev, qid); 307 + } 308 + 309 + int dpaa2_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) 310 + { 311 + struct dpaa2_eth_priv *priv = netdev_priv(dev); 312 + struct dpaa2_eth_channel *ch = priv->channel[qid]; 313 + 314 + if (!priv->link_state.up) 315 + return -ENETDOWN; 316 + 317 + if (!priv->xdp_prog) 318 + return -EINVAL; 319 + 320 + if (!ch->xsk_zc) 321 + return -EINVAL; 322 + 323 + /* We do not have access to a per channel SW interrupt, so instead we 324 + * schedule a NAPI instance. 
325 + */
326 + if (!napi_if_scheduled_mark_missed(&ch->napi))
327 + napi_schedule(&ch->napi);
328 + 
329 + return 0;
330 + }
331 + 
332 + static int dpaa2_xsk_tx_build_fd(struct dpaa2_eth_priv *priv,
333 + struct dpaa2_eth_channel *ch,
334 + struct dpaa2_fd *fd,
335 + struct xdp_desc *xdp_desc)
336 + {
337 + struct device *dev = priv->net_dev->dev.parent;
338 + struct dpaa2_sg_entry *sgt;
339 + struct dpaa2_eth_swa *swa;
340 + void *sgt_buf = NULL;
341 + dma_addr_t sgt_addr;
342 + int sgt_buf_size;
343 + dma_addr_t addr;
344 + int err = 0;
345 + 
346 + /* Prepare the HW SGT structure */
347 + sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry);
348 + sgt_buf = dpaa2_eth_sgt_get(priv);
349 + if (unlikely(!sgt_buf))
350 + return -ENOMEM;
351 + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);
352 + 
353 + /* Get the address of the XSK Tx buffer */
354 + addr = xsk_buff_raw_get_dma(ch->xsk_pool, xdp_desc->addr);
355 + xsk_buff_raw_dma_sync_for_device(ch->xsk_pool, addr, xdp_desc->len);
356 + 
357 + /* Fill in the HW SGT structure */
358 + dpaa2_sg_set_addr(sgt, addr);
359 + dpaa2_sg_set_len(sgt, xdp_desc->len);
360 + dpaa2_sg_set_final(sgt, true);
361 + 
362 + /* Store the necessary info in the SGT buffer */
363 + swa = (struct dpaa2_eth_swa *)sgt_buf;
364 + swa->type = DPAA2_ETH_SWA_XSK;
365 + swa->xsk.sgt_size = sgt_buf_size;
366 + 
367 + /* Separately map the SGT buffer */
368 + sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
369 + if (unlikely(dma_mapping_error(dev, sgt_addr))) {
370 + err = -ENOMEM;
371 + goto sgt_map_failed;
372 + }
373 + 
374 + /* Initialize FD fields */
375 + memset(fd, 0, sizeof(struct dpaa2_fd));
376 + dpaa2_fd_set_offset(fd, priv->tx_data_offset);
377 + dpaa2_fd_set_format(fd, dpaa2_fd_sg);
378 + dpaa2_fd_set_addr(fd, sgt_addr);
379 + dpaa2_fd_set_len(fd, xdp_desc->len);
380 + dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
381 + 
382 + return 0;
383 + 
384 + sgt_map_failed:
385 + dpaa2_eth_sgt_recycle(priv, sgt_buf);
386 + 
387 + return err;
388 + }
389 + 
390 + bool dpaa2_xsk_tx(struct dpaa2_eth_priv *priv,
391 + struct dpaa2_eth_channel *ch)
392 + {
393 + struct xdp_desc *xdp_descs = ch->xsk_pool->tx_descs;
394 + struct dpaa2_eth_drv_stats *percpu_extras;
395 + struct rtnl_link_stats64 *percpu_stats;
396 + int budget = DPAA2_ETH_TX_ZC_PER_NAPI;
397 + int total_enqueued, enqueued;
398 + int retries, max_retries;
399 + struct dpaa2_eth_fq *fq;
400 + struct dpaa2_fd *fds;
401 + int batch, i, err;
402 + 
403 + percpu_stats = this_cpu_ptr(priv->percpu_stats);
404 + percpu_extras = this_cpu_ptr(priv->percpu_extras);
405 + fds = (this_cpu_ptr(priv->fd))->array;
406 + 
407 + /* Use the FQ with the same idx as the affine CPU */
408 + fq = &priv->fq[ch->nctx.desired_cpu];
409 + 
410 + batch = xsk_tx_peek_release_desc_batch(ch->xsk_pool, budget);
411 + if (!batch)
412 + return false;
413 + 
414 + /* Create an FD for each XSK frame to be sent */
415 + for (i = 0; i < batch; i++) {
416 + err = dpaa2_xsk_tx_build_fd(priv, ch, &fds[i], &xdp_descs[i]);
417 + if (err) {
418 + batch = i;
419 + break;
420 + }
421 + 
422 + trace_dpaa2_tx_xsk_fd(priv->net_dev, &fds[i]);
423 + }
424 + 
425 + /* Enqueue all the created FDs */
426 + max_retries = batch * DPAA2_ETH_ENQUEUE_RETRIES;
427 + total_enqueued = 0;
428 + enqueued = 0;
429 + retries = 0;
430 + while (total_enqueued < batch && retries < max_retries) {
431 + err = priv->enqueue(priv, fq, &fds[total_enqueued], 0,
432 + batch - total_enqueued, &enqueued);
433 + if (err == -EBUSY) {
434 + retries++;
435 + continue;
436 + }
437 + 
438 + total_enqueued += enqueued;
439 + }
440 + percpu_extras->tx_portal_busy += retries;
441 + 
442 + /* Update statistics */
443 + percpu_stats->tx_packets += total_enqueued;
444 + for (i = 0; i < total_enqueued; i++)
445 + percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
446 + for (i = total_enqueued; i < batch; i++) {
447 + dpaa2_eth_free_tx_fd(priv, ch, fq, &fds[i], false);
448 + percpu_stats->tx_errors++;
449 + }
450 + 
451 + xsk_tx_release(ch->xsk_pool);
452 + 
453 + return total_enqueued == budget;
454 + }
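
The Rx and Tx handlers above are only exercised once userspace binds an AF_XDP socket in zero-copy mode on one of the interface's queues; that bind is what eventually reaches dpaa2_xsk_setup_pool() through the .ndo_bpf (XDP_SETUP_XSK_POOL) path. A minimal userspace sketch of such a bind, assuming libxdp's xsk helper API and a placeholder interface name "eth0" (illustrative only, not part of this patch):

#include <stdint.h>
#include <linux/if_xdp.h>
#include <xdp/xsk.h>

/* Hypothetical helper: bind an AF_XDP socket in zero-copy mode on queue
 * 'qid'. The umem and its fill/completion rings are assumed to have been
 * set up beforehand with xsk_umem__create(). */
static int bind_xsk_zc(struct xsk_umem *umem, struct xsk_ring_cons *rx,
		       struct xsk_ring_prod *tx, uint32_t qid,
		       struct xsk_socket **xsk)
{
	struct xsk_socket_config cfg = {
		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		/* Insist on zero-copy instead of falling back to copy mode */
		.bind_flags = XDP_ZEROCOPY,
	};

	/* On DPAA2 the queue id must be below 8, per the checks in
	 * dpaa2_xsk_enable_pool() above. */
	return xsk_socket__create(xsk, "eth0", qid, umem, rx, tx, &cfg);
}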
+13 -6
drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
··· 13 13 #define DPNI_VER_MINOR 0 14 14 #define DPNI_CMD_BASE_VERSION 1 15 15 #define DPNI_CMD_2ND_VERSION 2 16 + #define DPNI_CMD_3RD_VERSION 3 16 17 #define DPNI_CMD_ID_OFFSET 4 17 18 18 19 #define DPNI_CMD(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION) 19 20 #define DPNI_CMD_V2(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_2ND_VERSION) 21 + #define DPNI_CMD_V3(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_3RD_VERSION) 20 22 21 23 #define DPNI_CMDID_OPEN DPNI_CMD(0x801) 22 24 #define DPNI_CMDID_CLOSE DPNI_CMD(0x800) ··· 41 39 #define DPNI_CMDID_GET_IRQ_STATUS DPNI_CMD(0x016) 42 40 #define DPNI_CMDID_CLEAR_IRQ_STATUS DPNI_CMD(0x017) 43 41 44 - #define DPNI_CMDID_SET_POOLS DPNI_CMD(0x200) 42 + #define DPNI_CMDID_SET_POOLS DPNI_CMD_V3(0x200) 45 43 #define DPNI_CMDID_SET_ERRORS_BEHAVIOR DPNI_CMD(0x20B) 46 44 47 45 #define DPNI_CMDID_GET_QDID DPNI_CMD(0x210) ··· 117 115 }; 118 116 119 117 #define DPNI_BACKUP_POOL(val, order) (((val) & 0x1) << (order)) 118 + 119 + struct dpni_cmd_pool { 120 + __le16 dpbp_id; 121 + u8 priority_mask; 122 + u8 pad; 123 + }; 124 + 120 125 struct dpni_cmd_set_pools { 121 - /* cmd word 0 */ 122 126 u8 num_dpbp; 123 127 u8 backup_pool_mask; 124 - __le16 pad; 125 - /* cmd word 0..4 */ 126 - __le32 dpbp_id[DPNI_MAX_DPBP]; 127 - /* cmd word 4..6 */ 128 + u8 pad; 129 + u8 pool_options; 130 + struct dpni_cmd_pool pool[DPNI_MAX_DPBP]; 128 131 __le16 buffer_size[DPNI_MAX_DPBP]; 129 132 }; 130 133
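
One constraint the reworked layout has to respect: the MC command format carries seven 64-bit parameter words, i.e. 56 bytes of payload. With DPNI_MAX_DPBP entries of the 4-byte struct dpni_cmd_pool plus the per-pool __le16 buffer sizes, the v3 command still fits. A compile-time guard along these lines (an illustrative sketch, not part of the patch) would document that invariant:

/* Assumes DPNI_MAX_DPBP == 8: 4 + 8 * 4 + 8 * 2 = 52 <= 56 bytes */
static_assert(sizeof(struct dpni_cmd_pool) == 4,
	      "dpni_cmd_pool wire format changed");
static_assert(sizeof(struct dpni_cmd_set_pools) <= 7 * sizeof(__le64),
	      "set_pools command exceeds the MC command payload");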
+5 -1
drivers/net/ethernet/freescale/dpaa2/dpni.c
··· 173 173 token); 174 174 cmd_params = (struct dpni_cmd_set_pools *)cmd.params; 175 175 cmd_params->num_dpbp = cfg->num_dpbp; 176 + cmd_params->pool_options = cfg->pool_options; 176 177 for (i = 0; i < DPNI_MAX_DPBP; i++) { 177 - cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id); 178 + cmd_params->pool[i].dpbp_id = 179 + cpu_to_le16(cfg->pools[i].dpbp_id); 180 + cmd_params->pool[i].priority_mask = 181 + cfg->pools[i].priority_mask; 178 182 cmd_params->buffer_size[i] = 179 183 cpu_to_le16(cfg->pools[i].buffer_size); 180 184 cmd_params->backup_pool_mask |=
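
The marshalling above copies priority_mask through untouched; what the mask means depends on pool_options. With DPNI_POOL_ASSOC_QDBIN, bit j of the mask associates the pool with queue (dbin) j, and since the mask is a single u8, at most eight queues can be expressed, which is exactly the limitation behind the num_queues > 8 check in dpaa2_xsk_enable_pool(). A hypothetical helper equivalent to what dpaa2_xsk_set_bp_per_qdbin() does inline (sketch only):

/* Build the per-qdbin mask for one buffer pool: set bit j for every
 * channel j whose Rx path draws from 'bp'. */
static u8 dpaa2_qdbin_mask_for_bp(struct dpaa2_eth_priv *priv,
				  struct dpaa2_eth_bp *bp)
{
	u8 mask = 0;
	int j;

	for (j = 0; j < priv->num_channels; j++)
		if (priv->channel[j]->bp == bp)
			mask |= 1 << j;

	/* A zero mask means no channel uses this pool; the caller skips
	 * such entries rather than sending 0x00, which the MC would
	 * interpret as 0xff (all). */
	return mask;
}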
+9
drivers/net/ethernet/freescale/dpaa2/dpni.h
··· 92 92 u32 cmd_flags,
93 93 u16 token);
94 94 
95 + #define DPNI_POOL_ASSOC_QPRI 0
96 + #define DPNI_POOL_ASSOC_QDBIN 1
97 + 
95 98 /**
96 99 * struct dpni_pools_cfg - Structure representing buffer pools configuration
97 100 * @num_dpbp: Number of DPBPs
101 + * @pool_options: Buffer assignment options.
102 + * This field is a combination of the DPNI_POOL_ASSOC_* flags
98 103 * @pools: Array of buffer pools parameters; The number of valid entries
99 104 * must match 'num_dpbp' value
100 105 * @pools.dpbp_id: DPBP object ID
106 + * @pools.priority_mask: Priority mask that indicates the TCs used with this buffer.
107 + * If set to 0x00, the MC assumes the value 0xff.
101 108 * @pools.buffer_size: Buffer size
102 109 * @pools.backup_pool: Backup pool
103 110 */
104 111 struct dpni_pools_cfg {
105 112 u8 num_dpbp;
113 + u8 pool_options;
106 114 struct {
107 115 int dpbp_id;
116 + u8 priority_mask;
108 117 u16 buffer_size;
109 118 int backup_pool;
110 119 } pools[DPNI_MAX_DPBP];
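
For the default case, before any XSK pool is attached, the whole configuration collapses to a single pool shared by all queues. A sketch of filling the extended structure (illustrative only; it assumes a dpaa2-eth 'priv' is in scope):

/* Single default buffer pool, shared by all queues */
struct dpni_pools_cfg cfg = {
	.num_dpbp = 1,
	.pool_options = DPNI_POOL_ASSOC_QDBIN,
};
int err;

cfg.pools[0].dpbp_id = priv->bp[DPAA2_ETH_DEFAULT_BP_IDX]->bpid;
cfg.pools[0].priority_mask = 0xff;	/* every qdbin draws from it */
cfg.pools[0].buffer_size = priv->rx_buf_size;
cfg.pools[0].backup_pool = 0;

err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &cfg);
if (err)
	netdev_err(priv->net_dev, "dpni_set_pools() failed\n");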