Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tsnep: Add XDP socket zero-copy RX support

Add support for XSK zero-copy to RX path. The setup of the XSK pool can
be done at runtime. If the netdev is running, then the queue must be
disabled and enabled during reconfiguration. This can be done easily
with functions introduced in previous commits.

A more important property is that, if the netdev is running, then the
setup of the XSK pool shall not stop the netdev in case of errors. A
broken netdev after a failed XSK pool setup is bad behavior. Therefore,
the allocation and setup of resources during XSK pool setup is done only
before any queue is disabled. Additionally, freeing and later allocation
of resources is eliminated in some cases. Page pool entries are kept for
later use. Two memory models are registered in parallel. As a result,
the XSK pool setup cannot fail during queue reconfiguration.

In contrast to other drivers, XSK pool setup and XDP BPF program setup
are separate actions. XSK pool setup can be done without any XDP BPF
program. The XDP BPF program can be added, removed or changed without
any reconfiguration of the XSK pool.

Test results with A53 1.2GHz:

xdpsock rxdrop copy mode, 64 byte frames:
pps pkts 1.00
rx 856,054 10,625,775
Two CPUs with both 100% utilization.

xdpsock rxdrop zero-copy mode, 64 byte frames:
pps pkts 1.00
rx 889,388 4,615,284
Two CPUs with 100% and 20% utilization.

Packet rate increases and CPU utilization is reduced.

100% CPU load seems to be the base load. This load is consumed by ksoftirqd
just for dropping the generated packets without xdpsock running.

Using batch API reduced CPU utilization slightly, but measurements are
not stable enough to provide meaningful numbers.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Gerhard Engleder and committed by
Jakub Kicinski
3fc23339 c2d64697

+556 -14
+12 -1
drivers/net/ethernet/engleder/tsnep.h
··· 101 101 102 102 u32 properties; 103 103 104 - struct page *page; 104 + union { 105 + struct page *page; 106 + struct xdp_buff *xdp; 107 + }; 105 108 size_t len; 106 109 dma_addr_t dma; 107 110 }; ··· 124 121 u32 owner_counter; 125 122 int increment_owner_counter; 126 123 struct page_pool *page_pool; 124 + struct page **page_buffer; 125 + struct xsk_buff_pool *xsk_pool; 126 + struct xdp_buff **xdp_batch; 127 127 128 128 u32 packets; 129 129 u32 bytes; ··· 135 129 u32 alloc_failed; 136 130 137 131 struct xdp_rxq_info xdp_rxq; 132 + struct xdp_rxq_info xdp_rxq_zc; 138 133 }; 139 134 140 135 struct tsnep_queue { ··· 221 214 222 215 int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct bpf_prog *prog, 223 216 struct netlink_ext_ack *extack); 217 + int tsnep_xdp_setup_pool(struct tsnep_adapter *adapter, 218 + struct xsk_buff_pool *pool, u16 queue_id); 224 219 225 220 #if IS_ENABLED(CONFIG_TSNEP_SELFTESTS) 226 221 int tsnep_ethtool_get_test_count(void); ··· 251 242 void tsnep_get_system_time(struct tsnep_adapter *adapter, u64 *time); 252 243 int tsnep_set_irq_coalesce(struct tsnep_queue *queue, u32 usecs); 253 244 u32 tsnep_get_irq_coalesce(struct tsnep_queue *queue); 245 + int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool); 246 + void tsnep_disable_xsk(struct tsnep_queue *queue); 254 247 255 248 #endif /* _TSNEP_H */
+478 -13
drivers/net/ethernet/engleder/tsnep_main.c
··· 28 28 #include <linux/iopoll.h> 29 29 #include <linux/bpf.h> 30 30 #include <linux/bpf_trace.h> 31 + #include <net/xdp_sock_drv.h> 31 32 32 33 #define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) 33 34 #define TSNEP_HEADROOM ALIGN(TSNEP_RX_OFFSET, 4) 34 35 #define TSNEP_MAX_RX_BUF_SIZE (PAGE_SIZE - TSNEP_HEADROOM - \ 35 36 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) 37 + /* XSK buffer shall store at least Q-in-Q frame */ 38 + #define TSNEP_XSK_RX_BUF_SIZE (ALIGN(TSNEP_RX_INLINE_METADATA_SIZE + \ 39 + ETH_FRAME_LEN + ETH_FCS_LEN + \ 40 + VLAN_HLEN * 2, 4)) 36 41 37 42 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT 38 43 #define DMA_ADDR_HIGH(dma_addr) ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF)) ··· 782 777 783 778 for (i = 0; i < TSNEP_RING_SIZE; i++) { 784 779 entry = &rx->entry[i]; 785 - if (entry->page) 780 + if (!rx->xsk_pool && entry->page) 786 781 page_pool_put_full_page(rx->page_pool, entry->page, 787 782 false); 783 + if (rx->xsk_pool && entry->xdp) 784 + xsk_buff_free(entry->xdp); 785 + /* xdp is union with page */ 788 786 entry->page = NULL; 789 787 } 790 788 ··· 900 892 return rx->read - rx->write - 1; 901 893 } 902 894 895 + static void tsnep_rx_free_page_buffer(struct tsnep_rx *rx) 896 + { 897 + struct page **page; 898 + 899 + /* last entry of page_buffer is always zero, because ring cannot be 900 + * filled completely 901 + */ 902 + page = rx->page_buffer; 903 + while (*page) { 904 + page_pool_put_full_page(rx->page_pool, *page, false); 905 + *page = NULL; 906 + page++; 907 + } 908 + } 909 + 910 + static int tsnep_rx_alloc_page_buffer(struct tsnep_rx *rx) 911 + { 912 + int i; 913 + 914 + /* alloc for all ring entries except the last one, because ring cannot 915 + * be filled completely 916 + */ 917 + for (i = 0; i < TSNEP_RING_SIZE - 1; i++) { 918 + rx->page_buffer[i] = page_pool_dev_alloc_pages(rx->page_pool); 919 + if (!rx->page_buffer[i]) { 920 + tsnep_rx_free_page_buffer(rx); 921 + 922 + return -ENOMEM; 923 + } 924 + } 925 + 926 
+ return 0; 927 + } 928 + 903 929 static void tsnep_rx_set_page(struct tsnep_rx *rx, struct tsnep_rx_entry *entry, 904 930 struct page *page) 905 931 { ··· 969 927 { 970 928 struct tsnep_rx_entry *entry = &rx->entry[index]; 971 929 972 - /* TSNEP_MAX_RX_BUF_SIZE is a multiple of 4 */ 930 + /* TSNEP_MAX_RX_BUF_SIZE and TSNEP_XSK_RX_BUF_SIZE are multiple of 4 */ 973 931 entry->properties = entry->len & TSNEP_DESC_LENGTH_MASK; 974 932 entry->properties |= TSNEP_DESC_INTERRUPT_FLAG; 975 933 if (index == rx->increment_owner_counter) { ··· 1031 989 return desc_refilled; 1032 990 } 1033 991 992 + static void tsnep_rx_set_xdp(struct tsnep_rx *rx, struct tsnep_rx_entry *entry, 993 + struct xdp_buff *xdp) 994 + { 995 + entry->xdp = xdp; 996 + entry->len = TSNEP_XSK_RX_BUF_SIZE; 997 + entry->dma = xsk_buff_xdp_get_dma(entry->xdp); 998 + entry->desc->rx = __cpu_to_le64(entry->dma); 999 + } 1000 + 1001 + static void tsnep_rx_reuse_buffer_zc(struct tsnep_rx *rx, int index) 1002 + { 1003 + struct tsnep_rx_entry *entry = &rx->entry[index]; 1004 + struct tsnep_rx_entry *read = &rx->entry[rx->read]; 1005 + 1006 + tsnep_rx_set_xdp(rx, entry, read->xdp); 1007 + read->xdp = NULL; 1008 + } 1009 + 1010 + static int tsnep_rx_alloc_zc(struct tsnep_rx *rx, int count, bool reuse) 1011 + { 1012 + u32 allocated; 1013 + int i; 1014 + 1015 + allocated = xsk_buff_alloc_batch(rx->xsk_pool, rx->xdp_batch, count); 1016 + for (i = 0; i < allocated; i++) { 1017 + int index = (rx->write + i) & TSNEP_RING_MASK; 1018 + struct tsnep_rx_entry *entry = &rx->entry[index]; 1019 + 1020 + tsnep_rx_set_xdp(rx, entry, rx->xdp_batch[i]); 1021 + tsnep_rx_activate(rx, index); 1022 + } 1023 + if (i == 0) { 1024 + rx->alloc_failed++; 1025 + 1026 + if (reuse) { 1027 + tsnep_rx_reuse_buffer_zc(rx, rx->write); 1028 + tsnep_rx_activate(rx, rx->write); 1029 + } 1030 + } 1031 + 1032 + if (i) 1033 + rx->write = (rx->write + i) & TSNEP_RING_MASK; 1034 + 1035 + return i; 1036 + } 1037 + 1038 + static void 
tsnep_rx_free_zc(struct tsnep_rx *rx) 1039 + { 1040 + int i; 1041 + 1042 + for (i = 0; i < TSNEP_RING_SIZE; i++) { 1043 + struct tsnep_rx_entry *entry = &rx->entry[i]; 1044 + 1045 + if (entry->xdp) 1046 + xsk_buff_free(entry->xdp); 1047 + entry->xdp = NULL; 1048 + } 1049 + } 1050 + 1051 + static int tsnep_rx_refill_zc(struct tsnep_rx *rx, int count, bool reuse) 1052 + { 1053 + int desc_refilled; 1054 + 1055 + desc_refilled = tsnep_rx_alloc_zc(rx, count, reuse); 1056 + if (desc_refilled) 1057 + tsnep_rx_enable(rx); 1058 + 1059 + return desc_refilled; 1060 + } 1061 + 1034 1062 static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog, 1035 1063 struct xdp_buff *xdp, int *status, 1036 1064 struct netdev_queue *tx_nq, struct tsnep_tx *tx) ··· 1112 1000 length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM; 1113 1001 1114 1002 act = bpf_prog_run_xdp(prog, xdp); 1115 - 1116 - /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */ 1117 - sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM; 1118 - sync = max(sync, length); 1119 - 1120 1003 switch (act) { 1121 1004 case XDP_PASS: 1122 1005 return false; ··· 1133 1026 trace_xdp_exception(rx->adapter->netdev, prog, act); 1134 1027 fallthrough; 1135 1028 case XDP_DROP: 1029 + /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU 1030 + * touch 1031 + */ 1032 + sync = xdp->data_end - xdp->data_hard_start - 1033 + XDP_PACKET_HEADROOM; 1034 + sync = max(sync, length); 1136 1035 page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data), 1137 1036 sync, true); 1037 + return true; 1038 + } 1039 + } 1040 + 1041 + static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog, 1042 + struct xdp_buff *xdp, int *status, 1043 + struct netdev_queue *tx_nq, 1044 + struct tsnep_tx *tx) 1045 + { 1046 + u32 act; 1047 + 1048 + act = bpf_prog_run_xdp(prog, xdp); 1049 + 1050 + /* XDP_REDIRECT is the main action for zero-copy */ 1051 + if (likely(act == 
XDP_REDIRECT)) { 1052 + if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0) 1053 + goto out_failure; 1054 + *status |= TSNEP_XDP_REDIRECT; 1055 + return true; 1056 + } 1057 + 1058 + switch (act) { 1059 + case XDP_PASS: 1060 + return false; 1061 + case XDP_TX: 1062 + if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx)) 1063 + goto out_failure; 1064 + *status |= TSNEP_XDP_TX; 1065 + return true; 1066 + default: 1067 + bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act); 1068 + fallthrough; 1069 + case XDP_ABORTED: 1070 + out_failure: 1071 + trace_xdp_exception(rx->adapter->netdev, prog, act); 1072 + fallthrough; 1073 + case XDP_DROP: 1074 + xsk_buff_free(xdp); 1138 1075 return true; 1139 1076 } 1140 1077 } ··· 1355 1204 return done; 1356 1205 } 1357 1206 1207 + static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi, 1208 + int budget) 1209 + { 1210 + struct tsnep_rx_entry *entry; 1211 + struct netdev_queue *tx_nq; 1212 + struct bpf_prog *prog; 1213 + struct tsnep_tx *tx; 1214 + int desc_available; 1215 + int xdp_status = 0; 1216 + struct page *page; 1217 + int done = 0; 1218 + int length; 1219 + 1220 + desc_available = tsnep_rx_desc_available(rx); 1221 + prog = READ_ONCE(rx->adapter->xdp_prog); 1222 + if (prog) { 1223 + tx_nq = netdev_get_tx_queue(rx->adapter->netdev, 1224 + rx->tx_queue_index); 1225 + tx = &rx->adapter->tx[rx->tx_queue_index]; 1226 + } 1227 + 1228 + while (likely(done < budget) && (rx->read != rx->write)) { 1229 + entry = &rx->entry[rx->read]; 1230 + if ((__le32_to_cpu(entry->desc_wb->properties) & 1231 + TSNEP_DESC_OWNER_COUNTER_MASK) != 1232 + (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK)) 1233 + break; 1234 + done++; 1235 + 1236 + if (desc_available >= TSNEP_RING_RX_REFILL) { 1237 + bool reuse = desc_available >= TSNEP_RING_RX_REUSE; 1238 + 1239 + desc_available -= tsnep_rx_refill_zc(rx, desc_available, 1240 + reuse); 1241 + if (!entry->xdp) { 1242 + /* buffer has been reused for refill to prevent 1243 + * 
empty RX ring, thus buffer cannot be used for 1244 + * RX processing 1245 + */ 1246 + rx->read = (rx->read + 1) & TSNEP_RING_MASK; 1247 + desc_available++; 1248 + 1249 + rx->dropped++; 1250 + 1251 + continue; 1252 + } 1253 + } 1254 + 1255 + /* descriptor properties shall be read first, because valid data 1256 + * is signaled there 1257 + */ 1258 + dma_rmb(); 1259 + 1260 + prefetch(entry->xdp->data); 1261 + length = __le32_to_cpu(entry->desc_wb->properties) & 1262 + TSNEP_DESC_LENGTH_MASK; 1263 + xsk_buff_set_size(entry->xdp, length); 1264 + xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool); 1265 + 1266 + /* RX metadata with timestamps is in front of actual data, 1267 + * subtract metadata size to get length of actual data and 1268 + * consider metadata size as offset of actual data during RX 1269 + * processing 1270 + */ 1271 + length -= TSNEP_RX_INLINE_METADATA_SIZE; 1272 + 1273 + rx->read = (rx->read + 1) & TSNEP_RING_MASK; 1274 + desc_available++; 1275 + 1276 + if (prog) { 1277 + bool consume; 1278 + 1279 + entry->xdp->data += TSNEP_RX_INLINE_METADATA_SIZE; 1280 + entry->xdp->data_meta += TSNEP_RX_INLINE_METADATA_SIZE; 1281 + 1282 + consume = tsnep_xdp_run_prog_zc(rx, prog, entry->xdp, 1283 + &xdp_status, tx_nq, tx); 1284 + if (consume) { 1285 + rx->packets++; 1286 + rx->bytes += length; 1287 + 1288 + entry->xdp = NULL; 1289 + 1290 + continue; 1291 + } 1292 + } 1293 + 1294 + page = page_pool_dev_alloc_pages(rx->page_pool); 1295 + if (page) { 1296 + memcpy(page_address(page) + TSNEP_RX_OFFSET, 1297 + entry->xdp->data - TSNEP_RX_INLINE_METADATA_SIZE, 1298 + length + TSNEP_RX_INLINE_METADATA_SIZE); 1299 + tsnep_rx_page(rx, napi, page, length); 1300 + } else { 1301 + rx->dropped++; 1302 + } 1303 + xsk_buff_free(entry->xdp); 1304 + entry->xdp = NULL; 1305 + } 1306 + 1307 + if (xdp_status) 1308 + tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx); 1309 + 1310 + if (desc_available) 1311 + desc_available -= tsnep_rx_refill_zc(rx, desc_available, false); 1312 + 
1313 + if (xsk_uses_need_wakeup(rx->xsk_pool)) { 1314 + if (desc_available) 1315 + xsk_set_rx_need_wakeup(rx->xsk_pool); 1316 + else 1317 + xsk_clear_rx_need_wakeup(rx->xsk_pool); 1318 + 1319 + return done; 1320 + } 1321 + 1322 + return desc_available ? budget : done; 1323 + } 1324 + 1358 1325 static bool tsnep_rx_pending(struct tsnep_rx *rx) 1359 1326 { 1360 1327 struct tsnep_rx_entry *entry; ··· 1500 1231 tsnep_rx_init(rx); 1501 1232 1502 1233 desc_available = tsnep_rx_desc_available(rx); 1503 - retval = tsnep_rx_alloc(rx, desc_available, false); 1234 + if (rx->xsk_pool) 1235 + retval = tsnep_rx_alloc_zc(rx, desc_available, false); 1236 + else 1237 + retval = tsnep_rx_alloc(rx, desc_available, false); 1504 1238 if (retval != desc_available) { 1505 - tsnep_rx_ring_cleanup(rx); 1239 + retval = -ENOMEM; 1506 1240 1507 - return -ENOMEM; 1241 + goto alloc_failed; 1242 + } 1243 + 1244 + /* prealloc pages to prevent allocation failures when XSK pool is 1245 + * disabled at runtime 1246 + */ 1247 + if (rx->xsk_pool) { 1248 + retval = tsnep_rx_alloc_page_buffer(rx); 1249 + if (retval) 1250 + goto alloc_failed; 1508 1251 } 1509 1252 1510 1253 return 0; 1254 + 1255 + alloc_failed: 1256 + tsnep_rx_ring_cleanup(rx); 1257 + return retval; 1511 1258 } 1512 1259 1513 1260 static void tsnep_rx_close(struct tsnep_rx *rx) 1514 1261 { 1262 + if (rx->xsk_pool) 1263 + tsnep_rx_free_page_buffer(rx); 1264 + 1515 1265 tsnep_rx_ring_cleanup(rx); 1266 + } 1267 + 1268 + static void tsnep_rx_reopen(struct tsnep_rx *rx) 1269 + { 1270 + struct page **page = rx->page_buffer; 1271 + int i; 1272 + 1273 + tsnep_rx_init(rx); 1274 + 1275 + for (i = 0; i < TSNEP_RING_SIZE; i++) { 1276 + struct tsnep_rx_entry *entry = &rx->entry[i]; 1277 + 1278 + /* defined initial values for properties are required for 1279 + * correct owner counter checking 1280 + */ 1281 + entry->desc->properties = 0; 1282 + entry->desc_wb->properties = 0; 1283 + 1284 + /* prevent allocation failures by reusing kept pages */ 1285 + 
if (*page) { 1286 + tsnep_rx_set_page(rx, entry, *page); 1287 + tsnep_rx_activate(rx, rx->write); 1288 + rx->write++; 1289 + 1290 + *page = NULL; 1291 + page++; 1292 + } 1293 + } 1294 + } 1295 + 1296 + static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx) 1297 + { 1298 + struct page **page = rx->page_buffer; 1299 + u32 allocated; 1300 + int i; 1301 + 1302 + tsnep_rx_init(rx); 1303 + 1304 + /* alloc all ring entries except the last one, because ring cannot be 1305 + * filled completely, as many buffers as possible is enough as wakeup is 1306 + * done if new buffers are available 1307 + */ 1308 + allocated = xsk_buff_alloc_batch(rx->xsk_pool, rx->xdp_batch, 1309 + TSNEP_RING_SIZE - 1); 1310 + 1311 + for (i = 0; i < TSNEP_RING_SIZE; i++) { 1312 + struct tsnep_rx_entry *entry = &rx->entry[i]; 1313 + 1314 + /* keep pages to prevent allocation failures when xsk is 1315 + * disabled 1316 + */ 1317 + if (entry->page) { 1318 + *page = entry->page; 1319 + entry->page = NULL; 1320 + 1321 + page++; 1322 + } 1323 + 1324 + /* defined initial values for properties are required for 1325 + * correct owner counter checking 1326 + */ 1327 + entry->desc->properties = 0; 1328 + entry->desc_wb->properties = 0; 1329 + 1330 + if (allocated) { 1331 + tsnep_rx_set_xdp(rx, entry, 1332 + rx->xdp_batch[allocated - 1]); 1333 + tsnep_rx_activate(rx, rx->write); 1334 + rx->write++; 1335 + 1336 + allocated--; 1337 + } 1338 + } 1516 1339 } 1517 1340 1518 1341 static bool tsnep_pending(struct tsnep_queue *queue) ··· 1629 1268 complete = tsnep_tx_poll(queue->tx, budget); 1630 1269 1631 1270 if (queue->rx) { 1632 - done = tsnep_rx_poll(queue->rx, napi, budget); 1271 + done = queue->rx->xsk_pool ? 
1272 + tsnep_rx_poll_zc(queue->rx, napi, budget) : 1273 + tsnep_rx_poll(queue->rx, napi, budget); 1633 1274 if (done >= budget) 1634 1275 complete = false; 1635 1276 } ··· 1712 1349 1713 1350 tsnep_free_irq(queue, first); 1714 1351 1715 - if (rx && xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1716 - xdp_rxq_info_unreg(&rx->xdp_rxq); 1352 + if (rx) { 1353 + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1354 + xdp_rxq_info_unreg(&rx->xdp_rxq); 1355 + if (xdp_rxq_info_is_reg(&rx->xdp_rxq_zc)) 1356 + xdp_rxq_info_unreg(&rx->xdp_rxq_zc); 1357 + } 1717 1358 1718 1359 netif_napi_del(&queue->napi); 1719 1360 } ··· 1740 1373 else 1741 1374 rx->tx_queue_index = 0; 1742 1375 1376 + /* prepare both memory models to eliminate possible registration 1377 + * errors when memory model is switched between page pool and 1378 + * XSK pool during runtime 1379 + */ 1743 1380 retval = xdp_rxq_info_reg(&rx->xdp_rxq, adapter->netdev, 1744 1381 rx->queue_index, queue->napi.napi_id); 1745 1382 if (retval) ··· 1753 1382 rx->page_pool); 1754 1383 if (retval) 1755 1384 goto failed; 1385 + retval = xdp_rxq_info_reg(&rx->xdp_rxq_zc, adapter->netdev, 1386 + rx->queue_index, queue->napi.napi_id); 1387 + if (retval) 1388 + goto failed; 1389 + retval = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq_zc, 1390 + MEM_TYPE_XSK_BUFF_POOL, 1391 + NULL); 1392 + if (retval) 1393 + goto failed; 1394 + if (rx->xsk_pool) 1395 + xsk_pool_set_rxq_info(rx->xsk_pool, &rx->xdp_rxq_zc); 1756 1396 } 1757 1397 1758 1398 retval = tsnep_request_irq(queue, first); ··· 1877 1495 } 1878 1496 1879 1497 return 0; 1498 + } 1499 + 1500 + int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool) 1501 + { 1502 + bool running = netif_running(queue->adapter->netdev); 1503 + u32 frame_size; 1504 + 1505 + frame_size = xsk_pool_get_rx_frame_size(pool); 1506 + if (frame_size < TSNEP_XSK_RX_BUF_SIZE) 1507 + return -EOPNOTSUPP; 1508 + 1509 + queue->rx->page_buffer = kcalloc(TSNEP_RING_SIZE, 1510 + sizeof(*queue->rx->page_buffer), 1511 + 
GFP_KERNEL); 1512 + if (!queue->rx->page_buffer) 1513 + return -ENOMEM; 1514 + queue->rx->xdp_batch = kcalloc(TSNEP_RING_SIZE, 1515 + sizeof(*queue->rx->xdp_batch), 1516 + GFP_KERNEL); 1517 + if (!queue->rx->xdp_batch) { 1518 + kfree(queue->rx->page_buffer); 1519 + queue->rx->page_buffer = NULL; 1520 + 1521 + return -ENOMEM; 1522 + } 1523 + 1524 + xsk_pool_set_rxq_info(pool, &queue->rx->xdp_rxq_zc); 1525 + 1526 + if (running) 1527 + tsnep_queue_disable(queue); 1528 + 1529 + queue->rx->xsk_pool = pool; 1530 + 1531 + if (running) { 1532 + tsnep_rx_reopen_xsk(queue->rx); 1533 + tsnep_queue_enable(queue); 1534 + } 1535 + 1536 + return 0; 1537 + } 1538 + 1539 + void tsnep_disable_xsk(struct tsnep_queue *queue) 1540 + { 1541 + bool running = netif_running(queue->adapter->netdev); 1542 + 1543 + if (running) 1544 + tsnep_queue_disable(queue); 1545 + 1546 + tsnep_rx_free_zc(queue->rx); 1547 + 1548 + queue->rx->xsk_pool = NULL; 1549 + 1550 + if (running) { 1551 + tsnep_rx_reopen(queue->rx); 1552 + tsnep_queue_enable(queue); 1553 + } 1554 + 1555 + kfree(queue->rx->xdp_batch); 1556 + queue->rx->xdp_batch = NULL; 1557 + kfree(queue->rx->page_buffer); 1558 + queue->rx->page_buffer = NULL; 1880 1559 } 1881 1560 1882 1561 static netdev_tx_t tsnep_netdev_xmit_frame(struct sk_buff *skb, ··· 2089 1646 switch (bpf->command) { 2090 1647 case XDP_SETUP_PROG: 2091 1648 return tsnep_xdp_setup_prog(adapter, bpf->prog, bpf->extack); 1649 + case XDP_SETUP_XSK_POOL: 1650 + return tsnep_xdp_setup_pool(adapter, bpf->xsk.pool, 1651 + bpf->xsk.queue_id); 2092 1652 default: 2093 1653 return -EOPNOTSUPP; 2094 1654 } ··· 2146 1700 return nxmit; 2147 1701 } 2148 1702 1703 + static int tsnep_netdev_xsk_wakeup(struct net_device *dev, u32 queue_id, 1704 + u32 flags) 1705 + { 1706 + struct tsnep_adapter *adapter = netdev_priv(dev); 1707 + struct tsnep_queue *queue; 1708 + 1709 + if (queue_id >= adapter->num_rx_queues || 1710 + queue_id >= adapter->num_tx_queues) 1711 + return -EINVAL; 1712 + 1713 + queue 
= &adapter->queue[queue_id]; 1714 + 1715 + if (!napi_if_scheduled_mark_missed(&queue->napi)) 1716 + napi_schedule(&queue->napi); 1717 + 1718 + return 0; 1719 + } 1720 + 2149 1721 static const struct net_device_ops tsnep_netdev_ops = { 2150 1722 .ndo_open = tsnep_netdev_open, 2151 1723 .ndo_stop = tsnep_netdev_close, ··· 2177 1713 .ndo_setup_tc = tsnep_tc_setup, 2178 1714 .ndo_bpf = tsnep_netdev_bpf, 2179 1715 .ndo_xdp_xmit = tsnep_netdev_xdp_xmit, 1716 + .ndo_xsk_wakeup = tsnep_netdev_xsk_wakeup, 2180 1717 }; 2181 1718 2182 1719 static int tsnep_mac_init(struct tsnep_adapter *adapter)
+66
drivers/net/ethernet/engleder/tsnep_xdp.c
··· 17 17 18 18 return 0; 19 19 } 20 + 21 + static int tsnep_xdp_enable_pool(struct tsnep_adapter *adapter, 22 + struct xsk_buff_pool *pool, u16 queue_id) 23 + { 24 + struct tsnep_queue *queue; 25 + int retval; 26 + 27 + if (queue_id >= adapter->num_rx_queues || 28 + queue_id >= adapter->num_tx_queues) 29 + return -EINVAL; 30 + 31 + queue = &adapter->queue[queue_id]; 32 + if (queue->rx->queue_index != queue_id || 33 + queue->tx->queue_index != queue_id) { 34 + netdev_err(adapter->netdev, 35 + "XSK support only for TX/RX queue pairs\n"); 36 + 37 + return -EOPNOTSUPP; 38 + } 39 + 40 + retval = xsk_pool_dma_map(pool, adapter->dmadev, 41 + DMA_ATTR_SKIP_CPU_SYNC); 42 + if (retval) { 43 + netdev_err(adapter->netdev, "failed to map XSK pool\n"); 44 + 45 + return retval; 46 + } 47 + 48 + retval = tsnep_enable_xsk(queue, pool); 49 + if (retval) { 50 + xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); 51 + 52 + return retval; 53 + } 54 + 55 + return 0; 56 + } 57 + 58 + static int tsnep_xdp_disable_pool(struct tsnep_adapter *adapter, u16 queue_id) 59 + { 60 + struct xsk_buff_pool *pool; 61 + struct tsnep_queue *queue; 62 + 63 + if (queue_id >= adapter->num_rx_queues || 64 + queue_id >= adapter->num_tx_queues) 65 + return -EINVAL; 66 + 67 + pool = xsk_get_pool_from_qid(adapter->netdev, queue_id); 68 + if (!pool) 69 + return -EINVAL; 70 + 71 + queue = &adapter->queue[queue_id]; 72 + 73 + tsnep_disable_xsk(queue); 74 + 75 + xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); 76 + 77 + return 0; 78 + } 79 + 80 + int tsnep_xdp_setup_pool(struct tsnep_adapter *adapter, 81 + struct xsk_buff_pool *pool, u16 queue_id) 82 + { 83 + return pool ? tsnep_xdp_enable_pool(adapter, pool, queue_id) : 84 + tsnep_xdp_disable_pool(adapter, queue_id); 85 + }