Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

libeth: xsk: add XSk xmit functions

Reuse core sending functions to send XSk xmit frames.
Both metadata and no-metadata pools/drivers are supported. libeth_xdp
also provides generic XSk metadata ops, currently with the checksum
offload only and for cases when HW doesn't require supplying L3/L4
checksum offsets. Drivers are free to pass their own ops.
&libeth_xdp_tx_bulk is not used here as it would be redundant;
pool->tx_descs are accessed directly.
The fake "libeth_xsktmo" is needed to hide implementation details from the
drivers when they want to use the generic ops: the original struct is
defined in the same file where dev->xsk_tx_metadata_ops gets set, to
avoid duplicating the slowpath; at the same time, XSk xmit functions
use a local "fast" copy to inline XMO callbacks.
Tx descriptor filling loop is unrolled by 8.

Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> # optimizations
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>

authored by

Alexander Lobakin and committed by
Tony Nguyen
40e846d1 b3ad8450

+248 -17
+2
drivers/net/ethernet/intel/libeth/priv.h
··· 13 13 struct skb_shared_info; 14 14 struct xdp_frame_bulk; 15 15 16 + extern const struct xsk_tx_metadata_ops libeth_xsktmo_slow; 17 + 16 18 void libeth_xsk_tx_return_bulk(const struct libeth_xdp_tx_frame *bq, 17 19 u32 count); 18 20
+12 -2
drivers/net/ethernet/intel/libeth/xdp.c
··· 376 376 * __libeth_xdp_set_features - set XDP features for netdev 377 377 * @dev: &net_device to configure 378 378 * @xmo: XDP metadata ops (Rx hints) 379 + * @zc_segs: maximum number of S/G frags the HW can transmit 380 + * @tmo: XSk Tx metadata ops (Tx hints) 379 381 * 380 382 * Set all the features libeth_xdp supports. Only the first argument is 381 - * necessary. 383 + * necessary; without the third one (zero), XSk support won't be advertised. 382 384 * Use the non-underscored versions in drivers instead. 383 385 */ 384 386 void __libeth_xdp_set_features(struct net_device *dev, 385 - const struct xdp_metadata_ops *xmo) 387 + const struct xdp_metadata_ops *xmo, 388 + u32 zc_segs, 389 + const struct xsk_tx_metadata_ops *tmo) 386 390 { 387 391 xdp_set_features_flag(dev, 388 392 NETDEV_XDP_ACT_BASIC | 389 393 NETDEV_XDP_ACT_REDIRECT | 390 394 NETDEV_XDP_ACT_NDO_XMIT | 395 + (zc_segs ? NETDEV_XDP_ACT_XSK_ZEROCOPY : 0) | 391 396 NETDEV_XDP_ACT_RX_SG | 392 397 NETDEV_XDP_ACT_NDO_XMIT_SG); 393 398 dev->xdp_metadata_ops = xmo; 399 + 400 + tmo = tmo == libeth_xsktmo ? &libeth_xsktmo_slow : tmo; 401 + 402 + dev->xdp_zc_max_segs = zc_segs ? : 1; 403 + dev->xsk_tx_metadata_ops = zc_segs ? tmo : NULL; 394 404 } 395 405 EXPORT_SYMBOL_GPL(__libeth_xdp_set_features); 396 406
+6
drivers/net/ethernet/intel/libeth/xsk.c
··· 18 18 libeth_xsk_buff_free_slow(bq[i].xsk); 19 19 } 20 20 21 + /* XSk TMO */ 22 + 23 + const struct xsk_tx_metadata_ops libeth_xsktmo_slow = { 24 + .tmo_request_checksum = libeth_xsktmo_req_csum, 25 + }; 26 + 21 27 /* Rx polling path */ 22 28 23 29 /**
+2 -2
include/net/libeth/tx.h
··· 12 12 13 13 /** 14 14 * enum libeth_sqe_type - type of &libeth_sqe to act on Tx completion 15 - * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX frag, no action required 15 + * @LIBETH_SQE_EMPTY: unused/empty OR XDP_TX/XSk frame, no action required 16 16 * @LIBETH_SQE_CTX: context descriptor with empty SQE, no action required 17 17 * @LIBETH_SQE_SLAB: kmalloc-allocated buffer, unmap and kfree() 18 18 * @LIBETH_SQE_FRAG: mapped skb frag, only unmap DMA ··· 93 93 * @bq: XDP frame bulk to combine return operations 94 94 * @ss: onstack NAPI stats to fill 95 95 * @xss: onstack XDPSQ NAPI stats to fill 96 - * @xdp_tx: number of XDP frames processed 96 + * @xdp_tx: number of XDP-not-XSk frames processed 97 97 * @napi: whether it's called from the NAPI context 98 98 * 99 99 * libeth uses this structure to access objects needed for performing full
+60 -13
include/net/libeth/xdp.h
··· 293 293 /** 294 294 * enum - &libeth_xdp_tx_frame and &libeth_xdp_tx_desc flags 295 295 * @LIBETH_XDP_TX_LEN: only for ``XDP_TX``, [15:0] of ::len_fl is actual length 296 + * @LIBETH_XDP_TX_CSUM: for XSk xmit, enable checksum offload 297 + * @LIBETH_XDP_TX_XSKMD: for XSk xmit, mask of the metadata bits 296 298 * @LIBETH_XDP_TX_FIRST: indicates the frag is the first one of the frame 297 299 * @LIBETH_XDP_TX_LAST: whether the frag is the last one of the frame 298 300 * @LIBETH_XDP_TX_MULTI: whether the frame contains several frags ··· 302 300 */ 303 301 enum { 304 302 LIBETH_XDP_TX_LEN = GENMASK(15, 0), 303 + 304 + LIBETH_XDP_TX_CSUM = XDP_TXMD_FLAGS_CHECKSUM, 305 + LIBETH_XDP_TX_XSKMD = LIBETH_XDP_TX_LEN, 305 306 306 307 LIBETH_XDP_TX_FIRST = BIT(16), 307 308 LIBETH_XDP_TX_LAST = BIT(17), ··· 325 320 * @len: frag length for XSk ``XDP_TX`` and .ndo_xdp_xmit() 326 321 * @flags: Tx flags for the above 327 322 * @opts: combined @len + @flags for the above for speed 323 + * @desc: XSk xmit descriptor for direct casting 328 324 */ 329 325 struct libeth_xdp_tx_frame { 330 326 union { ··· 355 349 aligned_u64 opts; 356 350 }; 357 351 }; 352 + 353 + /* XSk xmit */ 354 + struct xdp_desc desc; 358 355 }; 359 - } __aligned_largest; 356 + } __aligned(sizeof(struct xdp_desc)); 360 357 static_assert(offsetof(struct libeth_xdp_tx_frame, frag.len) == 361 358 offsetof(struct libeth_xdp_tx_frame, len_fl)); 359 + static_assert(sizeof(struct libeth_xdp_tx_frame) == sizeof(struct xdp_desc)); 362 360 363 361 /** 364 362 * struct libeth_xdp_tx_bulk - XDP Tx frame bulk for bulk sending ··· 373 363 * @count: current number of frames in @bulk 374 364 * @bulk: array of queued frames for bulk Tx 375 365 * 376 - * All XDP Tx operations queue each frame to the bulk first and flush it 377 - * when @count reaches the array end. Bulk is always placed on the stack 378 - * for performance. 
One bulk element contains all the data necessary 366 + * All XDP Tx operations except XSk xmit queue each frame to the bulk first 367 + * and flush it when @count reaches the array end. Bulk is always placed on 368 + * the stack for performance. One bulk element contains all the data necessary 379 369 * for sending a frame and then freeing it on completion. 370 + * For XSk xmit, Tx descriptor array from &xsk_buff_pool is casted directly 371 + * to &libeth_xdp_tx_frame as they are compatible and the bulk structure is 372 + * not used. 380 373 */ 381 374 struct libeth_xdp_tx_bulk { 382 375 const struct bpf_prog *prog; ··· 404 391 405 392 /** 406 393 * struct libeth_xdpsq - abstraction for an XDPSQ 407 - * @pool: XSk buffer pool for XSk ``XDP_TX`` 394 + * @pool: XSk buffer pool for XSk ``XDP_TX`` and xmit 408 395 * @sqes: array of Tx buffers from the actual queue struct 409 396 * @descs: opaque pointer to the HW descriptor array 410 397 * @ntu: pointer to the next free descriptor index 411 398 * @count: number of descriptors on that queue 412 399 * @pending: pointer to the number of sent-not-completed descs on that queue 413 - * @xdp_tx: pointer to the above 400 + * @xdp_tx: pointer to the above, but only for non-XSk-xmit frames 414 401 * @lock: corresponding XDPSQ lock 415 402 * 416 403 * Abstraction for driver-independent implementation of Tx. Placed on the stack ··· 452 439 } __aligned_largest; 453 440 454 441 /** 442 + * libeth_xdp_ptr_to_priv - convert pointer to a libeth_xdp u64 priv 443 + * @ptr: pointer to convert 444 + * 445 + * The main sending function passes private data as the largest scalar, u64. 446 + * Use this helper when you want to pass a pointer there. 
447 + */ 448 + #define libeth_xdp_ptr_to_priv(ptr) ({ \ 449 + typecheck_pointer(ptr); \ 450 + ((u64)(uintptr_t)(ptr)); \ 451 + }) 452 + /** 453 + * libeth_xdp_priv_to_ptr - convert libeth_xdp u64 priv to a pointer 454 + * @priv: private data to convert 455 + * 456 + * The main sending function passes private data as the largest scalar, u64. 457 + * Use this helper when your callback takes this u64 and you want to convert 458 + * it back to a pointer. 459 + */ 460 + #define libeth_xdp_priv_to_ptr(priv) ({ \ 461 + static_assert(__same_type(priv, u64)); \ 462 + ((const void *)(uintptr_t)(priv)); \ 463 + }) 464 + 465 + /** 455 466 * libeth_xdp_tx_xmit_bulk - main XDP Tx function 456 467 * @bulk: array of frames to send 457 468 * @xdpsq: pointer to the driver-specific XDPSQ struct ··· 487 450 * @xmit: callback for filling a HW descriptor with the frame info 488 451 * 489 452 * Internal abstraction for placing @n XDP Tx frames on the HW XDPSQ. Used for 490 - * all types of frames. 453 + * all types of frames: ``XDP_TX``, .ndo_xdp_xmit(), XSk ``XDP_TX``, and XSk 454 + * xmit. 491 455 * @prep must lock the queue as this function releases it at the end. @unroll 492 - * greatly increases the object code size, but also greatly increases 493 - * performance. 456 + * greatly increases the object code size, but also greatly increases XSk xmit 457 + * performance; for other types of frames, it's not enabled. 494 458 * The compilers inline all those onstack abstractions to direct data accesses. 495 459 * 496 460 * Return: number of frames actually placed on the queue, <= @n. The function ··· 747 709 * @fill: libeth_xdp callback to fill &libeth_sqe and &libeth_xdp_tx_desc 748 710 * @xmit: driver callback to fill a HW descriptor 749 711 * 750 - * Internal abstraction to create bulk flush functions for drivers. 712 + * Internal abstraction to create bulk flush functions for drivers. Used for 713 + * everything except XSk xmit. 
751 714 * 752 715 * Return: true if anything was sent, false otherwise. 753 716 */ ··· 1802 1763 u32 libeth_xdp_queue_threshold(u32 count); 1803 1764 1804 1765 void __libeth_xdp_set_features(struct net_device *dev, 1805 - const struct xdp_metadata_ops *xmo); 1766 + const struct xdp_metadata_ops *xmo, 1767 + u32 zc_segs, 1768 + const struct xsk_tx_metadata_ops *tmo); 1806 1769 void libeth_xdp_set_redirect(struct net_device *dev, bool enable); 1807 1770 1808 1771 /** ··· 1821 1780 COUNT_ARGS(__VA_ARGS__))(dev, ##__VA_ARGS__) 1822 1781 1823 1782 #define __libeth_xdp_feat0(dev) \ 1824 - __libeth_xdp_set_features(dev, NULL) 1783 + __libeth_xdp_set_features(dev, NULL, 0, NULL) 1825 1784 #define __libeth_xdp_feat1(dev, xmo) \ 1826 - __libeth_xdp_set_features(dev, xmo) 1785 + __libeth_xdp_set_features(dev, xmo, 0, NULL) 1786 + #define __libeth_xdp_feat2(dev, xmo, zc_segs) \ 1787 + __libeth_xdp_set_features(dev, xmo, zc_segs, NULL) 1788 + #define __libeth_xdp_feat3(dev, xmo, zc_segs, tmo) \ 1789 + __libeth_xdp_set_features(dev, xmo, zc_segs, tmo) 1827 1790 1828 1791 /** 1829 1792 * libeth_xdp_set_features_noredir - enable all libeth_xdp features w/o redir ··· 1847 1802 libeth_xdp_set_features(ud, ##__VA_ARGS__); \ 1848 1803 libeth_xdp_set_redirect(ud, false); \ 1849 1804 } while (0) 1805 + 1806 + #define libeth_xsktmo ((const void *)GOLDEN_RATIO_PRIME) 1850 1807 1851 1808 #endif /* __LIBETH_XDP_H */
+166
include/net/libeth/xsk.h
··· 7 7 #include <net/libeth/xdp.h> 8 8 #include <net/xdp_sock_drv.h> 9 9 10 + /* ``XDP_TXMD_FLAGS_VALID`` is defined only under ``CONFIG_XDP_SOCKETS`` */ 11 + #ifdef XDP_TXMD_FLAGS_VALID 12 + static_assert(XDP_TXMD_FLAGS_VALID <= LIBETH_XDP_TX_XSKMD); 13 + #endif 14 + 10 15 /* ``XDP_TX`` bulking */ 11 16 12 17 /** ··· 149 144 #define libeth_xsk_tx_flush_bulk(bq, flags, prep, xmit) \ 150 145 __libeth_xdp_tx_flush_bulk(bq, (flags) | LIBETH_XDP_TX_XSK, prep, \ 151 146 libeth_xsk_tx_fill_buf, xmit) 147 + 148 + /* XSk TMO */ 149 + 150 + /** 151 + * libeth_xsktmo_req_csum - XSk Tx metadata op to request checksum offload 152 + * @csum_start: unused 153 + * @csum_offset: unused 154 + * @priv: &libeth_xdp_tx_desc from the filling helper 155 + * 156 + * Generic implementation of ::tmo_request_checksum. Works only when HW doesn't 157 + * require filling checksum offsets and other parameters beside the checksum 158 + * request bit. 159 + * Consider using within @libeth_xsktmo unless the driver requires HW-specific 160 + * callbacks. 161 + */ 162 + static inline void libeth_xsktmo_req_csum(u16 csum_start, u16 csum_offset, 163 + void *priv) 164 + { 165 + ((struct libeth_xdp_tx_desc *)priv)->flags |= LIBETH_XDP_TX_CSUM; 166 + } 167 + 168 + /* Only to inline the callbacks below, use @libeth_xsktmo in drivers instead */ 169 + static const struct xsk_tx_metadata_ops __libeth_xsktmo = { 170 + .tmo_request_checksum = libeth_xsktmo_req_csum, 171 + }; 172 + 173 + /** 174 + * __libeth_xsk_xmit_fill_buf_md - internal helper to prepare XSk xmit w/meta 175 + * @xdesc: &xdp_desc from the XSk buffer pool 176 + * @sq: XDPSQ abstraction for the queue 177 + * @priv: XSk Tx metadata ops 178 + * 179 + * Same as __libeth_xsk_xmit_fill_buf(), but requests metadata pointer and 180 + * fills additional fields in &libeth_xdp_tx_desc to ask for metadata offload. 181 + * 182 + * Return: XDP Tx descriptor with the DMA, metadata request bits, and other 183 + * info to pass to the driver callback. 
184 + */ 185 + static __always_inline struct libeth_xdp_tx_desc 186 + __libeth_xsk_xmit_fill_buf_md(const struct xdp_desc *xdesc, 187 + const struct libeth_xdpsq *sq, 188 + u64 priv) 189 + { 190 + const struct xsk_tx_metadata_ops *tmo = libeth_xdp_priv_to_ptr(priv); 191 + struct libeth_xdp_tx_desc desc; 192 + struct xdp_desc_ctx ctx; 193 + 194 + ctx = xsk_buff_raw_get_ctx(sq->pool, xdesc->addr); 195 + desc = (typeof(desc)){ 196 + .addr = ctx.dma, 197 + .len = xdesc->len, 198 + }; 199 + 200 + BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo)); 201 + tmo = tmo == libeth_xsktmo ? &__libeth_xsktmo : tmo; 202 + 203 + xsk_tx_metadata_request(ctx.meta, tmo, &desc); 204 + 205 + return desc; 206 + } 207 + 208 + /* XSk xmit implementation */ 209 + 210 + /** 211 + * __libeth_xsk_xmit_fill_buf - internal helper to prepare XSk xmit w/o meta 212 + * @xdesc: &xdp_desc from the XSk buffer pool 213 + * @sq: XDPSQ abstraction for the queue 214 + * 215 + * Return: XDP Tx descriptor with the DMA and other info to pass to 216 + * the driver callback. 217 + */ 218 + static inline struct libeth_xdp_tx_desc 219 + __libeth_xsk_xmit_fill_buf(const struct xdp_desc *xdesc, 220 + const struct libeth_xdpsq *sq) 221 + { 222 + return (struct libeth_xdp_tx_desc){ 223 + .addr = xsk_buff_raw_get_dma(sq->pool, xdesc->addr), 224 + .len = xdesc->len, 225 + }; 226 + } 227 + 228 + /** 229 + * libeth_xsk_xmit_fill_buf - internal helper to prepare an XSk xmit 230 + * @frm: &xdp_desc from the XSk buffer pool 231 + * @i: index on the HW queue 232 + * @sq: XDPSQ abstraction for the queue 233 + * @priv: XSk Tx metadata ops 234 + * 235 + * Depending on the metadata ops presence (determined at compile time), calls 236 + * the quickest helper to build a libeth XDP Tx descriptor. 237 + * 238 + * Return: XDP Tx descriptor with the synced DMA, metadata request bits, 239 + * and other info to pass to the driver callback. 
240 + */ 241 + static __always_inline struct libeth_xdp_tx_desc 242 + libeth_xsk_xmit_fill_buf(struct libeth_xdp_tx_frame frm, u32 i, 243 + const struct libeth_xdpsq *sq, u64 priv) 244 + { 245 + struct libeth_xdp_tx_desc desc; 246 + 247 + if (priv) 248 + desc = __libeth_xsk_xmit_fill_buf_md(&frm.desc, sq, priv); 249 + else 250 + desc = __libeth_xsk_xmit_fill_buf(&frm.desc, sq); 251 + 252 + desc.flags |= xsk_is_eop_desc(&frm.desc) ? LIBETH_XDP_TX_LAST : 0; 253 + 254 + xsk_buff_raw_dma_sync_for_device(sq->pool, desc.addr, desc.len); 255 + 256 + return desc; 257 + } 258 + 259 + /** 260 + * libeth_xsk_xmit_do_bulk - send XSk xmit frames 261 + * @pool: XSk buffer pool containing the frames to send 262 + * @xdpsq: opaque pointer to driver's XDPSQ struct 263 + * @budget: maximum number of frames can be sent 264 + * @tmo: optional XSk Tx metadata ops 265 + * @prep: driver callback to build a &libeth_xdpsq 266 + * @xmit: driver callback to put frames to a HW queue 267 + * @finalize: driver callback to start a transmission 268 + * 269 + * Implements generic XSk xmit. Always turns on XSk Tx wakeup as it's assumed 270 + * lazy cleaning is used and interrupts are disabled for the queue. 271 + * HW descriptor filling is unrolled by ``LIBETH_XDP_TX_BATCH`` to optimize 272 + * writes. 273 + * Note that unlike other XDP Tx ops, the queue must be locked and cleaned 274 + * prior to calling this function to already know available @budget. 275 + * @prepare must only build a &libeth_xdpsq and return ``U32_MAX``. 276 + * 277 + * Return: false if @budget was exhausted, true otherwise. 
278 + */ 279 + static __always_inline bool 280 + libeth_xsk_xmit_do_bulk(struct xsk_buff_pool *pool, void *xdpsq, u32 budget, 281 + const struct xsk_tx_metadata_ops *tmo, 282 + u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), 283 + void (*xmit)(struct libeth_xdp_tx_desc desc, u32 i, 284 + const struct libeth_xdpsq *sq, u64 priv), 285 + void (*finalize)(void *xdpsq, bool sent, bool flush)) 286 + { 287 + const struct libeth_xdp_tx_frame *bulk; 288 + bool wake; 289 + u32 n; 290 + 291 + wake = xsk_uses_need_wakeup(pool); 292 + if (wake) 293 + xsk_clear_tx_need_wakeup(pool); 294 + 295 + n = xsk_tx_peek_release_desc_batch(pool, budget); 296 + bulk = container_of(&pool->tx_descs[0], typeof(*bulk), desc); 297 + 298 + libeth_xdp_tx_xmit_bulk(bulk, xdpsq, n, true, 299 + libeth_xdp_ptr_to_priv(tmo), prep, 300 + libeth_xsk_xmit_fill_buf, xmit); 301 + finalize(xdpsq, n, true); 302 + 303 + if (wake) 304 + xsk_set_tx_need_wakeup(pool); 305 + 306 + return n < budget; 307 + } 152 308 153 309 #endif /* __LIBETH_XSK_H */