Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

libeth: xsk: add XSkFQ refill and XSk wakeup helpers

XSkFQ refill is pretty generic across the drivers minus FQ descriptor
filling and can easily be unified with one inline callback.
XSk wakeup is usually not generic, but here, instead of the commonly used
"SW interrupts", I picked firing an IPI. In most tests, it showed better
performance; it also provides better control for userspace on which CPU
will handle the xmit, as SW interrupts honor IRQ affinity no matter
which core produces XSk xmit descs (while XDPSQs are associated 1:1
with cores having the same ID).

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>

authored by

Alexander Lobakin and committed by
Tony Nguyen
3ced71a8 5495c58c

+222
+124
drivers/net/ethernet/intel/libeth/xsk.c
	return __ret;
}

/* Refill */

/**
 * libeth_xskfq_create - create an XSkFQ
 * @fq: fill queue to initialize
 *
 * Allocates the FQEs and initializes the fields used by libeth_xdp: number
 * of buffers to refill, refill threshold and buffer len.
 *
 * Return: %0 on success, -errno otherwise.
 */
int libeth_xskfq_create(struct libeth_xskfq *fq)
{
	/* NUMA-aware allocation: FQEs land on the node closest to the queue */
	fq->fqes = kvcalloc_node(fq->count, sizeof(*fq->fqes), GFP_KERNEL,
				 fq->nid);
	if (!fq->fqes)
		return -ENOMEM;

	/* The whole ring is empty at creation time, refill it entirely */
	fq->pending = fq->count;
	fq->thresh = libeth_xdp_queue_threshold(fq->count);
	fq->buf_len = xsk_pool_get_rx_frame_size(fq->pool);

	return 0;
}
EXPORT_SYMBOL_GPL(libeth_xskfq_create);

/**
 * libeth_xskfq_destroy - destroy an XSkFQ
 * @fq: fill queue to destroy
 *
 * Zeroes the used fields and frees the FQEs array.
 */
void libeth_xskfq_destroy(struct libeth_xskfq *fq)
{
	fq->buf_len = 0;
	fq->thresh = 0;
	fq->pending = 0;

	kvfree(fq->fqes);
}
EXPORT_SYMBOL_GPL(libeth_xskfq_destroy);

/* .ndo_xsk_wakeup */

/* CSD callback fired via smp_call_function_single_async(); runs with IRQs
 * disabled on the target CPU, hence the _irqoff NAPI schedule variant.
 * @info carries the &napi_struct set up in libeth_xsk_init_wakeup().
 */
static void libeth_xsk_napi_sched(void *info)
{
	__napi_schedule_irqoff(info);
}

/**
 * libeth_xsk_init_wakeup - initialize libeth XSk wakeup structure
 * @csd: struct to initialize
 * @napi: NAPI corresponding to this queue
 *
 * libeth_xdp uses inter-processor interrupts to perform XSk wakeups. In order
 * to do that, the corresponding CSDs must be initialized when creating the
 * queues.
 */
void libeth_xsk_init_wakeup(call_single_data_t *csd, struct napi_struct *napi)
{
	INIT_CSD(csd, libeth_xsk_napi_sched, napi);
}
EXPORT_SYMBOL_GPL(libeth_xsk_init_wakeup);

/**
 * libeth_xsk_wakeup - perform an XSk wakeup
 * @csd: CSD corresponding to the queue
 * @qid: the stack queue index
 *
 * Try to mark the NAPI as missed first, so that it could be rescheduled.
 * If it's not, schedule it on the corresponding CPU using IPIs (or directly
 * if already running on it).
 */
void libeth_xsk_wakeup(call_single_data_t *csd, u32 qid)
{
	struct napi_struct *napi = csd->info;

	/* Already running or about to run: just mark it missed and let the
	 * current poll cycle pick the work up.
	 */
	if (napi_if_scheduled_mark_missed(napi) ||
	    unlikely(!napi_schedule_prep(napi)))
		return;

	/* XDPSQs are associated 1:1 with cores having the same ID, so @qid
	 * doubles as the target CPU; fold out-of-range queue IDs onto a
	 * valid CPU number.
	 */
	if (unlikely(qid >= nr_cpu_ids))
		qid %= nr_cpu_ids;

	if (qid != raw_smp_processor_id() && cpu_online(qid))
		smp_call_function_single_async(qid, csd);
	else
		__napi_schedule(napi);
}
EXPORT_SYMBOL_GPL(libeth_xsk_wakeup);

/* Pool setup */

#define LIBETH_XSK_DMA_ATTR \
	(DMA_ATTR_WEAK_ORDERING | DMA_ATTR_SKIP_CPU_SYNC)

/**
 * libeth_xsk_setup_pool - setup or destroy an XSk pool for a queue
 * @dev: target &net_device
 * @qid: stack queue index to configure
 * @enable: whether to enable or disable the pool
 *
 * Check that @qid is valid and then map or unmap the pool.
 *
 * Return: %0 on success, -errno otherwise.
 */
int libeth_xsk_setup_pool(struct net_device *dev, u32 qid, bool enable)
{
	struct xsk_buff_pool *pool;

	pool = xsk_get_pool_from_qid(dev, qid);
	if (!pool)
		return -EINVAL;

	if (enable)
		return xsk_pool_dma_map(pool, dev->dev.parent,
					LIBETH_XSK_DMA_ATTR);
	else
		xsk_pool_dma_unmap(pool, LIBETH_XSK_DMA_ATTR);

	return 0;
}
EXPORT_SYMBOL_GPL(libeth_xsk_setup_pool);
+98
include/net/libeth/xsk.h
#define LIBETH_XSK_DEFINE_FINALIZE(name, flush, finalize) \
	__LIBETH_XDP_DEFINE_FINALIZE(name, flush, finalize, xsk)

/* Refilling */

/**
 * struct libeth_xskfq - structure representing an XSk buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pool: &xsk_buff_pool for buffer management
 * @fqes: array of XSk buffer pointers
 * @descs: opaque pointer to the HW descriptor array
 * @ntu: index of the next buffer to poll
 * @count: number of descriptors/buffers the queue has
 * @pending: current number of XSkFQEs to refill
 * @thresh: threshold below which the queue is refilled
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_xskfq {
	/* Fields below are read on the hotpath; they can be passed around
	 * standalone as &libeth_xskfq_fp (see libeth_xskfqe_alloc()).
	 */
	struct_group_tagged(libeth_xskfq_fp, fp,
		struct xsk_buff_pool	*pool;
		struct libeth_xdp_buff	**fqes;
		void			*descs;

		u32			ntu;
		u32			count;
	);

	/* Cold fields */
	u32			pending;
	u32			thresh;

	u32			buf_len;
	int			nid;
};

int libeth_xskfq_create(struct libeth_xskfq *fq);
void libeth_xskfq_destroy(struct libeth_xskfq *fq);

/**
 * libeth_xsk_buff_xdp_get_dma - get DMA address of XSk &libeth_xdp_buff
 * @xdp: buffer to get the DMA addr for
 */
#define libeth_xsk_buff_xdp_get_dma(xdp) \
	xsk_buff_xdp_get_dma(&(xdp)->base)

/**
 * libeth_xskfqe_alloc - allocate @n XSk Rx buffers
 * @fq: hotpath part of the XSkFQ, usually onstack
 * @n: number of buffers to allocate
 * @fill: driver callback to write DMA addresses to HW descriptors
 *
 * Note that @fq->ntu gets updated, but ::pending must be recalculated
 * by the caller.
 *
 * Return: number of buffers refilled.
 */
static __always_inline u32
libeth_xskfqe_alloc(struct libeth_xskfq_fp *fq, u32 n,
		    void (*fill)(const struct libeth_xskfq_fp *fq, u32 i))
{
	u32 this, ret, done = 0;
	struct xdp_buff **xskb;

	/* First pass: allocate contiguously from ::ntu up to the end of the
	 * ring, but no more than @n buffers in total.
	 */
	this = fq->count - fq->ntu;
	if (likely(this > n))
		this = n;

again:
	/* Cast relies on &xdp_buff being embedded in &libeth_xdp_buff (as
	 * ::base, see libeth_xsk_buff_xdp_get_dma()) -- presumably as its
	 * first member; TODO confirm against libeth_xdp_buff's definition.
	 */
	xskb = (typeof(xskb))&fq->fqes[fq->ntu];
	ret = xsk_buff_alloc_batch(fq->pool, xskb, this);

	for (u32 i = 0, ntu = fq->ntu; likely(i < ret); i++)
		fill(fq, ntu + i);

	done += ret;
	fq->ntu += ret;

	/* Done if the ring end wasn't reached or the pool ran out of
	 * buffers (ret < this) before it was.
	 */
	if (likely(fq->ntu < fq->count) || unlikely(ret < this))
		goto out;

	/* Hit the ring end exactly: wrap around and allocate the remainder
	 * from the beginning in a second pass.
	 */
	fq->ntu = 0;

	if (this < n) {
		this = n - this;
		goto again;
	}

out:
	return done;
}

/* .ndo_xsk_wakeup */

void libeth_xsk_init_wakeup(call_single_data_t *csd, struct napi_struct *napi);
void libeth_xsk_wakeup(call_single_data_t *csd, u32 qid);

/* Pool setup */

int libeth_xsk_setup_pool(struct net_device *dev, u32 qid, bool enable);

#endif /* __LIBETH_XSK_H */