Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0

#include <net/xsk_buff_pool.h>
#include <net/xdp_sock.h>
#include <net/xdp_sock_drv.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"

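/* Add a TX socket to the pool's TX list. Sockets without a TX ring are
 * ignored; the list is protected by xsk_tx_list_lock and walked under RCU.
 */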
void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
        list_add_rcu(&xs->tx_list, &pool->xsk_tx_list);
        spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}

void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&pool->xsk_tx_list_lock, flags);
        list_del_rcu(&xs->tx_list);
        spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags);
}

void xp_destroy(struct xsk_buff_pool *pool)
{
        if (!pool)
                return;

        kvfree(pool->tx_descs);
        kvfree(pool->heads);
        kvfree(pool);
}

int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
{
        pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
                                  GFP_KERNEL);
        if (!pool->tx_descs)
                return -ENOMEM;

        return 0;
}

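/* Allocate a buffer pool for @xs and derive its parameters (chunk size,
 * headroom, frame length) from @umem. Only unaligned mode needs the
 * free_heads stack; in aligned mode a buffer's index follows directly
 * from its address.
 */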
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
                                                struct xdp_umem *umem)
{
        bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
        struct xsk_buff_pool *pool;
        struct xdp_buff_xsk *xskb;
        u32 i, entries;

        entries = unaligned ? umem->chunks : 0;
        pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
        if (!pool)
                goto out;

        pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL);
        if (!pool->heads)
                goto out;

        if (xs->tx)
                if (xp_alloc_tx_descs(pool, xs))
                        goto out;

        pool->chunk_mask = ~((u64)umem->chunk_size - 1);
        pool->addrs_cnt = umem->size;
        pool->heads_cnt = umem->chunks;
        pool->free_heads_cnt = umem->chunks;
        pool->headroom = umem->headroom;
        pool->chunk_size = umem->chunk_size;
        pool->chunk_shift = ffs(umem->chunk_size) - 1;
        pool->unaligned = unaligned;
        pool->frame_len = umem->chunk_size - umem->headroom -
                          XDP_PACKET_HEADROOM;
        pool->umem = umem;
        pool->addrs = umem->addrs;
        INIT_LIST_HEAD(&pool->free_list);
        INIT_LIST_HEAD(&pool->xsk_tx_list);
        spin_lock_init(&pool->xsk_tx_list_lock);
        spin_lock_init(&pool->cq_lock);
        refcount_set(&pool->users, 1);

        pool->fq = xs->fq_tmp;
        pool->cq = xs->cq_tmp;

        for (i = 0; i < pool->free_heads_cnt; i++) {
                xskb = &pool->heads[i];
                xskb->pool = pool;
                xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
                INIT_LIST_HEAD(&xskb->free_list_node);
                if (pool->unaligned)
                        pool->free_heads[i] = xskb;
                else
                        xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
        }

        return pool;

out:
        xp_destroy(pool);
        return NULL;
}

void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
{
        u32 i;

        for (i = 0; i < pool->heads_cnt; i++)
                pool->heads[i].xdp.rxq = rxq;
}
EXPORT_SYMBOL(xp_set_rxq_info);

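/* Ask the driver to tear down its zero-copy state for this queue by
 * installing a NULL pool via ndo_bpf(). No-op if zero-copy was never
 * enabled. Must be called under rtnl_lock.
 */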
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
        struct netdev_bpf bpf;
        int err;

        ASSERT_RTNL();

        if (pool->umem->zc) {
                bpf.command = XDP_SETUP_XSK_POOL;
                bpf.xsk.pool = NULL;
                bpf.xsk.queue_id = pool->queue_id;

                err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf);

                if (err)
                        WARN(1, "Failed to disable zero-copy!\n");
        }
}

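/* Bind the pool to a netdev/queue pair and try to enable zero-copy by
 * handing the pool to the driver through ndo_bpf(). Unless XDP_ZEROCOPY
 * was requested explicitly, a failure to set up zero-copy falls back to
 * copy mode instead of returning an error.
 */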
int xp_assign_dev(struct xsk_buff_pool *pool,
                  struct net_device *netdev, u16 queue_id, u16 flags)
{
        bool force_zc, force_copy;
        struct netdev_bpf bpf;
        int err = 0;

        ASSERT_RTNL();

        force_zc = flags & XDP_ZEROCOPY;
        force_copy = flags & XDP_COPY;

        if (force_zc && force_copy)
                return -EINVAL;

        if (xsk_get_pool_from_qid(netdev, queue_id))
                return -EBUSY;

        pool->netdev = netdev;
        pool->queue_id = queue_id;
        err = xsk_reg_pool_at_qid(netdev, pool, queue_id);
        if (err)
                return err;

        if (flags & XDP_USE_NEED_WAKEUP)
                pool->uses_need_wakeup = true;
        /* Tx needs to be explicitly woken up the first time. Also
         * for supporting drivers that do not implement this
         * feature. They will always have to call sendto() or poll().
         */
        pool->cached_need_wakeup = XDP_WAKEUP_TX;

        dev_hold(netdev);

        if (force_copy)
                /* For copy-mode, we are done. */
                return 0;

        if (!netdev->netdev_ops->ndo_bpf ||
            !netdev->netdev_ops->ndo_xsk_wakeup) {
                err = -EOPNOTSUPP;
                goto err_unreg_pool;
        }

        bpf.command = XDP_SETUP_XSK_POOL;
        bpf.xsk.pool = pool;
        bpf.xsk.queue_id = queue_id;

        err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
        if (err)
                goto err_unreg_pool;

        if (!pool->dma_pages) {
                WARN(1, "Driver did not DMA map zero-copy buffers");
                err = -EINVAL;
                goto err_unreg_xsk;
        }
        pool->umem->zc = true;
        return 0;

err_unreg_xsk:
        xp_disable_drv_zc(pool);
err_unreg_pool:
        if (!force_zc)
                err = 0; /* fallback to copy mode */
        if (err) {
                xsk_clear_pool_at_qid(netdev, queue_id);
                dev_put(netdev);
        }
        return err;
}

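/* Variant of xp_assign_dev() used when a umem is shared between sockets:
 * the new pool brings its own fill and completion rings, and the binding
 * mode (copy or zero-copy) follows what the umem was originally bound with.
 */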
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
                         struct net_device *dev, u16 queue_id)
{
        u16 flags;

        /* One fill and completion ring required for each queue id. */
        if (!pool->fq || !pool->cq)
                return -EINVAL;

        flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
        if (pool->uses_need_wakeup)
                flags |= XDP_USE_NEED_WAKEUP;

        return xp_assign_dev(pool, dev, queue_id, flags);
}

void xp_clear_dev(struct xsk_buff_pool *pool)
{
        if (!pool->netdev)
                return;

        xp_disable_drv_zc(pool);
        xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
        dev_put(pool->netdev);
        pool->netdev = NULL;
}

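/* Final pool teardown, run from a workqueue so that rtnl_lock can be
 * taken from process context once the last reference is dropped.
 */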
static void xp_release_deferred(struct work_struct *work)
{
        struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool,
                                                  work);

        rtnl_lock();
        xp_clear_dev(pool);
        rtnl_unlock();

        if (pool->fq) {
                xskq_destroy(pool->fq);
                pool->fq = NULL;
        }

        if (pool->cq) {
                xskq_destroy(pool->cq);
                pool->cq = NULL;
        }

        xdp_put_umem(pool->umem, false);
        xp_destroy(pool);
}

void xp_get_pool(struct xsk_buff_pool *pool)
{
        refcount_inc(&pool->users);
}

bool xp_put_pool(struct xsk_buff_pool *pool)
{
        if (!pool)
                return false;

        if (refcount_dec_and_test(&pool->users)) {
                INIT_WORK(&pool->work, xp_release_deferred);
                schedule_work(&pool->work);
                return true;
        }

        return false;
}

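/* DMA mappings are kept per (umem, netdev) pair so that pools created by
 * sockets sharing the same umem on the same device can reuse one mapping
 * instead of mapping the pages again.
 */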
static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
{
        struct xsk_dma_map *dma_map;

        list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) {
                if (dma_map->netdev == pool->netdev)
                        return dma_map;
        }

        return NULL;
}

static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev,
                                             u32 nr_pages, struct xdp_umem *umem)
{
        struct xsk_dma_map *dma_map;

        dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL);
        if (!dma_map)
                return NULL;

        dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
        if (!dma_map->dma_pages) {
                kfree(dma_map);
                return NULL;
        }

        dma_map->netdev = netdev;
        dma_map->dev = dev;
        dma_map->dma_need_sync = false;
        dma_map->dma_pages_cnt = nr_pages;
        refcount_set(&dma_map->users, 1);
        list_add(&dma_map->list, &umem->xsk_dma_list);
        return dma_map;
}

static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
{
        list_del(&dma_map->list);
        kvfree(dma_map->dma_pages);
        kfree(dma_map);
}

static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
{
        dma_addr_t *dma;
        u32 i;

        for (i = 0; i < dma_map->dma_pages_cnt; i++) {
                dma = &dma_map->dma_pages[i];
                if (*dma) {
                        dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
                                             DMA_BIDIRECTIONAL, attrs);
                        *dma = 0;
                }
        }

        xp_destroy_dma_map(dma_map);
}

void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
        struct xsk_dma_map *dma_map;

        if (pool->dma_pages_cnt == 0)
                return;

        dma_map = xp_find_dma_map(pool);
        if (!dma_map) {
                WARN(1, "Could not find dma_map for device");
                return;
        }

        if (!refcount_dec_and_test(&dma_map->users))
                return;

        __xp_dma_unmap(dma_map, attrs);
        kvfree(pool->dma_pages);
        pool->dma_pages_cnt = 0;
        pool->dev = NULL;
}
EXPORT_SYMBOL(xp_dma_unmap);

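/* Record, in a spare low bit of each DMA address (XSK_NEXT_PG_CONTIG_MASK),
 * whether the following page is DMA-contiguous. Unaligned descriptors may
 * only cross a page boundary when the two pages are contiguous.
 */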
static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
{
        u32 i;

        for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) {
                if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1])
                        dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
                else
                        dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
        }
}

static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
        pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
        if (!pool->dma_pages)
                return -ENOMEM;

        pool->dev = dma_map->dev;
        pool->dma_pages_cnt = dma_map->dma_pages_cnt;
        pool->dma_need_sync = dma_map->dma_need_sync;
        memcpy(pool->dma_pages, dma_map->dma_pages,
               pool->dma_pages_cnt * sizeof(*pool->dma_pages));

        return 0;
}

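/* DMA map all umem pages for @dev, or reuse and refcount an existing
 * mapping if this umem is already mapped for the pool's netdev. Called by
 * zero-copy drivers before they start using the pool.
 */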
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
               unsigned long attrs, struct page **pages, u32 nr_pages)
{
        struct xsk_dma_map *dma_map;
        dma_addr_t dma;
        int err;
        u32 i;

        dma_map = xp_find_dma_map(pool);
        if (dma_map) {
                err = xp_init_dma_info(pool, dma_map);
                if (err)
                        return err;

                refcount_inc(&dma_map->users);
                return 0;
        }

        dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
        if (!dma_map)
                return -ENOMEM;

        for (i = 0; i < dma_map->dma_pages_cnt; i++) {
                dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
                                         DMA_BIDIRECTIONAL, attrs);
                if (dma_mapping_error(dev, dma)) {
                        __xp_dma_unmap(dma_map, attrs);
                        return -ENOMEM;
                }
                if (dma_need_sync(dev, dma))
                        dma_map->dma_need_sync = true;
                dma_map->dma_pages[i] = dma;
        }

        if (pool->unaligned)
                xp_check_dma_contiguity(dma_map);
        else
                for (i = 0; i < pool->heads_cnt; i++) {
                        struct xdp_buff_xsk *xskb = &pool->heads[i];

                        xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
                }

        err = xp_init_dma_info(pool, dma_map);
        if (err) {
                __xp_dma_unmap(dma_map, attrs);
                return err;
        }

        return 0;
}
EXPORT_SYMBOL(xp_dma_map);

static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
                                          u64 addr)
{
        return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
}

static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
{
        *addr = xp_unaligned_extract_addr(*addr);
        if (*addr >= pool->addrs_cnt ||
            *addr + pool->chunk_size > pool->addrs_cnt ||
            xp_addr_crosses_non_contig_pg(pool, *addr))
                return false;
        return true;
}

static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
{
        *addr = xp_aligned_extract_addr(pool, *addr);
        return *addr < pool->addrs_cnt;
}

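/* Take one address from the fill ring, skipping invalid descriptors, and
 * turn it into an initialized xdp_buff_xsk. Returns NULL if the fill ring
 * is empty or no free buffer heads remain.
 */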
static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
{
        struct xdp_buff_xsk *xskb;
        u64 addr;
        bool ok;

        if (pool->free_heads_cnt == 0)
                return NULL;

        for (;;) {
                if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
                        pool->fq->queue_empty_descs++;
                        return NULL;
                }

                ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
                     xp_check_aligned(pool, &addr);
                if (!ok) {
                        pool->fq->invalid_descs++;
                        xskq_cons_release(pool->fq);
                        continue;
                }
                break;
        }

        if (pool->unaligned) {
                xskb = pool->free_heads[--pool->free_heads_cnt];
                xp_init_xskb_addr(xskb, pool, addr);
                if (pool->dma_pages_cnt)
                        xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
        } else {
                xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
        }

        xskq_cons_release(pool->fq);
        return xskb;
}

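/* Allocate a single receive buffer, preferring the pool's free list over
 * the fill ring, and prepare it for a new frame: reset the data pointers
 * and sync for device if the DMA mapping requires it.
 */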
struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
{
        struct xdp_buff_xsk *xskb;

        if (!pool->free_list_cnt) {
                xskb = __xp_alloc(pool);
                if (!xskb)
                        return NULL;
        } else {
                pool->free_list_cnt--;
                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
                                        free_list_node);
                list_del_init(&xskb->free_list_node);
        }

        xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
        xskb->xdp.data_meta = xskb->xdp.data;

        if (pool->dma_need_sync) {
                dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
                                                 pool->frame_len,
                                                 DMA_BIDIRECTIONAL);
        }
        return &xskb->xdp;
}
EXPORT_SYMBOL(xp_alloc);

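/* Batched variant of __xp_alloc(): consume up to @max addresses from the
 * fill ring in one go and fill @xdp with the resulting buffers. Invalid
 * descriptors are dropped, so fewer than @max entries may be returned.
 */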
static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
        u32 i, cached_cons, nb_entries;

        if (max > pool->free_heads_cnt)
                max = pool->free_heads_cnt;
        max = xskq_cons_nb_entries(pool->fq, max);

        cached_cons = pool->fq->cached_cons;
        nb_entries = max;
        i = max;
        while (i--) {
                struct xdp_buff_xsk *xskb;
                u64 addr;
                bool ok;

                __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);

                ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
                     xp_check_aligned(pool, &addr);
                if (unlikely(!ok)) {
                        pool->fq->invalid_descs++;
                        nb_entries--;
                        continue;
                }

                if (pool->unaligned) {
                        xskb = pool->free_heads[--pool->free_heads_cnt];
                        xp_init_xskb_addr(xskb, pool, addr);
                        if (pool->dma_pages_cnt)
                                xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
                } else {
                        xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
                }

                *xdp = &xskb->xdp;
                xdp++;
        }

        xskq_cons_release_n(pool->fq, max);
        return nb_entries;
}

static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
{
        struct xdp_buff_xsk *xskb;
        u32 i;

        nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);

        i = nb_entries;
        while (i--) {
                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
                list_del_init(&xskb->free_list_node);

                *xdp = &xskb->xdp;
                xdp++;
        }
        pool->free_list_cnt -= nb_entries;

        return nb_entries;
}

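/* Allocate up to @max receive buffers into @xdp. Buffers on the free list
 * are handed out first, the remainder comes from the fill ring. Falls back
 * to single-buffer xp_alloc() when the DMA mapping needs syncing.
 */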
u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
{
        u32 nb_entries1 = 0, nb_entries2;

        if (unlikely(pool->dma_need_sync)) {
                struct xdp_buff *buff;

                /* Slow path */
                buff = xp_alloc(pool);
                if (buff)
                        *xdp = buff;
                return !!buff;
        }

        if (unlikely(pool->free_list_cnt)) {
                nb_entries1 = xp_alloc_reused(pool, xdp, max);
                if (nb_entries1 == max)
                        return nb_entries1;

                max -= nb_entries1;
                xdp += nb_entries1;
        }

        nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
        if (!nb_entries2)
                pool->fq->queue_empty_descs++;

        return nb_entries1 + nb_entries2;
}
EXPORT_SYMBOL(xp_alloc_batch);

bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
        if (pool->free_list_cnt >= count)
                return true;
        return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
}
EXPORT_SYMBOL(xp_can_alloc);

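/* Return a buffer to the pool's free list. Buffers already on the list
 * (free_list_node non-empty) are left alone, so a double free is harmless.
 */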
void xp_free(struct xdp_buff_xsk *xskb)
{
        if (!list_empty(&xskb->free_list_node))
                return;

        xskb->pool->free_list_cnt++;
        list_add(&xskb->free_list_node, &xskb->pool->free_list);
}
EXPORT_SYMBOL(xp_free);

void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
{
        addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
        return pool->addrs + addr;
}
EXPORT_SYMBOL(xp_raw_get_data);

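/* Translate a umem address from a descriptor into the DMA address a device
 * can use, masking off the contiguity flag stored in the page entry.
 */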
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
{
        addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
        return (pool->dma_pages[addr >> PAGE_SHIFT] &
                ~XSK_NEXT_PG_CONTIG_MASK) +
                (addr & ~PAGE_MASK);
}
EXPORT_SYMBOL(xp_raw_get_dma);

void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
{
        dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
                                      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
}
EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);

void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
                                 size_t size)
{
        dma_sync_single_range_for_device(pool->dev, dma, 0,
                                         size, DMA_BIDIRECTIONAL);
}
EXPORT_SYMBOL(xp_dma_sync_for_device_slow);