// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Devmem TCP
 *
 *	Authors:	Mina Almasry <almasrymina@google.com>
 *			Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 *			Kaiyuan Zhang <kaiyuanz@google.com>
 */

#include <linux/dma-buf.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <net/sock.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "mp_dmabuf_devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);

static const struct memory_provider_ops dmabuf_devmem_ops;

bool net_is_devmem_iov(struct net_iov *niov)
{
	return niov->type == NET_IOV_DMABUF;
}

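/* gen_pool_for_each_chunk() callback: free the chunk owner's net_iov array
 * and the owner itself when a binding's genpool is torn down.
 */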
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->area.niovs);
	kfree(owner);
}

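/* Return the DMA address backing @niov: the owning chunk's base DMA address
 * plus the niov's page-sized offset within that chunk.
 */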
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner;

	owner = net_devmem_iov_to_chunk_owner(niov);
	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}

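/* Deferred teardown of a binding: free the chunk owners, destroy the genpool
 * (warning if allocations are still outstanding), then unmap, detach and put
 * the dma-buf before freeing the binding itself.
 */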
void __net_devmem_dmabuf_binding_free(struct work_struct *wq)
{
	struct net_devmem_dmabuf_binding *binding = container_of(wq, typeof(*binding), unbind_w);
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  binding->direction);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kvfree(binding->tx_vec);
	kfree(binding);
}

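/* Carve one PAGE_SIZE chunk out of the binding's genpool and return the
 * net_iov covering it, with its page_pool state reset.
 */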
struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->area.niovs[index];

	niov->pp_magic = 0;
	niov->pp = NULL;
	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}

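/* Return @niov's PAGE_SIZE chunk to the binding's genpool. */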
void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}

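/* Tear down a binding: drop it from the global xarray, wait out in-flight
 * lookups, unlink it from its socket, close any RX queues still bound to it,
 * then release the reference taken at bind time.
 */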
void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	/* Ensure no tx-side net_devmem_lookup_dmabuf() calls are in flight
	 * after the erase.
	 */
	synchronize_net();

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		const struct pp_memory_provider_params mp_params = {
			.mp_priv = binding,
			.mp_ops = &dmabuf_devmem_ops,
		};

		rxq_idx = get_netdev_rx_queue_index(rxq);

		__net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
	}

	net_devmem_dmabuf_binding_put(binding);
}

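/* Install this binding as the memory provider for @rxq_idx and record the
 * queue in binding->bound_rxqs.
 */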
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct pp_memory_provider_params mp_params = {
		.mp_priv = binding,
		.mp_ops = &dmabuf_devmem_ops,
	};
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
	if (err)
		return err;

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		goto err_close_rxq;

	return 0;

err_close_rxq:
	__net_mp_close_rxq(dev, rxq_idx, &mp_params);
	return err;
}

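/* Bind a dma-buf fd to a netdev: attach and map the buffer for @dma_dev,
 * carve the resulting scatterlist into a genpool of PAGE_SIZE chunks backed
 * by net_iovs, and register the binding in the global xarray. TX bindings
 * additionally get a flat offset-to-net_iov lookup table (tx_vec).
 */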
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev,
		       struct device *dma_dev,
		       enum dma_data_direction direction,
		       unsigned int dmabuf_fd, struct netdev_nl_sock *priv,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	if (!dma_dev) {
		NL_SET_ERR_MSG(extack, "Device doesn't support DMA");
		return ERR_PTR(-EOPNOTSUPP);
	}

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;
	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	mutex_init(&binding->lock);

	binding->dmabuf = dmabuf;
	binding->direction = direction;

	binding->attachment = dma_buf_attach(binding->dmabuf, dma_dev);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_binding;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       direction);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	if (direction == DMA_TO_DEVICE) {
		binding->tx_vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
						 sizeof(struct net_iov *),
						 GFP_KERNEL);
		if (!binding->tx_vec) {
			err = -ENOMEM;
			goto err_unmap;
		}
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool = gen_pool_create(PAGE_SHIFT,
					      dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_tx_vec;
	}

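	/* Walk the mapped scatterlist: each DMA-contiguous region gets a
	 * chunk owner that is added to the genpool and backs a run of
	 * net_iovs.
	 */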
	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->area.base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->area.num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
						   sizeof(*owner->area.niovs),
						   GFP_KERNEL);
		if (!owner->area.niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->area.num_niovs; i++) {
			niov = &owner->area.niovs[i];
			niov->type = NET_IOV_DMABUF;
			niov->owner = &owner->area;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
			if (direction == DMA_TO_DEVICE)
				binding->tx_vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
		}

		virtual += len;
	}

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_chunks;

	list_add(&binding->list, &priv->bindings);

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_tx_vec:
	kvfree(binding->tx_vec);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  direction);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}

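/* Look up a binding by id and take a reference on it, or return NULL if the
 * id is unknown or the binding is already being torn down.
 */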
struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id)
{
	struct net_devmem_dmabuf_binding *binding;

	rcu_read_lock();
	binding = xa_load(&net_devmem_dmabuf_bindings, id);
	if (binding) {
		if (!net_devmem_dmabuf_binding_get(binding))
			binding = NULL;
	}
	rcu_read_unlock();

	return binding;
}

void net_devmem_get_net_iov(struct net_iov *niov)
{
	net_devmem_dmabuf_binding_get(net_devmem_iov_binding(niov));
}

void net_devmem_put_net_iov(struct net_iov *niov)
{
	net_devmem_dmabuf_binding_put(net_devmem_iov_binding(niov));
}

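/* Resolve @dmabuf_id for the TX path: the binding must exist, carry a tx_vec,
 * and belong to the device the socket's cached route egresses over.
 */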
struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk,
							 unsigned int dmabuf_id)
{
	struct net_devmem_dmabuf_binding *binding;
	struct dst_entry *dst = __sk_dst_get(sk);
	int err = 0;

	binding = net_devmem_lookup_dmabuf(dmabuf_id);
	if (!binding || !binding->tx_vec) {
		err = -EINVAL;
		goto out_err;
	}

	/* The dma-addrs in this binding are only reachable by the
	 * corresponding net_device.
	 */
	if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) {
		err = -ENODEV;
		goto out_err;
	}

	return binding;

out_err:
	if (binding)
		net_devmem_dmabuf_binding_put(binding);

	return ERR_PTR(err);
}

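/* Translate an offset into the dma-buf into the net_iov covering it, plus the
 * offset into that net_iov and the bytes remaining in it.
 */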
struct net_iov *
net_devmem_get_niov_at(struct net_devmem_dmabuf_binding *binding,
		       size_t virt_addr, size_t *off, size_t *size)
{
	if (virt_addr >= binding->dmabuf->size)
		return NULL;

	*off = virt_addr % PAGE_SIZE;
	*size = PAGE_SIZE - *off;

	return binding->tx_vec[virt_addr / PAGE_SIZE];
}

/*** "Dmabuf devmem memory provider" ***/

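/* Page pool init hook: devmem pools must be order-0, dma_sync is force
 * disabled, and the pool takes a reference on the binding.
 */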
int mp_dmabuf_devmem_init(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	if (!binding)
		return -EINVAL;

	/* dma-buf dma addresses do not need and should not be used with
	 * dma_sync_for_cpu/device. Force disable dma_sync.
	 */
	pool->dma_sync = false;
	pool->dma_sync_for_cpu = false;

	if (pool->p.order != 0)
		return -E2BIG;

	net_devmem_dmabuf_binding_get(binding);
	return 0;
}

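/* Allocation hook: hand out net_iovs from the binding instead of pages. */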
netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
	struct net_iov *niov;
	netmem_ref netmem;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return 0;

	netmem = net_iov_to_netmem(niov);

	page_pool_set_pp_info(pool, netmem);

	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
	return netmem;
}

void mp_dmabuf_devmem_destroy(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	net_devmem_dmabuf_binding_put(binding);
}

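/* Release hook: return the net_iov to the binding and report false so the
 * page pool never tries to put_page() it.
 */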
bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
{
	long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));

	if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
		return false;

	if (WARN_ON_ONCE(refcount != 1))
		return false;

	page_pool_clear_pp_info(netmem);

	net_devmem_free_dmabuf(netmem_to_net_iov(netmem));

	/* We don't want the page pool put_page()ing our net_iovs. */
	return false;
}

static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp,
				    struct netdev_rx_queue *rxq)
{
	const struct net_devmem_dmabuf_binding *binding = mp_priv;
	int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF;

	return nla_put_u32(rsp, type, binding->id);
}

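/* Queue uninstall hook: drop @rxq from bound_rxqs; once no bound queues
 * remain, clear binding->dev under the binding lock.
 */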
static void mp_dmabuf_devmem_uninstall(void *mp_priv,
				       struct netdev_rx_queue *rxq)
{
	struct net_devmem_dmabuf_binding *binding = mp_priv;
	struct netdev_rx_queue *bound_rxq;
	unsigned long xa_idx;

	xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) {
		if (bound_rxq == rxq) {
			xa_erase(&binding->bound_rxqs, xa_idx);
			if (xa_empty(&binding->bound_rxqs)) {
				mutex_lock(&binding->lock);
				binding->dev = NULL;
				mutex_unlock(&binding->lock);
			}
			break;
		}
	}
}

static const struct memory_provider_ops dmabuf_devmem_ops = {
	.init = mp_dmabuf_devmem_init,
	.destroy = mp_dmabuf_devmem_destroy,
	.alloc_netmems = mp_dmabuf_devmem_alloc_netmems,
	.release_netmem = mp_dmabuf_devmem_release_page,
	.nl_fill = mp_dmabuf_devmem_nl_fill,
	.uninstall = mp_dmabuf_devmem_uninstall,
};