// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2, given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})
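
/*
 * Worked example (editor's illustration, not part of the driver): with a
 * 4 KiB page shift of 12, MLX5_DIV_ROUND_UP_POW2(4097, 12) evaluates to
 * ((4097 + 4096 - 1) >> 12) == 2, i.e. 4097 bytes round up to two pages.
 */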

static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}
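
/*
 * Editor's note on the arithmetic above: an octword is 16 bytes and each
 * MTT translation entry is an 8-byte big-endian address, so two entries
 * fit per octword and npages entries occupy (npages + 1) / 2 octwords.
 * For example, a 3-page region needs 2 octwords.
 */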

static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}
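
/*
 * Editor's note: the mkey access mode outgrew its original two-bit field,
 * so it is split across two mkc fields: the low 2 bits land in
 * access_mode_1_0 and the remaining high bits in access_mode_4_2. E.g.
 * MLX5_MKC_ACCESS_MODE_MTT (0x1) only touches the low field, while larger
 * mode values spill into the high one.
 */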

static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}
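
/*
 * Editor's note: each DMA-mapped scatterlist segment is cut into fixed
 * chunks of 2^log_size bytes, one MTT entry per chunk. A 16 KiB segment
 * with log_size == 12 therefore contributes four entries, 4 KiB apart.
 */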

struct mlx5_create_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
	__be64 mtt[];
};

struct mlx5_destroy_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
};

static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
				  struct mlx5_vdpa_direct_mr *mr,
				  struct mlx5_create_mkey_mem *mem)
{
	void *in = &mem->in;
	void *mkc;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));

	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
}

static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
				 struct mlx5_vdpa_direct_mr *mr,
				 struct mlx5_create_mkey_mem *mem)
{
	u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);

	mr->mr = mlx5_idx_to_mkey(mkey_index);
}

static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
				   struct mlx5_vdpa_direct_mr *mr,
				   struct mlx5_destroy_mkey_mem *mem)
{
	void *in = &mem->in;

	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	if (!mr->mr)
		return;

	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}
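
/*
 * Editor's note: these helpers compute the intersection of an iotlb map
 * with the range covered by a direct MR. E.g. a map covering
 * [0x3000, 0x8000) clipped against an MR covering [0x5000, 0x9000)
 * yields maplen() == 0x3000.
 */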

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}
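
/*
 * Editor's note: the indirect key spans from the start of the first direct
 * MR to the end of the last one, holes included; the holes are later
 * covered by null-mkey KLM entries in fill_indir().
 */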

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
			preve = dmr->start;
			goto again;
		}
	}
}
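
/*
 * Editor's note: fill_indir() walks the direct MRs in address order,
 * tracking the end of the previous entry in preve. When the next MR does
 * not start where the previous one ended, a KLM pointing at the null mkey
 * is emitted to cover the hole, and the loop retries (goto again) to emit
 * the KLM for the MR itself.
 */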

static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}
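
/*
 * Editor's note: each KLM entry is 16 bytes, and the count is rounded up
 * to a multiple of 4 so the array occupies whole 64-byte units; e.g.
 * klm_byte_size(5) == 16 * 8 == 128.
 */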

#define MLX5_VDPA_MTT_ALIGN 16

static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_async_cmd *cmds;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_create_mkey_mem *cmd_mem;
		int mttlen, mttcount;

		mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
		mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
		cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
		if (!cmd_mem) {
			err = -ENOMEM;
			goto done;
		}

		cmds[i].out = cmd_mem->out;
		cmds[i].outlen = sizeof(cmd_mem->out);
		cmds[i].in = cmd_mem->in;
		cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);

		fill_create_direct_mr(mvdev, dmr, cmd_mem);

		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
		goto done;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);

		if (!cmd->err) {
			create_direct_mr_end(mvdev, dmr, cmd_mem);
		} else {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
				      dmr->start, dmr->end, cmd->err);
		}
	}

done:
	for (i = i - 1; i >= 0; i--) {
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
		kvfree(cmd_mem);
	}

	kvfree(cmds);
	return err;
}
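
/*
 * Editor's note: create_direct_keys() builds one CREATE_MKEY command per
 * direct MR and submits the whole batch through
 * mlx5_vdpa_exec_async_cmds(), then walks the list again to collect
 * per-command results, rather than issuing the mkey creations one by one.
 */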

DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))
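
/*
 * Editor's note: DEFINE_FREE() (from linux/cleanup.h) pairs with the
 * __free() attribute below so that cmds and cmd_mem are kvfree()d
 * automatically when they go out of scope, covering every return path in
 * destroy_direct_keys() without explicit cleanup labels.
 */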

static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
	struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
	if (!cmds || !cmd_mem)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		cmds[i].out = cmd_mem[i].out;
		cmds[i].outlen = sizeof(cmd_mem[i].out);
		cmds[i].in = cmd_mem[i].in;
		cmds[i].inlen = sizeof(cmd_mem[i].in);
		fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
		return err;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];

		dmr->mr = 0;
		if (cmd->err) {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
				      dmr->start, dmr->end, cmd->err);
		}
	}

	return err;
}

static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}

static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	u64 start = 0;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa, offset;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
		start += size;
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		offset = mr->start > map->start ? mr->start - map->start : 0;
		pa = map->addr + offset;
		paend = map->addr + offset + maplen(map, mr);
		for (; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	return 0;

err_map:
	sg_free_table(&mr->sg_head);
	return err;
}
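
/*
 * Editor's note: map_direct_mr() first takes the GCD of all map lengths
 * intersecting the MR to pick the largest power-of-two entity size that
 * evenly tiles every map (e.g. maps of 16 KiB and 24 KiB give a GCD of
 * 8 KiB, so log_entity_size == 13), then builds a scatterlist with one
 * entry per entity and DMA-maps it.
 */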

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
			    struct mlx5_vdpa_mr *mr,
			    u64 start,
			    u64 size,
			    u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	list_for_each_entry_safe(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, merging
 * adjacent maps where possible, and create direct memory keys that provide
 * the device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
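/*
 * Editor's illustration: two maps [0x0, 0x4000) and [0x4000, 0x8000) with
 * identical permissions are merged into one direct chain; a map starting
 * at 0x9000 instead ends the chain at 0x8000, and the [0x8000, 0x9000)
 * hole is accounted for with null-mkey KLM entries.
 */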
static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
			  struct mlx5_vdpa_mr *mr,
			  struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	err = create_direct_keys(mvdev, mr);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}

static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
{
	struct vhost_iotlb_map *map;
	u64 start = 0, last = ULLONG_MAX;
	int err;

	if (dst == src)
		return -EINVAL;

	if (!src) {
		err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
		return err;
	}

	for (map = vhost_iotlb_itree_first(src, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		err = vhost_iotlb_add_range(dst, map->start, map->last,
					    map->addr, map->perm);
		if (err)
			return err;
	}
	return 0;
}
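
/*
 * Editor's note: with a NULL src, dup_iotlb() installs a single identity
 * (1:1) read-write mapping over the whole 64-bit range, matching the
 * physical-address DMA mkey created by create_dma_mr(); otherwise it
 * copies src's map entries one by one into dst.
 */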

static void prune_iotlb(struct vhost_iotlb *iotlb)
{
	vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	destroy_direct_keys(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}

static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	if (WARN_ON(!mr))
		return;

	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	vhost_iotlb_free(mr->iotlb);

	list_del(&mr->mr_list);

	kfree(mr);
}

/* There can be multiple .set_map() operations in quick succession.
 * This large delay is a simple way to prevent the MR cleanup from blocking
 * .set_map() MR creation in this scenario.
 */
#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000

static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
{
	struct mlx5_vdpa_mr_resources *mres;
	struct mlx5_vdpa_mr *mr, *tmp;
	struct mlx5_vdpa_dev *mvdev;

	mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);

	if (atomic_read(&mres->shutdown)) {
		mutex_lock(&mres->lock);
	} else if (!mutex_trylock(&mres->lock)) {
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
		return;
	}

	mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);

	list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
		_mlx5_vdpa_destroy_mr(mvdev, mr);
	}

	mutex_unlock(&mres->lock);
}

static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	if (!mr)
		return;

	if (refcount_dec_and_test(&mr->refcount)) {
		list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
	}
}

void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_put_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	if (!mr)
		return;

	refcount_inc(&mr->refcount);
}

void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_get_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
			 struct mlx5_vdpa_mr *new_mr,
			 unsigned int asid)
{
	struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];

	mutex_lock(&mvdev->mres.lock);

	_mlx5_vdpa_put_mr(mvdev, old_mr);
	mvdev->mres.mr[asid] = new_mr;

	mutex_unlock(&mvdev->mres.lock);
}

static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mutex_lock(&mvdev->mres.lock);

	list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
		mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
			       "mr: %p, mkey: 0x%x, refcount: %u\n",
			       mr, mr->mkey, refcount_read(&mr->refcount));
	}

	mutex_unlock(&mvdev->mres.lock);
}

void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
{
	if (!mvdev->res.valid)
		return;

	for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
		mlx5_vdpa_update_mr(mvdev, NULL, i);

	prune_iotlb(mvdev->cvq.iotlb);

	mlx5_vdpa_show_mr_leaks(mvdev);
}

static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *mr,
				struct vhost_iotlb *iotlb)
{
	int err;

	if (iotlb)
		err = create_user_mr(mvdev, mr, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (err)
		return err;

	mr->iotlb = vhost_iotlb_alloc(0, 0);
	if (!mr->iotlb) {
		err = -ENOMEM;
		goto err_mr;
	}

	err = dup_iotlb(mr->iotlb, iotlb);
	if (err)
		goto err_iotlb;

	list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);

	return 0;

err_iotlb:
	vhost_iotlb_free(mr->iotlb);

err_mr:
	if (iotlb)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	return err;
}

struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
					 struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&mvdev->mres.lock);
	err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
	mutex_unlock(&mvdev->mres.lock);

	if (err)
		goto out_err;

	refcount_set(&mr->refcount, 1);

	return mr;

out_err:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
			       struct vhost_iotlb *iotlb,
			       unsigned int asid)
{
	int err;

	if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
		return 0;

	spin_lock(&mvdev->cvq.iommu_lock);

	prune_iotlb(mvdev->cvq.iotlb);
	err = dup_iotlb(mvdev->cvq.iotlb, iotlb);

	spin_unlock(&mvdev->cvq.iommu_lock);

	return err;
}

int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mr = mlx5_vdpa_create_mr(mvdev, NULL);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	mlx5_vdpa_update_mr(mvdev, mr, 0);

	return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
}

int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
{
	if (asid >= MLX5_VDPA_NUM_AS)
		return -EINVAL;

	mlx5_vdpa_update_mr(mvdev, NULL, asid);

	if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
	} else {
		mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
	}

	return 0;
}

int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
	if (!mres->wq_gc)
		return -ENOMEM;

	INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);

	mutex_init(&mres->lock);

	INIT_LIST_HEAD(&mres->mr_list_head);
	INIT_LIST_HEAD(&mres->mr_gc_list_head);

	return 0;
}

void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	atomic_set(&mres->shutdown, 1);

	flush_delayed_work(&mres->gc_dwork_ent);
	destroy_workqueue(mres->wq_gc);
	mres->wq_gc = NULL;
	mutex_destroy(&mres->lock);
}