Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/umem: Support importing dma-buf as user memory region

Dma-buf is a standard cross-driver buffer sharing mechanism that can be
used to support peer-to-peer access from RDMA devices.

Device memory exported via dma-buf is associated with a file descriptor.
This is passed to the user space as a property associated with the buffer
allocation. When the buffer is registered as a memory region, the file
descriptor is passed to the RDMA driver along with other parameters.

Implement the common code for importing dma-buf object and mapping dma-buf
pages.

Link: https://lore.kernel.org/r/1608067636-98073-2-git-send-email-jianxin.xiong@intel.com
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Authored by Jianxin Xiong and committed by Jason Gunthorpe.
368c0159 abfa4565

+224 -4
+1
drivers/infiniband/Kconfig
··· 41 41 bool 42 42 depends on INFINIBAND_USER_ACCESS != n 43 43 depends on MMU 44 + select DMA_SHARED_BUFFER 44 45 default y 45 46 46 47 config INFINIBAND_ON_DEMAND_PAGING
+1 -1
drivers/infiniband/core/Makefile
··· 40 40 uverbs_std_types_srq.o \ 41 41 uverbs_std_types_wq.o \ 42 42 uverbs_std_types_qp.o 43 - ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 43 + ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o 44 44 ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
+3
drivers/infiniband/core/umem.c
··· 2 2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 4 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 + * Copyright (c) 2020 Intel Corporation. All rights reserved. 5 6 * 6 7 * This software is available to you under a choice of one of two 7 8 * licenses. You may choose to be licensed under the terms of the GNU ··· 279 278 { 280 279 if (!umem) 281 280 return; 281 + if (umem->is_dmabuf) 282 + return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem)); 282 283 if (umem->is_odp) 283 284 return ib_umem_odp_release(to_ib_umem_odp(umem)); 284 285
+174
drivers/infiniband/core/umem_dmabuf.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) 2 + /* 3 + * Copyright (c) 2020 Intel Corporation. All rights reserved. 4 + */ 5 + 6 + #include <linux/dma-buf.h> 7 + #include <linux/dma-resv.h> 8 + #include <linux/dma-mapping.h> 9 + 10 + #include "uverbs.h" 11 + 12 + int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) 13 + { 14 + struct sg_table *sgt; 15 + struct scatterlist *sg; 16 + struct dma_fence *fence; 17 + unsigned long start, end, cur = 0; 18 + unsigned int nmap = 0; 19 + int i; 20 + 21 + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); 22 + 23 + if (umem_dmabuf->sgt) 24 + goto wait_fence; 25 + 26 + sgt = dma_buf_map_attachment(umem_dmabuf->attach, DMA_BIDIRECTIONAL); 27 + if (IS_ERR(sgt)) 28 + return PTR_ERR(sgt); 29 + 30 + /* modify the sg list in-place to match umem address and length */ 31 + 32 + start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE); 33 + end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length, 34 + PAGE_SIZE); 35 + for_each_sgtable_dma_sg(sgt, sg, i) { 36 + if (start < cur + sg_dma_len(sg) && cur < end) 37 + nmap++; 38 + if (cur <= start && start < cur + sg_dma_len(sg)) { 39 + unsigned long offset = start - cur; 40 + 41 + umem_dmabuf->first_sg = sg; 42 + umem_dmabuf->first_sg_offset = offset; 43 + sg_dma_address(sg) += offset; 44 + sg_dma_len(sg) -= offset; 45 + cur += offset; 46 + } 47 + if (cur < end && end <= cur + sg_dma_len(sg)) { 48 + unsigned long trim = cur + sg_dma_len(sg) - end; 49 + 50 + umem_dmabuf->last_sg = sg; 51 + umem_dmabuf->last_sg_trim = trim; 52 + sg_dma_len(sg) -= trim; 53 + break; 54 + } 55 + cur += sg_dma_len(sg); 56 + } 57 + 58 + umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg; 59 + umem_dmabuf->umem.sg_head.nents = nmap; 60 + umem_dmabuf->umem.nmap = nmap; 61 + umem_dmabuf->sgt = sgt; 62 + 63 + wait_fence: 64 + /* 65 + * Although the sg list is valid now, the content of the pages 66 + * may be not up-to-date. 
Wait for the exporter to finish 67 + * the migration. 68 + */ 69 + fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv); 70 + if (fence) 71 + return dma_fence_wait(fence, false); 72 + 73 + return 0; 74 + } 75 + EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); 76 + 77 + void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) 78 + { 79 + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); 80 + 81 + if (!umem_dmabuf->sgt) 82 + return; 83 + 84 + /* retore the original sg list */ 85 + if (umem_dmabuf->first_sg) { 86 + sg_dma_address(umem_dmabuf->first_sg) -= 87 + umem_dmabuf->first_sg_offset; 88 + sg_dma_len(umem_dmabuf->first_sg) += 89 + umem_dmabuf->first_sg_offset; 90 + umem_dmabuf->first_sg = NULL; 91 + umem_dmabuf->first_sg_offset = 0; 92 + } 93 + if (umem_dmabuf->last_sg) { 94 + sg_dma_len(umem_dmabuf->last_sg) += 95 + umem_dmabuf->last_sg_trim; 96 + umem_dmabuf->last_sg = NULL; 97 + umem_dmabuf->last_sg_trim = 0; 98 + } 99 + 100 + dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt, 101 + DMA_BIDIRECTIONAL); 102 + 103 + umem_dmabuf->sgt = NULL; 104 + } 105 + EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages); 106 + 107 + struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, 108 + unsigned long offset, size_t size, 109 + int fd, int access, 110 + const struct dma_buf_attach_ops *ops) 111 + { 112 + struct dma_buf *dmabuf; 113 + struct ib_umem_dmabuf *umem_dmabuf; 114 + struct ib_umem *umem; 115 + unsigned long end; 116 + struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL); 117 + 118 + if (check_add_overflow(offset, (unsigned long)size, &end)) 119 + return ret; 120 + 121 + if (unlikely(!ops || !ops->move_notify)) 122 + return ret; 123 + 124 + dmabuf = dma_buf_get(fd); 125 + if (IS_ERR(dmabuf)) 126 + return ERR_CAST(dmabuf); 127 + 128 + if (dmabuf->size < end) 129 + goto out_release_dmabuf; 130 + 131 + umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL); 132 + if (!umem_dmabuf) { 133 + ret = ERR_PTR(-ENOMEM); 134 + goto 
out_release_dmabuf; 135 + } 136 + 137 + umem = &umem_dmabuf->umem; 138 + umem->ibdev = device; 139 + umem->length = size; 140 + umem->address = offset; 141 + umem->writable = ib_access_writable(access); 142 + umem->is_dmabuf = 1; 143 + 144 + if (!ib_umem_num_pages(umem)) 145 + goto out_free_umem; 146 + 147 + umem_dmabuf->attach = dma_buf_dynamic_attach( 148 + dmabuf, 149 + device->dma_device, 150 + ops, 151 + umem_dmabuf); 152 + if (IS_ERR(umem_dmabuf->attach)) { 153 + ret = ERR_CAST(umem_dmabuf->attach); 154 + goto out_free_umem; 155 + } 156 + return umem_dmabuf; 157 + 158 + out_free_umem: 159 + kfree(umem_dmabuf); 160 + 161 + out_release_dmabuf: 162 + dma_buf_put(dmabuf); 163 + return ret; 164 + } 165 + EXPORT_SYMBOL(ib_umem_dmabuf_get); 166 + 167 + void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) 168 + { 169 + struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; 170 + 171 + dma_buf_detach(dmabuf, umem_dmabuf->attach); 172 + dma_buf_put(dmabuf); 173 + kfree(umem_dmabuf); 174 + }
+45 -3
include/rdma/ib_umem.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 2 /* 3 3 * Copyright (c) 2007 Cisco Systems. All rights reserved. 4 + * Copyright (c) 2020 Intel Corporation. All rights reserved. 4 5 */ 5 6 6 7 #ifndef IB_UMEM_H ··· 14 13 15 14 struct ib_ucontext; 16 15 struct ib_umem_odp; 16 + struct dma_buf_attach_ops; 17 17 18 18 struct ib_umem { 19 19 struct ib_device *ibdev; ··· 24 22 unsigned long address; 25 23 u32 writable : 1; 26 24 u32 is_odp : 1; 25 + u32 is_dmabuf : 1; 27 26 struct work_struct work; 28 27 struct sg_table sg_head; 29 28 int nmap; 30 29 unsigned int sg_nents; 31 30 }; 31 + 32 + struct ib_umem_dmabuf { 33 + struct ib_umem umem; 34 + struct dma_buf_attachment *attach; 35 + struct sg_table *sgt; 36 + struct scatterlist *first_sg; 37 + struct scatterlist *last_sg; 38 + unsigned long first_sg_offset; 39 + unsigned long last_sg_trim; 40 + void *private; 41 + }; 42 + 43 + static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) 44 + { 45 + return container_of(umem, struct ib_umem_dmabuf, umem); 46 + } 32 47 33 48 /* Returns the offset of the umem start relative to the first page. */ 34 49 static inline int ib_umem_offset(struct ib_umem *umem) ··· 105 86 unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, 106 87 unsigned long pgsz_bitmap, 107 88 unsigned long virt); 89 + 108 90 /** 109 91 * ib_umem_find_best_pgoff - Find best HW page size 110 92 * ··· 136 116 dma_addr & pgoff_bitmask); 137 117 } 138 118 119 + struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, 120 + unsigned long offset, size_t size, 121 + int fd, int access, 122 + const struct dma_buf_attach_ops *ops); 123 + int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); 124 + void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); 125 + void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); 126 + 139 127 #else /* CONFIG_INFINIBAND_USER_MEM */ 140 128 141 129 #include <linux/err.h> ··· 152 124 unsigned long addr, size_t size, 153 125 int access) 154 126 { 155 - return ERR_PTR(-EINVAL); 127 + return ERR_PTR(-EOPNOTSUPP); 156 128 } 157 129 static inline void ib_umem_release(struct ib_umem *umem) { } 158 130 static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, 159 131 size_t length) { 160 - return -EINVAL; 132 + return -EOPNOTSUPP; 161 133 } 162 134 static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, 163 135 unsigned long pgsz_bitmap, ··· 171 143 { 172 144 return 0; 173 145 } 146 + static inline 147 + struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, 148 + unsigned long offset, 149 + size_t size, int fd, 150 + int access, 151 + struct dma_buf_attach_ops *ops) 152 + { 153 + return ERR_PTR(-EOPNOTSUPP); 154 + } 155 + static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) 156 + { 157 + return -EOPNOTSUPP; 158 + } 159 + static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } 160 + static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } 174 161 175 162 #endif /* CONFIG_INFINIBAND_USER_MEM */ 176 - 177 163 #endif /* IB_UMEM_H */