Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/rds: Remove FMR support for memory registration

Use FRWR method for memory registration by default and remove the ancient
and unsafe FMR method.

Link: https://lore.kernel.org/r/3-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

Authored by Max Gurtovoy and committed by Jason Gunthorpe.
Commit: 07549ee2 (parent: f273ad4f)

+21 -322
+1 -1
net/rds/Makefile
··· 7 7 obj-$(CONFIG_RDS_RDMA) += rds_rdma.o 8 8 rds_rdma-y := rdma_transport.o \ 9 9 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ 10 - ib_sysctl.o ib_rdma.o ib_fmr.o ib_frmr.o 10 + ib_sysctl.o ib_rdma.o ib_frmr.o 11 11 12 12 13 13 obj-$(CONFIG_RDS_TCP) += rds_tcp.o
+7 -13
net/rds/ib.c
··· 130 130 static int rds_ib_add_one(struct ib_device *device) 131 131 { 132 132 struct rds_ib_device *rds_ibdev; 133 - bool has_fr, has_fmr; 134 133 int ret; 135 134 136 135 /* Only handle IB (no iWARP) devices */ 137 136 if (device->node_type != RDMA_NODE_IB_CA) 137 + return -EOPNOTSUPP; 138 + 139 + /* Device must support FRWR */ 140 + if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 138 141 return -EOPNOTSUPP; 139 142 140 143 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ··· 155 152 rds_ibdev->max_wrs = device->attrs.max_qp_wr; 156 153 rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); 157 154 158 - has_fr = (device->attrs.device_cap_flags & 159 - IB_DEVICE_MEM_MGT_EXTENSIONS); 160 - has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr && 161 - device->ops.map_phys_fmr && device->ops.unmap_fmr); 162 - rds_ibdev->use_fastreg = (has_fr && !has_fmr); 163 155 rds_ibdev->odp_capable = 164 156 !!(device->attrs.device_cap_flags & 165 157 IB_DEVICE_ON_DEMAND_PAGING) && ··· 163 165 !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & 164 166 IB_ODP_SUPPORT_READ); 165 167 166 - rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 167 168 rds_ibdev->max_1m_mrs = device->attrs.max_mr ? 168 169 min_t(unsigned int, (device->attrs.max_mr / 2), 169 170 rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; ··· 216 219 goto put_dev; 217 220 } 218 221 219 - rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", 222 + rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", 220 223 device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, 221 - rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs, 222 - rds_ibdev->max_8k_mrs); 224 + rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); 223 225 224 - pr_info("RDS/IB: %s: %s supported and preferred\n", 225 - device->name, 226 - rds_ibdev->use_fastreg ? 
"FRMR" : "FMR"); 226 + pr_info("RDS/IB: %s: added\n", device->name); 227 227 228 228 down_write(&rds_ib_devices_lock); 229 229 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
-2
net/rds/ib.h
··· 247 247 struct ib_device *dev; 248 248 struct ib_pd *pd; 249 249 struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ 250 - u8 use_fastreg:1; 251 250 u8 odp_capable:1; 252 251 253 252 unsigned int max_mrs; 254 253 struct rds_ib_mr_pool *mr_1m_pool; 255 254 struct rds_ib_mr_pool *mr_8k_pool; 256 - unsigned int fmr_max_remaps; 257 255 unsigned int max_8k_mrs; 258 256 unsigned int max_1m_mrs; 259 257 int max_sge;
+2 -2
net/rds/ib_cm.c
··· 527 527 return -EOPNOTSUPP; 528 528 529 529 /* The fr_queue_space is currently set to 512, to add extra space on 530 - * completion queue and send queue. This extra space is used for FRMR 530 + * completion queue and send queue. This extra space is used for FRWR 531 531 * registration and invalidation work requests 532 532 */ 533 - fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); 533 + fr_queue_space = RDS_IB_DEFAULT_FR_WR; 534 534 535 535 /* add the conn now so that connection establishment has the dev */ 536 536 rds_ib_add_conn(rds_ibdev, conn);
-269
net/rds/ib_fmr.c
··· 1 - /* 2 - * Copyright (c) 2016 Oracle. All rights reserved. 3 - * 4 - * This software is available to you under a choice of one of two 5 - * licenses. You may choose to be licensed under the terms of the GNU 6 - * General Public License (GPL) Version 2, available from the file 7 - * COPYING in the main directory of this source tree, or the 8 - * OpenIB.org BSD license below: 9 - * 10 - * Redistribution and use in source and binary forms, with or 11 - * without modification, are permitted provided that the following 12 - * conditions are met: 13 - * 14 - * - Redistributions of source code must retain the above 15 - * copyright notice, this list of conditions and the following 16 - * disclaimer. 17 - * 18 - * - Redistributions in binary form must reproduce the above 19 - * copyright notice, this list of conditions and the following 20 - * disclaimer in the documentation and/or other materials 21 - * provided with the distribution. 22 - * 23 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 - * SOFTWARE. 
31 - */ 32 - 33 - #include "ib_mr.h" 34 - 35 - struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) 36 - { 37 - struct rds_ib_mr_pool *pool; 38 - struct rds_ib_mr *ibmr = NULL; 39 - struct rds_ib_fmr *fmr; 40 - int err = 0; 41 - 42 - if (npages <= RDS_MR_8K_MSG_SIZE) 43 - pool = rds_ibdev->mr_8k_pool; 44 - else 45 - pool = rds_ibdev->mr_1m_pool; 46 - 47 - if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) 48 - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); 49 - 50 - /* Switch pools if one of the pool is reaching upper limit */ 51 - if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { 52 - if (pool->pool_type == RDS_IB_MR_8K_POOL) 53 - pool = rds_ibdev->mr_1m_pool; 54 - else 55 - pool = rds_ibdev->mr_8k_pool; 56 - } 57 - 58 - ibmr = rds_ib_try_reuse_ibmr(pool); 59 - if (ibmr) 60 - return ibmr; 61 - 62 - ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, 63 - rdsibdev_to_node(rds_ibdev)); 64 - if (!ibmr) { 65 - err = -ENOMEM; 66 - goto out_no_cigar; 67 - } 68 - 69 - fmr = &ibmr->u.fmr; 70 - fmr->fmr = ib_alloc_fmr(rds_ibdev->pd, 71 - (IB_ACCESS_LOCAL_WRITE | 72 - IB_ACCESS_REMOTE_READ | 73 - IB_ACCESS_REMOTE_WRITE | 74 - IB_ACCESS_REMOTE_ATOMIC), 75 - &pool->fmr_attr); 76 - if (IS_ERR(fmr->fmr)) { 77 - err = PTR_ERR(fmr->fmr); 78 - fmr->fmr = NULL; 79 - pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err); 80 - goto out_no_cigar; 81 - } 82 - 83 - ibmr->pool = pool; 84 - if (pool->pool_type == RDS_IB_MR_8K_POOL) 85 - rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc); 86 - else 87 - rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc); 88 - 89 - return ibmr; 90 - 91 - out_no_cigar: 92 - kfree(ibmr); 93 - atomic_dec(&pool->item_count); 94 - 95 - return ERR_PTR(err); 96 - } 97 - 98 - static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, 99 - struct rds_ib_mr *ibmr, struct scatterlist *sg, 100 - unsigned int nents) 101 - { 102 - struct ib_device *dev = rds_ibdev->dev; 103 - struct rds_ib_fmr *fmr = &ibmr->u.fmr; 104 - struct 
scatterlist *scat = sg; 105 - u64 io_addr = 0; 106 - u64 *dma_pages; 107 - u32 len; 108 - int page_cnt, sg_dma_len; 109 - int i, j; 110 - int ret; 111 - 112 - sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL); 113 - if (unlikely(!sg_dma_len)) { 114 - pr_warn("RDS/IB: %s failed!\n", __func__); 115 - return -EBUSY; 116 - } 117 - 118 - len = 0; 119 - page_cnt = 0; 120 - 121 - for (i = 0; i < sg_dma_len; ++i) { 122 - unsigned int dma_len = sg_dma_len(&scat[i]); 123 - u64 dma_addr = sg_dma_address(&scat[i]); 124 - 125 - if (dma_addr & ~PAGE_MASK) { 126 - if (i > 0) { 127 - ib_dma_unmap_sg(dev, sg, nents, 128 - DMA_BIDIRECTIONAL); 129 - return -EINVAL; 130 - } else { 131 - ++page_cnt; 132 - } 133 - } 134 - if ((dma_addr + dma_len) & ~PAGE_MASK) { 135 - if (i < sg_dma_len - 1) { 136 - ib_dma_unmap_sg(dev, sg, nents, 137 - DMA_BIDIRECTIONAL); 138 - return -EINVAL; 139 - } else { 140 - ++page_cnt; 141 - } 142 - } 143 - 144 - len += dma_len; 145 - } 146 - 147 - page_cnt += len >> PAGE_SHIFT; 148 - if (page_cnt > ibmr->pool->fmr_attr.max_pages) { 149 - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); 150 - return -EINVAL; 151 - } 152 - 153 - dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC, 154 - rdsibdev_to_node(rds_ibdev)); 155 - if (!dma_pages) { 156 - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); 157 - return -ENOMEM; 158 - } 159 - 160 - page_cnt = 0; 161 - for (i = 0; i < sg_dma_len; ++i) { 162 - unsigned int dma_len = sg_dma_len(&scat[i]); 163 - u64 dma_addr = sg_dma_address(&scat[i]); 164 - 165 - for (j = 0; j < dma_len; j += PAGE_SIZE) 166 - dma_pages[page_cnt++] = 167 - (dma_addr & PAGE_MASK) + j; 168 - } 169 - 170 - ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr); 171 - if (ret) { 172 - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); 173 - goto out; 174 - } 175 - 176 - /* Success - we successfully remapped the MR, so we can 177 - * safely tear down the old mapping. 
178 - */ 179 - rds_ib_teardown_mr(ibmr); 180 - 181 - ibmr->sg = scat; 182 - ibmr->sg_len = nents; 183 - ibmr->sg_dma_len = sg_dma_len; 184 - ibmr->remap_count++; 185 - 186 - if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) 187 - rds_ib_stats_inc(s_ib_rdma_mr_8k_used); 188 - else 189 - rds_ib_stats_inc(s_ib_rdma_mr_1m_used); 190 - ret = 0; 191 - 192 - out: 193 - kfree(dma_pages); 194 - 195 - return ret; 196 - } 197 - 198 - struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev, 199 - struct scatterlist *sg, 200 - unsigned long nents, 201 - u32 *key) 202 - { 203 - struct rds_ib_mr *ibmr = NULL; 204 - struct rds_ib_fmr *fmr; 205 - int ret; 206 - 207 - ibmr = rds_ib_alloc_fmr(rds_ibdev, nents); 208 - if (IS_ERR(ibmr)) 209 - return ibmr; 210 - 211 - ibmr->device = rds_ibdev; 212 - fmr = &ibmr->u.fmr; 213 - ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); 214 - if (ret == 0) 215 - *key = fmr->fmr->rkey; 216 - else 217 - rds_ib_free_mr(ibmr, 0); 218 - 219 - return ibmr; 220 - } 221 - 222 - void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed, 223 - unsigned long *unpinned, unsigned int goal) 224 - { 225 - struct rds_ib_mr *ibmr, *next; 226 - struct rds_ib_fmr *fmr; 227 - LIST_HEAD(fmr_list); 228 - int ret = 0; 229 - unsigned int freed = *nfreed; 230 - 231 - /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 232 - list_for_each_entry(ibmr, list, unmap_list) { 233 - fmr = &ibmr->u.fmr; 234 - list_add(&fmr->fmr->list, &fmr_list); 235 - } 236 - 237 - ret = ib_unmap_fmr(&fmr_list); 238 - if (ret) 239 - pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret); 240 - 241 - /* Now we can destroy the DMA mapping and unpin any pages */ 242 - list_for_each_entry_safe(ibmr, next, list, unmap_list) { 243 - fmr = &ibmr->u.fmr; 244 - *unpinned += ibmr->sg_len; 245 - __rds_ib_teardown_mr(ibmr); 246 - if (freed < goal || 247 - ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) { 248 - if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) 249 - 
rds_ib_stats_inc(s_ib_rdma_mr_8k_free); 250 - else 251 - rds_ib_stats_inc(s_ib_rdma_mr_1m_free); 252 - list_del(&ibmr->unmap_list); 253 - ib_dealloc_fmr(fmr->fmr); 254 - kfree(ibmr); 255 - freed++; 256 - } 257 - } 258 - *nfreed = freed; 259 - } 260 - 261 - void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr) 262 - { 263 - struct rds_ib_mr_pool *pool = ibmr->pool; 264 - 265 - if (ibmr->remap_count >= pool->fmr_attr.max_maps) 266 - llist_add(&ibmr->llnode, &pool->drop_list); 267 - else 268 - llist_add(&ibmr->llnode, &pool->free_list); 269 - }
+2 -2
net/rds/ib_frmr.c
··· 76 76 77 77 frmr = &ibmr->u.frmr; 78 78 frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG, 79 - pool->fmr_attr.max_pages); 79 + pool->max_pages); 80 80 if (IS_ERR(frmr->mr)) { 81 81 pr_warn("RDS/IB: %s failed to allocate MR", __func__); 82 82 err = PTR_ERR(frmr->mr); ··· 240 240 } 241 241 frmr->dma_npages += len >> PAGE_SHIFT; 242 242 243 - if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) { 243 + if (frmr->dma_npages > ibmr->pool->max_pages) { 244 244 ret = -EMSGSIZE; 245 245 goto out_unmap; 246 246 }
+1 -13
net/rds/ib_mr.h
··· 43 43 #define RDS_MR_8K_SCALE (256 / (RDS_MR_8K_MSG_SIZE + 1)) 44 44 #define RDS_MR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2)) 45 45 46 - struct rds_ib_fmr { 47 - struct ib_fmr *fmr; 48 - }; 49 - 50 46 enum rds_ib_fr_state { 51 47 FRMR_IS_FREE, /* mr invalidated & ready for use */ 52 48 FRMR_IS_INUSE, /* mr is in use or used & can be invalidated */ ··· 80 84 81 85 u8 odp:1; 82 86 union { 83 - struct rds_ib_fmr fmr; 84 87 struct rds_ib_frmr frmr; 85 88 struct ib_mr *mr; 86 89 } u; ··· 104 109 unsigned long max_items; 105 110 unsigned long max_items_soft; 106 111 unsigned long max_free_pinned; 107 - struct ib_fmr_attr fmr_attr; 108 - bool use_fastreg; 112 + unsigned int max_pages; 109 113 }; 110 114 111 115 extern struct workqueue_struct *rds_ib_mr_wq; ··· 130 136 131 137 void __rds_ib_teardown_mr(struct rds_ib_mr *); 132 138 void rds_ib_teardown_mr(struct rds_ib_mr *); 133 - struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int); 134 139 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *); 135 140 int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **); 136 - struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *, 137 - unsigned long, u32 *); 138 141 struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *); 139 - void rds_ib_unreg_fmr(struct list_head *, unsigned int *, 140 - unsigned long *, unsigned int); 141 - void rds_ib_free_fmr_list(struct rds_ib_mr *); 142 142 struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, 143 143 struct rds_ib_connection *ic, 144 144 struct scatterlist *sg,
+8 -20
net/rds/ib_rdma.c
··· 181 181 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 182 182 183 183 iinfo->rdma_mr_max = pool_1m->max_items; 184 - iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; 184 + iinfo->rdma_mr_size = pool_1m->max_pages; 185 185 } 186 186 187 187 #if IS_ENABLED(CONFIG_IPV6) ··· 191 191 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 192 192 193 193 iinfo6->rdma_mr_max = pool_1m->max_items; 194 - iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages; 194 + iinfo6->rdma_mr_size = pool_1m->max_pages; 195 195 } 196 196 #endif 197 197 ··· 406 406 if (list_empty(&unmap_list)) 407 407 goto out; 408 408 409 - if (pool->use_fastreg) 410 - rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); 411 - else 412 - rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal); 409 + rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); 413 410 414 411 if (!list_empty(&unmap_list)) { 415 412 unsigned long flags; ··· 500 503 } 501 504 502 505 /* Return it to the pool's free list */ 503 - if (rds_ibdev->use_fastreg) 504 - rds_ib_free_frmr_list(ibmr); 505 - else 506 - rds_ib_free_fmr_list(ibmr); 506 + rds_ib_free_frmr_list(ibmr); 507 507 508 508 atomic_add(ibmr->sg_len, &pool->free_pinned); 509 509 atomic_inc(&pool->dirty_count); ··· 616 622 goto out; 617 623 } 618 624 619 - if (rds_ibdev->use_fastreg) 620 - ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); 621 - else 622 - ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret); 625 + ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); 623 626 if (IS_ERR(ibmr)) { 624 627 ret = PTR_ERR(ibmr); 625 628 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); ··· 660 669 661 670 if (pool_type == RDS_IB_MR_1M_POOL) { 662 671 /* +1 allows for unaligned MRs */ 663 - pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; 672 + pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; 664 673 pool->max_items = rds_ibdev->max_1m_mrs; 665 674 } else { 666 675 /* pool_type == RDS_IB_MR_8K_POOL */ 667 - 
pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; 676 + pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; 668 677 pool->max_items = rds_ibdev->max_8k_mrs; 669 678 } 670 679 671 - pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; 672 - pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 673 - pool->fmr_attr.page_shift = PAGE_SHIFT; 680 + pool->max_free_pinned = pool->max_items * pool->max_pages / 4; 674 681 pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; 675 - pool->use_fastreg = rds_ibdev->use_fastreg; 676 682 677 683 return pool; 678 684 }