Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
"Primary 4.7 merge window changes

- Updates to the new Intel X722 iWARP driver
- Updates to the hfi1 driver
- Fixes for the iw_cxgb4 driver
- Misc core fixes
- Generic RDMA READ/WRITE API addition
- SRP updates
- Misc ipoib updates
- Minor mlx5 updates"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (148 commits)
IB/mlx5: Fire the CQ completion handler from tasklet
net/mlx5_core: Use tasklet for user-space CQ completion events
IB/core: Do not require CAP_NET_ADMIN for packet sniffing
IB/mlx4: Fix unaligned access in send_reply_to_slave
IB/mlx5: Report Scatter FCS device capability when supported
IB/mlx5: Add Scatter FCS support for Raw Packet QP
IB/core: Add Scatter FCS create flag
IB/core: Add Raw Scatter FCS device capability
IB/core: Add extended device capability flags
i40iw: pass hw_stats by reference rather than by value
i40iw: Remove unnecessary synchronize_irq() before free_irq()
i40iw: constify i40iw_vf_cqp_ops structure
IB/mlx5: Add UARs write-combining and non-cached mapping
IB/mlx5: Allow mapping the free running counter on PROT_EXEC
IB/mlx4: Use list_for_each_entry_safe
IB/SA: Use correct free function
IB/core: Fix a potential array overrun in CMA and SA agent
IB/core: Remove unnecessary check in ibnl_rcv_msg
IB/IWPM: Fix a potential skb leak
RDMA/nes: replace custom print_hex_dump()
...

+4116 -2605
+2 -2
drivers/infiniband/core/Makefile
··· 8 8 obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ 9 9 $(user_access-y) 10 10 11 - ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \ 11 + ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ 12 12 device.o fmr_pool.o cache.o netlink.o \ 13 - roce_gid_mgmt.o 13 + roce_gid_mgmt.o mr_pool.o 14 14 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 15 15 ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o 16 16
+3 -1
drivers/infiniband/core/cma.c
··· 800 800 if (id->device != pd->device) 801 801 return -EINVAL; 802 802 803 + qp_init_attr->port_num = id->port_num; 803 804 qp = ib_create_qp(pd, qp_init_attr); 804 805 if (IS_ERR(qp)) 805 806 return PTR_ERR(qp); ··· 4295 4294 if (ret) 4296 4295 goto err; 4297 4296 4298 - if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) 4297 + if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table), 4298 + cma_cb_table)) 4299 4299 pr_warn("RDMA CMA: failed to add netlink callback\n"); 4300 4300 cma_configfs_init(); 4301 4301
+2 -2
drivers/infiniband/core/iwcm.c
··· 459 459 if (pm_addr->ss_family == AF_INET) { 460 460 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 461 461 462 - if (pm4_addr->sin_addr.s_addr == INADDR_ANY) { 462 + if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 463 463 struct sockaddr_in *cm4_addr = 464 464 (struct sockaddr_in *)cm_addr; 465 465 struct sockaddr_in *cm4_outaddr = ··· 1175 1175 if (ret) 1176 1176 pr_err("iw_cm: couldn't init iwpm\n"); 1177 1177 1178 - ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS, 1178 + ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table), 1179 1179 iwcm_nl_cb_table); 1180 1180 if (ret) 1181 1181 pr_err("iw_cm: couldn't register netlink callbacks\n");
+1
drivers/infiniband/core/iwpm_util.c
··· 634 634 if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, 635 635 RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { 636 636 pr_warn("%s Unable to put NLMSG_DONE\n", __func__); 637 + dev_kfree_skb(skb); 637 638 return -ENOMEM; 638 639 } 639 640 nlh->nlmsg_type = NLMSG_DONE;
+86
drivers/infiniband/core/mr_pool.c
··· 1 + /* 2 + * Copyright (c) 2016 HGST, a Western Digital Company. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #include <rdma/ib_verbs.h> 14 + #include <rdma/mr_pool.h> 15 + 16 + struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list) 17 + { 18 + struct ib_mr *mr; 19 + unsigned long flags; 20 + 21 + spin_lock_irqsave(&qp->mr_lock, flags); 22 + mr = list_first_entry_or_null(list, struct ib_mr, qp_entry); 23 + if (mr) { 24 + list_del(&mr->qp_entry); 25 + qp->mrs_used++; 26 + } 27 + spin_unlock_irqrestore(&qp->mr_lock, flags); 28 + 29 + return mr; 30 + } 31 + EXPORT_SYMBOL(ib_mr_pool_get); 32 + 33 + void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr) 34 + { 35 + unsigned long flags; 36 + 37 + spin_lock_irqsave(&qp->mr_lock, flags); 38 + list_add(&mr->qp_entry, list); 39 + qp->mrs_used--; 40 + spin_unlock_irqrestore(&qp->mr_lock, flags); 41 + } 42 + EXPORT_SYMBOL(ib_mr_pool_put); 43 + 44 + int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, 45 + enum ib_mr_type type, u32 max_num_sg) 46 + { 47 + struct ib_mr *mr; 48 + unsigned long flags; 49 + int ret, i; 50 + 51 + for (i = 0; i < nr; i++) { 52 + mr = ib_alloc_mr(qp->pd, type, max_num_sg); 53 + if (IS_ERR(mr)) { 54 + ret = PTR_ERR(mr); 55 + goto out; 56 + } 57 + 58 + spin_lock_irqsave(&qp->mr_lock, flags); 59 + list_add_tail(&mr->qp_entry, list); 60 + spin_unlock_irqrestore(&qp->mr_lock, flags); 61 + } 62 + 63 + return 0; 64 + out: 65 + ib_mr_pool_destroy(qp, list); 66 + return ret; 67 + } 68 + EXPORT_SYMBOL(ib_mr_pool_init); 69 + 70 + void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list) 71 + { 72 + struct ib_mr *mr; 73 + unsigned long flags; 74 + 75 + spin_lock_irqsave(&qp->mr_lock, flags); 76 + while (!list_empty(list)) { 77 + mr = list_first_entry(list, struct ib_mr, qp_entry); 78 + list_del(&mr->qp_entry); 79 + 80 + spin_unlock_irqrestore(&qp->mr_lock, flags); 81 + ib_dereg_mr(mr); 82 + spin_lock_irqsave(&qp->mr_lock, flags); 83 + } 84 + spin_unlock_irqrestore(&qp->mr_lock, flags); 85 + } 86 + EXPORT_SYMBOL(ib_mr_pool_destroy);
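
The pool is normally filled by the core at QP creation time (see rdma_rw_init_mrs() in the new rw.c below), but the get/map/put pattern itself is small. A minimal caller-side sketch, assuming a QP whose qp->rdma_mrs list was already populated with ib_mr_pool_init(); everything except the ib_mr_pool_*() and ib_map_mr_sg() calls is hypothetical:

#include <linux/err.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
#include <rdma/mr_pool.h>

/*
 * Borrow an MR from the QP's READ/WRITE pool, map a scatterlist into it,
 * and hand it back on failure.  All names outside the ib_mr_pool_*() and
 * ib_map_mr_sg() calls are made up for this sketch.
 */
static struct ib_mr *example_map_one_mr(struct ib_qp *qp,
					struct scatterlist *sg, int nents)
{
	struct ib_mr *mr;
	int n;

	mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
	if (!mr)
		return ERR_PTR(-EAGAIN);	/* pool exhausted, retry later */

	n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
	if (n < nents) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, mr);
		return ERR_PTR(-EINVAL);
	}

	/* Caller still posts an IB_WR_REG_MR before using mr->rkey. */
	return mr;
}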
+2 -3
drivers/infiniband/core/netlink.c
··· 151 151 struct ibnl_client *client; 152 152 int type = nlh->nlmsg_type; 153 153 int index = RDMA_NL_GET_CLIENT(type); 154 - int op = RDMA_NL_GET_OP(type); 154 + unsigned int op = RDMA_NL_GET_OP(type); 155 155 156 156 list_for_each_entry(client, &client_list, list) { 157 157 if (client->index == index) { 158 - if (op < 0 || op >= client->nops || 159 - !client->cb_table[op].dump) 158 + if (op >= client->nops || !client->cb_table[op].dump) 160 159 return -EINVAL; 161 160 162 161 /*
+727
drivers/infiniband/core/rw.c
··· 1 + /* 2 + * Copyright (c) 2016 HGST, a Western Digital Company. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #include <linux/moduleparam.h> 14 + #include <linux/slab.h> 15 + #include <rdma/mr_pool.h> 16 + #include <rdma/rw.h> 17 + 18 + enum { 19 + RDMA_RW_SINGLE_WR, 20 + RDMA_RW_MULTI_WR, 21 + RDMA_RW_MR, 22 + RDMA_RW_SIG_MR, 23 + }; 24 + 25 + static bool rdma_rw_force_mr; 26 + module_param_named(force_mr, rdma_rw_force_mr, bool, 0); 27 + MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations"); 28 + 29 + /* 30 + * Check if the device might use memory registration. This is currently only 31 + * true for iWarp devices. In the future we can hopefully fine tune this based 32 + * on HCA driver input. 33 + */ 34 + static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) 35 + { 36 + if (rdma_protocol_iwarp(dev, port_num)) 37 + return true; 38 + if (unlikely(rdma_rw_force_mr)) 39 + return true; 40 + return false; 41 + } 42 + 43 + /* 44 + * Check if the device will use memory registration for this RW operation. 45 + * We currently always use memory registrations for iWarp RDMA READs, and 46 + * have a debug option to force usage of MRs. 47 + * 48 + * XXX: In the future we can hopefully fine tune this based on HCA driver 49 + * input. 50 + */ 51 + static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, 52 + enum dma_data_direction dir, int dma_nents) 53 + { 54 + if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE) 55 + return true; 56 + if (unlikely(rdma_rw_force_mr)) 57 + return true; 58 + return false; 59 + } 60 + 61 + static inline u32 rdma_rw_max_sge(struct ib_device *dev, 62 + enum dma_data_direction dir) 63 + { 64 + return dir == DMA_TO_DEVICE ? 
65 + dev->attrs.max_sge : dev->attrs.max_sge_rd; 66 + } 67 + 68 + static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) 69 + { 70 + /* arbitrary limit to avoid allocating gigantic resources */ 71 + return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); 72 + } 73 + 74 + static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, 75 + struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, 76 + u32 sg_cnt, u32 offset) 77 + { 78 + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 79 + u32 nents = min(sg_cnt, pages_per_mr); 80 + int count = 0, ret; 81 + 82 + reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs); 83 + if (!reg->mr) 84 + return -EAGAIN; 85 + 86 + if (reg->mr->need_inval) { 87 + reg->inv_wr.opcode = IB_WR_LOCAL_INV; 88 + reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; 89 + reg->inv_wr.next = &reg->reg_wr.wr; 90 + count++; 91 + } else { 92 + reg->inv_wr.next = NULL; 93 + } 94 + 95 + ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); 96 + if (ret < nents) { 97 + ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); 98 + return -EINVAL; 99 + } 100 + 101 + reg->reg_wr.wr.opcode = IB_WR_REG_MR; 102 + reg->reg_wr.mr = reg->mr; 103 + reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE; 104 + if (rdma_protocol_iwarp(qp->device, port_num)) 105 + reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE; 106 + count++; 107 + 108 + reg->sge.addr = reg->mr->iova; 109 + reg->sge.length = reg->mr->length; 110 + return count; 111 + } 112 + 113 + static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 114 + u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset, 115 + u64 remote_addr, u32 rkey, enum dma_data_direction dir) 116 + { 117 + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 118 + int i, j, ret = 0, count = 0; 119 + 120 + ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr; 121 + ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL); 122 + if (!ctx->reg) { 123 + ret = -ENOMEM; 124 + goto out; 125 + } 126 + 127 + for (i = 0; i < ctx->nr_ops; i++) { 128 + struct rdma_rw_reg_ctx *prev = i ? 
&ctx->reg[i - 1] : NULL; 129 + struct rdma_rw_reg_ctx *reg = &ctx->reg[i]; 130 + u32 nents = min(sg_cnt, pages_per_mr); 131 + 132 + ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt, 133 + offset); 134 + if (ret < 0) 135 + goto out_free; 136 + count += ret; 137 + 138 + if (prev) { 139 + if (reg->mr->need_inval) 140 + prev->wr.wr.next = &reg->inv_wr; 141 + else 142 + prev->wr.wr.next = &reg->reg_wr.wr; 143 + } 144 + 145 + reg->reg_wr.wr.next = &reg->wr.wr; 146 + 147 + reg->wr.wr.sg_list = &reg->sge; 148 + reg->wr.wr.num_sge = 1; 149 + reg->wr.remote_addr = remote_addr; 150 + reg->wr.rkey = rkey; 151 + if (dir == DMA_TO_DEVICE) { 152 + reg->wr.wr.opcode = IB_WR_RDMA_WRITE; 153 + } else if (!rdma_cap_read_inv(qp->device, port_num)) { 154 + reg->wr.wr.opcode = IB_WR_RDMA_READ; 155 + } else { 156 + reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; 157 + reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey; 158 + } 159 + count++; 160 + 161 + remote_addr += reg->sge.length; 162 + sg_cnt -= nents; 163 + for (j = 0; j < nents; j++) 164 + sg = sg_next(sg); 165 + offset = 0; 166 + } 167 + 168 + ctx->type = RDMA_RW_MR; 169 + return count; 170 + 171 + out_free: 172 + while (--i >= 0) 173 + ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr); 174 + kfree(ctx->reg); 175 + out: 176 + return ret; 177 + } 178 + 179 + static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 180 + struct scatterlist *sg, u32 sg_cnt, u32 offset, 181 + u64 remote_addr, u32 rkey, enum dma_data_direction dir) 182 + { 183 + struct ib_device *dev = qp->pd->device; 184 + u32 max_sge = rdma_rw_max_sge(dev, dir); 185 + struct ib_sge *sge; 186 + u32 total_len = 0, i, j; 187 + 188 + ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge); 189 + 190 + ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL); 191 + if (!ctx->map.sges) 192 + goto out; 193 + 194 + ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL); 195 + if (!ctx->map.wrs) 196 + goto out_free_sges; 197 + 198 + for (i = 0; i < ctx->nr_ops; i++) { 199 + struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i]; 200 + u32 nr_sge = min(sg_cnt, max_sge); 201 + 202 + if (dir == DMA_TO_DEVICE) 203 + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 204 + else 205 + rdma_wr->wr.opcode = IB_WR_RDMA_READ; 206 + rdma_wr->remote_addr = remote_addr + total_len; 207 + rdma_wr->rkey = rkey; 208 + rdma_wr->wr.sg_list = sge; 209 + 210 + for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { 211 + rdma_wr->wr.num_sge++; 212 + 213 + sge->addr = ib_sg_dma_address(dev, sg) + offset; 214 + sge->length = ib_sg_dma_len(dev, sg) - offset; 215 + sge->lkey = qp->pd->local_dma_lkey; 216 + 217 + total_len += sge->length; 218 + sge++; 219 + sg_cnt--; 220 + offset = 0; 221 + } 222 + 223 + if (i + 1 < ctx->nr_ops) 224 + rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr; 225 + } 226 + 227 + ctx->type = RDMA_RW_MULTI_WR; 228 + return ctx->nr_ops; 229 + 230 + out_free_sges: 231 + kfree(ctx->map.sges); 232 + out: 233 + return -ENOMEM; 234 + } 235 + 236 + static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 237 + struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey, 238 + enum dma_data_direction dir) 239 + { 240 + struct ib_device *dev = qp->pd->device; 241 + struct ib_rdma_wr *rdma_wr = &ctx->single.wr; 242 + 243 + ctx->nr_ops = 1; 244 + 245 + ctx->single.sge.lkey = qp->pd->local_dma_lkey; 246 + ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset; 247 + ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset; 248 + 249 + memset(rdma_wr, 0, sizeof(*rdma_wr)); 250 + if (dir == 
DMA_TO_DEVICE) 251 + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 252 + else 253 + rdma_wr->wr.opcode = IB_WR_RDMA_READ; 254 + rdma_wr->wr.sg_list = &ctx->single.sge; 255 + rdma_wr->wr.num_sge = 1; 256 + rdma_wr->remote_addr = remote_addr; 257 + rdma_wr->rkey = rkey; 258 + 259 + ctx->type = RDMA_RW_SINGLE_WR; 260 + return 1; 261 + } 262 + 263 + /** 264 + * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context 265 + * @ctx: context to initialize 266 + * @qp: queue pair to operate on 267 + * @port_num: port num to which the connection is bound 268 + * @sg: scatterlist to READ/WRITE from/to 269 + * @sg_cnt: number of entries in @sg 270 + * @sg_offset: current byte offset into @sg 271 + * @remote_addr:remote address to read/write (relative to @rkey) 272 + * @rkey: remote key to operate on 273 + * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 274 + * 275 + * Returns the number of WQEs that will be needed on the workqueue if 276 + * successful, or a negative error code. 277 + */ 278 + int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 279 + struct scatterlist *sg, u32 sg_cnt, u32 sg_offset, 280 + u64 remote_addr, u32 rkey, enum dma_data_direction dir) 281 + { 282 + struct ib_device *dev = qp->pd->device; 283 + int ret; 284 + 285 + ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); 286 + if (!ret) 287 + return -ENOMEM; 288 + sg_cnt = ret; 289 + 290 + /* 291 + * Skip to the S/G entry that sg_offset falls into: 292 + */ 293 + for (;;) { 294 + u32 len = ib_sg_dma_len(dev, sg); 295 + 296 + if (sg_offset < len) 297 + break; 298 + 299 + sg = sg_next(sg); 300 + sg_offset -= len; 301 + sg_cnt--; 302 + } 303 + 304 + ret = -EIO; 305 + if (WARN_ON_ONCE(sg_cnt == 0)) 306 + goto out_unmap_sg; 307 + 308 + if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) { 309 + ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt, 310 + sg_offset, remote_addr, rkey, dir); 311 + } else if (sg_cnt > 1) { 312 + ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset, 313 + remote_addr, rkey, dir); 314 + } else { 315 + ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset, 316 + remote_addr, rkey, dir); 317 + } 318 + 319 + if (ret < 0) 320 + goto out_unmap_sg; 321 + return ret; 322 + 323 + out_unmap_sg: 324 + ib_dma_unmap_sg(dev, sg, sg_cnt, dir); 325 + return ret; 326 + } 327 + EXPORT_SYMBOL(rdma_rw_ctx_init); 328 + 329 + /** 330 + * rdma_rw_ctx_signature init - initialize a RW context with signature offload 331 + * @ctx: context to initialize 332 + * @qp: queue pair to operate on 333 + * @port_num: port num to which the connection is bound 334 + * @sg: scatterlist to READ/WRITE from/to 335 + * @sg_cnt: number of entries in @sg 336 + * @prot_sg: scatterlist to READ/WRITE protection information from/to 337 + * @prot_sg_cnt: number of entries in @prot_sg 338 + * @sig_attrs: signature offloading algorithms 339 + * @remote_addr:remote address to read/write (relative to @rkey) 340 + * @rkey: remote key to operate on 341 + * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 342 + * 343 + * Returns the number of WQEs that will be needed on the workqueue if 344 + * successful, or a negative error code. 
345 + */ 346 + int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 347 + u8 port_num, struct scatterlist *sg, u32 sg_cnt, 348 + struct scatterlist *prot_sg, u32 prot_sg_cnt, 349 + struct ib_sig_attrs *sig_attrs, 350 + u64 remote_addr, u32 rkey, enum dma_data_direction dir) 351 + { 352 + struct ib_device *dev = qp->pd->device; 353 + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 354 + struct ib_rdma_wr *rdma_wr; 355 + struct ib_send_wr *prev_wr = NULL; 356 + int count = 0, ret; 357 + 358 + if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) { 359 + pr_err("SG count too large\n"); 360 + return -EINVAL; 361 + } 362 + 363 + ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); 364 + if (!ret) 365 + return -ENOMEM; 366 + sg_cnt = ret; 367 + 368 + ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); 369 + if (!ret) { 370 + ret = -ENOMEM; 371 + goto out_unmap_sg; 372 + } 373 + prot_sg_cnt = ret; 374 + 375 + ctx->type = RDMA_RW_SIG_MR; 376 + ctx->nr_ops = 1; 377 + ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL); 378 + if (!ctx->sig) { 379 + ret = -ENOMEM; 380 + goto out_unmap_prot_sg; 381 + } 382 + 383 + ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0); 384 + if (ret < 0) 385 + goto out_free_ctx; 386 + count += ret; 387 + prev_wr = &ctx->sig->data.reg_wr.wr; 388 + 389 + if (prot_sg_cnt) { 390 + ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, 391 + prot_sg, prot_sg_cnt, 0); 392 + if (ret < 0) 393 + goto out_destroy_data_mr; 394 + count += ret; 395 + 396 + if (ctx->sig->prot.inv_wr.next) 397 + prev_wr->next = &ctx->sig->prot.inv_wr; 398 + else 399 + prev_wr->next = &ctx->sig->prot.reg_wr.wr; 400 + prev_wr = &ctx->sig->prot.reg_wr.wr; 401 + } else { 402 + ctx->sig->prot.mr = NULL; 403 + } 404 + 405 + ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs); 406 + if (!ctx->sig->sig_mr) { 407 + ret = -EAGAIN; 408 + goto out_destroy_prot_mr; 409 + } 410 + 411 + if (ctx->sig->sig_mr->need_inval) { 412 + memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr)); 413 + 414 + ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV; 415 + ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey; 416 + 417 + prev_wr->next = &ctx->sig->sig_inv_wr; 418 + prev_wr = &ctx->sig->sig_inv_wr; 419 + } 420 + 421 + ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR; 422 + ctx->sig->sig_wr.wr.wr_cqe = NULL; 423 + ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge; 424 + ctx->sig->sig_wr.wr.num_sge = 1; 425 + ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; 426 + ctx->sig->sig_wr.sig_attrs = sig_attrs; 427 + ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr; 428 + if (prot_sg_cnt) 429 + ctx->sig->sig_wr.prot = &ctx->sig->prot.sge; 430 + prev_wr->next = &ctx->sig->sig_wr.wr; 431 + prev_wr = &ctx->sig->sig_wr.wr; 432 + count++; 433 + 434 + ctx->sig->sig_sge.addr = 0; 435 + ctx->sig->sig_sge.length = ctx->sig->data.sge.length; 436 + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE) 437 + ctx->sig->sig_sge.length += ctx->sig->prot.sge.length; 438 + 439 + rdma_wr = &ctx->sig->data.wr; 440 + rdma_wr->wr.sg_list = &ctx->sig->sig_sge; 441 + rdma_wr->wr.num_sge = 1; 442 + rdma_wr->remote_addr = remote_addr; 443 + rdma_wr->rkey = rkey; 444 + if (dir == DMA_TO_DEVICE) 445 + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 446 + else 447 + rdma_wr->wr.opcode = IB_WR_RDMA_READ; 448 + prev_wr->next = &rdma_wr->wr; 449 + prev_wr = &rdma_wr->wr; 450 + count++; 451 + 452 + return count; 453 + 454 + out_destroy_prot_mr: 455 + if (prot_sg_cnt) 456 + ib_mr_pool_put(qp, &qp->rdma_mrs, 
ctx->sig->prot.mr); 457 + out_destroy_data_mr: 458 + ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); 459 + out_free_ctx: 460 + kfree(ctx->sig); 461 + out_unmap_prot_sg: 462 + ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); 463 + out_unmap_sg: 464 + ib_dma_unmap_sg(dev, sg, sg_cnt, dir); 465 + return ret; 466 + } 467 + EXPORT_SYMBOL(rdma_rw_ctx_signature_init); 468 + 469 + /* 470 + * Now that we are going to post the WRs we can update the lkey and need_inval 471 + * state on the MRs. If we were doing this at init time, we would get double 472 + * or missing invalidations if a context was initialized but not actually 473 + * posted. 474 + */ 475 + static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval) 476 + { 477 + reg->mr->need_inval = need_inval; 478 + ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey)); 479 + reg->reg_wr.key = reg->mr->lkey; 480 + reg->sge.lkey = reg->mr->lkey; 481 + } 482 + 483 + /** 484 + * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation 485 + * @ctx: context to operate on 486 + * @qp: queue pair to operate on 487 + * @port_num: port num to which the connection is bound 488 + * @cqe: completion queue entry for the last WR 489 + * @chain_wr: WR to append to the posted chain 490 + * 491 + * Return the WR chain for the set of RDMA READ/WRITE operations described by 492 + * @ctx, as well as any memory registration operations needed. If @chain_wr 493 + * is non-NULL the WR it points to will be appended to the chain of WRs posted. 494 + * If @chain_wr is not set @cqe must be set so that the caller gets a 495 + * completion notification. 496 + */ 497 + struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 498 + u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr) 499 + { 500 + struct ib_send_wr *first_wr, *last_wr; 501 + int i; 502 + 503 + switch (ctx->type) { 504 + case RDMA_RW_SIG_MR: 505 + rdma_rw_update_lkey(&ctx->sig->data, true); 506 + if (ctx->sig->prot.mr) 507 + rdma_rw_update_lkey(&ctx->sig->prot, true); 508 + 509 + ctx->sig->sig_mr->need_inval = true; 510 + ib_update_fast_reg_key(ctx->sig->sig_mr, 511 + ib_inc_rkey(ctx->sig->sig_mr->lkey)); 512 + ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey; 513 + 514 + if (ctx->sig->data.inv_wr.next) 515 + first_wr = &ctx->sig->data.inv_wr; 516 + else 517 + first_wr = &ctx->sig->data.reg_wr.wr; 518 + last_wr = &ctx->sig->data.wr.wr; 519 + break; 520 + case RDMA_RW_MR: 521 + for (i = 0; i < ctx->nr_ops; i++) { 522 + rdma_rw_update_lkey(&ctx->reg[i], 523 + ctx->reg[i].wr.wr.opcode != 524 + IB_WR_RDMA_READ_WITH_INV); 525 + } 526 + 527 + if (ctx->reg[0].inv_wr.next) 528 + first_wr = &ctx->reg[0].inv_wr; 529 + else 530 + first_wr = &ctx->reg[0].reg_wr.wr; 531 + last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr; 532 + break; 533 + case RDMA_RW_MULTI_WR: 534 + first_wr = &ctx->map.wrs[0].wr; 535 + last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr; 536 + break; 537 + case RDMA_RW_SINGLE_WR: 538 + first_wr = &ctx->single.wr.wr; 539 + last_wr = &ctx->single.wr.wr; 540 + break; 541 + default: 542 + BUG(); 543 + } 544 + 545 + if (chain_wr) { 546 + last_wr->next = chain_wr; 547 + } else { 548 + last_wr->wr_cqe = cqe; 549 + last_wr->send_flags |= IB_SEND_SIGNALED; 550 + } 551 + 552 + return first_wr; 553 + } 554 + EXPORT_SYMBOL(rdma_rw_ctx_wrs); 555 + 556 + /** 557 + * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation 558 + * @ctx: context to operate on 559 + * @qp: queue pair to operate on 560 + * @port_num: port num to which the connection is bound 
561 + * @cqe: completion queue entry for the last WR 562 + * @chain_wr: WR to append to the posted chain 563 + * 564 + * Post the set of RDMA READ/WRITE operations described by @ctx, as well as 565 + * any memory registration operations needed. If @chain_wr is non-NULL the 566 + * WR it points to will be appended to the chain of WRs posted. If @chain_wr 567 + * is not set @cqe must be set so that the caller gets a completion 568 + * notification. 569 + */ 570 + int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 571 + struct ib_cqe *cqe, struct ib_send_wr *chain_wr) 572 + { 573 + struct ib_send_wr *first_wr, *bad_wr; 574 + 575 + first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr); 576 + return ib_post_send(qp, first_wr, &bad_wr); 577 + } 578 + EXPORT_SYMBOL(rdma_rw_ctx_post); 579 + 580 + /** 581 + * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init 582 + * @ctx: context to release 583 + * @qp: queue pair to operate on 584 + * @port_num: port num to which the connection is bound 585 + * @sg: scatterlist that was used for the READ/WRITE 586 + * @sg_cnt: number of entries in @sg 587 + * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 588 + */ 589 + void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 590 + struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) 591 + { 592 + int i; 593 + 594 + switch (ctx->type) { 595 + case RDMA_RW_MR: 596 + for (i = 0; i < ctx->nr_ops; i++) 597 + ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr); 598 + kfree(ctx->reg); 599 + break; 600 + case RDMA_RW_MULTI_WR: 601 + kfree(ctx->map.wrs); 602 + kfree(ctx->map.sges); 603 + break; 604 + case RDMA_RW_SINGLE_WR: 605 + break; 606 + default: 607 + BUG(); 608 + break; 609 + } 610 + 611 + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 612 + } 613 + EXPORT_SYMBOL(rdma_rw_ctx_destroy); 614 + 615 + /** 616 + * rdma_rw_ctx_destroy_signature - release all resources allocated by 617 + * rdma_rw_ctx_init_signature 618 + * @ctx: context to release 619 + * @qp: queue pair to operate on 620 + * @port_num: port num to which the connection is bound 621 + * @sg: scatterlist that was used for the READ/WRITE 622 + * @sg_cnt: number of entries in @sg 623 + * @prot_sg: scatterlist that was used for the READ/WRITE of the PI 624 + * @prot_sg_cnt: number of entries in @prot_sg 625 + * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ 626 + */ 627 + void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 628 + u8 port_num, struct scatterlist *sg, u32 sg_cnt, 629 + struct scatterlist *prot_sg, u32 prot_sg_cnt, 630 + enum dma_data_direction dir) 631 + { 632 + if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR)) 633 + return; 634 + 635 + ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); 636 + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); 637 + 638 + if (ctx->sig->prot.mr) { 639 + ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); 640 + ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); 641 + } 642 + 643 + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr); 644 + kfree(ctx->sig); 645 + } 646 + EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); 647 + 648 + void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) 649 + { 650 + u32 factor; 651 + 652 + WARN_ON_ONCE(attr->port_num == 0); 653 + 654 + /* 655 + * Each context needs at least one RDMA READ or WRITE WR. 
656 + * 657 + * For some hardware we might need more, eventually we should ask the 658 + * HCA driver for a multiplier here. 659 + */ 660 + factor = 1; 661 + 662 + /* 663 + * If the devices needs MRs to perform RDMA READ or WRITE operations, 664 + * we'll need two additional MRs for the registrations and the 665 + * invalidation. 666 + */ 667 + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) 668 + factor += 6; /* (inv + reg) * (data + prot + sig) */ 669 + else if (rdma_rw_can_use_mr(dev, attr->port_num)) 670 + factor += 2; /* inv + reg */ 671 + 672 + attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs; 673 + 674 + /* 675 + * But maybe we were just too high in the sky and the device doesn't 676 + * even support all we need, and we'll have to live with what we get.. 677 + */ 678 + attr->cap.max_send_wr = 679 + min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr); 680 + } 681 + 682 + int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) 683 + { 684 + struct ib_device *dev = qp->pd->device; 685 + u32 nr_mrs = 0, nr_sig_mrs = 0; 686 + int ret = 0; 687 + 688 + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) { 689 + nr_sig_mrs = attr->cap.max_rdma_ctxs; 690 + nr_mrs = attr->cap.max_rdma_ctxs * 2; 691 + } else if (rdma_rw_can_use_mr(dev, attr->port_num)) { 692 + nr_mrs = attr->cap.max_rdma_ctxs; 693 + } 694 + 695 + if (nr_mrs) { 696 + ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs, 697 + IB_MR_TYPE_MEM_REG, 698 + rdma_rw_fr_page_list_len(dev)); 699 + if (ret) { 700 + pr_err("%s: failed to allocated %d MRs\n", 701 + __func__, nr_mrs); 702 + return ret; 703 + } 704 + } 705 + 706 + if (nr_sig_mrs) { 707 + ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs, 708 + IB_MR_TYPE_SIGNATURE, 2); 709 + if (ret) { 710 + pr_err("%s: failed to allocated %d SIG MRs\n", 711 + __func__, nr_mrs); 712 + goto out_free_rdma_mrs; 713 + } 714 + } 715 + 716 + return 0; 717 + 718 + out_free_rdma_mrs: 719 + ib_mr_pool_destroy(qp, &qp->rdma_mrs); 720 + return ret; 721 + } 722 + 723 + void rdma_rw_cleanup_mrs(struct ib_qp *qp) 724 + { 725 + ib_mr_pool_destroy(qp, &qp->sig_mrs); 726 + ib_mr_pool_destroy(qp, &qp->rdma_mrs); 727 + }
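
rw.c is the centerpiece of the "Generic RDMA READ/WRITE API addition" noted in the pull message: the ULP describes one transfer and the core picks a single WR, a chain of WRs, or MR-based mode per transport. A rough caller-side sketch of the init/post/destroy lifecycle for an RDMA READ, assuming port 1 and a QP created with cap.max_rdma_ctxs; the example_* names and the cq_context convention are assumptions, not part of the patch:

#include <linux/dma-mapping.h>
#include <rdma/ib_verbs.h>
#include <rdma/rw.h>

/* Per-I/O state for one RDMA READ driven through the new helpers. */
struct example_io {
	struct rdma_rw_ctx	ctx;
	struct ib_cqe		cqe;
	struct scatterlist	*sg;
	u32			sg_cnt;
};

static void example_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct example_io *io = container_of(wc->wr_cqe, struct example_io, cqe);
	struct ib_qp *qp = cq->cq_context;	/* assumes the QP was stashed here */

	/* Releases MRs to the pool (if any were used) and unmaps io->sg. */
	rdma_rw_ctx_destroy(&io->ctx, qp, 1, io->sg, io->sg_cnt,
			    DMA_FROM_DEVICE);
	/* ... check wc->status and pass the received data up the stack ... */
}

static int example_post_read(struct ib_qp *qp, struct example_io *io,
			     u64 remote_addr, u32 rkey)
{
	int ret;

	/* Maps io->sg and builds the WR chain (reg/inv WRs added on iWarp). */
	ret = rdma_rw_ctx_init(&io->ctx, qp, 1, io->sg, io->sg_cnt, 0,
			       remote_addr, rkey, DMA_FROM_DEVICE);
	if (ret < 0)
		return ret;

	io->cqe.done = example_read_done;
	return rdma_rw_ctx_post(&io->ctx, qp, 1, &io->cqe, NULL);
}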
+2 -2
drivers/infiniband/core/sa_query.c
··· 536 536 data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS, 537 537 RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST); 538 538 if (!data) { 539 - kfree_skb(skb); 539 + nlmsg_free(skb); 540 540 return -EMSGSIZE; 541 541 } 542 542 ··· 1820 1820 goto err3; 1821 1821 } 1822 1822 1823 - if (ibnl_add_client(RDMA_NL_LS, RDMA_NL_LS_NUM_OPS, 1823 + if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table), 1824 1824 ib_sa_cb_table)) { 1825 1825 pr_err("Failed to add netlink callback\n"); 1826 1826 ret = -EINVAL;
+8 -3
drivers/infiniband/core/uverbs_cmd.c
··· 1833 1833 if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | 1834 1834 IB_QP_CREATE_CROSS_CHANNEL | 1835 1835 IB_QP_CREATE_MANAGED_SEND | 1836 - IB_QP_CREATE_MANAGED_RECV)) { 1836 + IB_QP_CREATE_MANAGED_RECV | 1837 + IB_QP_CREATE_SCATTER_FCS)) { 1837 1838 ret = -EINVAL; 1838 1839 goto err_put; 1839 1840 } ··· 3089 3088 if (cmd.comp_mask) 3090 3089 return -EINVAL; 3091 3090 3092 - if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && 3093 - !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) 3091 + if (!capable(CAP_NET_RAW)) 3094 3092 return -EPERM; 3095 3093 3096 3094 if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) ··· 3655 3655 resp.hca_core_clock = attr.hca_core_clock; 3656 3656 resp.response_length += sizeof(resp.hca_core_clock); 3657 3657 3658 + if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) 3659 + goto end; 3660 + 3661 + resp.device_cap_flags_ex = attr.device_cap_flags; 3662 + resp.response_length += sizeof(resp.device_cap_flags_ex); 3658 3663 end: 3659 3664 err = ib_copy_to_udata(ucore, &resp, resp.response_length); 3660 3665 return err;
+111 -57
drivers/infiniband/core/verbs.c
··· 48 48 #include <rdma/ib_verbs.h> 49 49 #include <rdma/ib_cache.h> 50 50 #include <rdma/ib_addr.h> 51 + #include <rdma/rw.h> 51 52 52 53 #include "core_priv.h" 53 54 ··· 724 723 } 725 724 EXPORT_SYMBOL(ib_open_qp); 726 725 726 + static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, 727 + struct ib_qp_init_attr *qp_init_attr) 728 + { 729 + struct ib_qp *real_qp = qp; 730 + 731 + qp->event_handler = __ib_shared_qp_event_handler; 732 + qp->qp_context = qp; 733 + qp->pd = NULL; 734 + qp->send_cq = qp->recv_cq = NULL; 735 + qp->srq = NULL; 736 + qp->xrcd = qp_init_attr->xrcd; 737 + atomic_inc(&qp_init_attr->xrcd->usecnt); 738 + INIT_LIST_HEAD(&qp->open_list); 739 + 740 + qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, 741 + qp_init_attr->qp_context); 742 + if (!IS_ERR(qp)) 743 + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); 744 + else 745 + real_qp->device->destroy_qp(real_qp); 746 + return qp; 747 + } 748 + 727 749 struct ib_qp *ib_create_qp(struct ib_pd *pd, 728 750 struct ib_qp_init_attr *qp_init_attr) 729 751 { 730 - struct ib_qp *qp, *real_qp; 731 - struct ib_device *device; 752 + struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; 753 + struct ib_qp *qp; 754 + int ret; 732 755 733 - device = pd ? pd->device : qp_init_attr->xrcd->device; 756 + /* 757 + * If the callers is using the RDMA API calculate the resources 758 + * needed for the RDMA READ/WRITE operations. 759 + * 760 + * Note that these callers need to pass in a port number. 761 + */ 762 + if (qp_init_attr->cap.max_rdma_ctxs) 763 + rdma_rw_init_qp(device, qp_init_attr); 764 + 734 765 qp = device->create_qp(pd, qp_init_attr, NULL); 766 + if (IS_ERR(qp)) 767 + return qp; 735 768 736 - if (!IS_ERR(qp)) { 737 - qp->device = device; 738 - qp->real_qp = qp; 739 - qp->uobject = NULL; 740 - qp->qp_type = qp_init_attr->qp_type; 769 + qp->device = device; 770 + qp->real_qp = qp; 771 + qp->uobject = NULL; 772 + qp->qp_type = qp_init_attr->qp_type; 741 773 742 - atomic_set(&qp->usecnt, 0); 743 - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { 744 - qp->event_handler = __ib_shared_qp_event_handler; 745 - qp->qp_context = qp; 746 - qp->pd = NULL; 747 - qp->send_cq = qp->recv_cq = NULL; 748 - qp->srq = NULL; 749 - qp->xrcd = qp_init_attr->xrcd; 750 - atomic_inc(&qp_init_attr->xrcd->usecnt); 751 - INIT_LIST_HEAD(&qp->open_list); 774 + atomic_set(&qp->usecnt, 0); 775 + qp->mrs_used = 0; 776 + spin_lock_init(&qp->mr_lock); 777 + INIT_LIST_HEAD(&qp->rdma_mrs); 778 + INIT_LIST_HEAD(&qp->sig_mrs); 752 779 753 - real_qp = qp; 754 - qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, 755 - qp_init_attr->qp_context); 756 - if (!IS_ERR(qp)) 757 - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); 758 - else 759 - real_qp->device->destroy_qp(real_qp); 760 - } else { 761 - qp->event_handler = qp_init_attr->event_handler; 762 - qp->qp_context = qp_init_attr->qp_context; 763 - if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { 764 - qp->recv_cq = NULL; 765 - qp->srq = NULL; 766 - } else { 767 - qp->recv_cq = qp_init_attr->recv_cq; 768 - atomic_inc(&qp_init_attr->recv_cq->usecnt); 769 - qp->srq = qp_init_attr->srq; 770 - if (qp->srq) 771 - atomic_inc(&qp_init_attr->srq->usecnt); 772 - } 780 + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) 781 + return ib_create_xrc_qp(qp, qp_init_attr); 773 782 774 - qp->pd = pd; 775 - qp->send_cq = qp_init_attr->send_cq; 776 - qp->xrcd = NULL; 783 + qp->event_handler = qp_init_attr->event_handler; 784 + qp->qp_context = qp_init_attr->qp_context; 785 + if (qp_init_attr->qp_type == 
IB_QPT_XRC_INI) { 786 + qp->recv_cq = NULL; 787 + qp->srq = NULL; 788 + } else { 789 + qp->recv_cq = qp_init_attr->recv_cq; 790 + atomic_inc(&qp_init_attr->recv_cq->usecnt); 791 + qp->srq = qp_init_attr->srq; 792 + if (qp->srq) 793 + atomic_inc(&qp_init_attr->srq->usecnt); 794 + } 777 795 778 - atomic_inc(&pd->usecnt); 779 - atomic_inc(&qp_init_attr->send_cq->usecnt); 796 + qp->pd = pd; 797 + qp->send_cq = qp_init_attr->send_cq; 798 + qp->xrcd = NULL; 799 + 800 + atomic_inc(&pd->usecnt); 801 + atomic_inc(&qp_init_attr->send_cq->usecnt); 802 + 803 + if (qp_init_attr->cap.max_rdma_ctxs) { 804 + ret = rdma_rw_init_mrs(qp, qp_init_attr); 805 + if (ret) { 806 + pr_err("failed to init MR pool ret= %d\n", ret); 807 + ib_destroy_qp(qp); 808 + qp = ERR_PTR(ret); 780 809 } 781 810 } 782 811 ··· 1281 1250 struct ib_srq *srq; 1282 1251 int ret; 1283 1252 1253 + WARN_ON_ONCE(qp->mrs_used > 0); 1254 + 1284 1255 if (atomic_read(&qp->usecnt)) 1285 1256 return -EBUSY; 1286 1257 ··· 1293 1260 scq = qp->send_cq; 1294 1261 rcq = qp->recv_cq; 1295 1262 srq = qp->srq; 1263 + 1264 + if (!qp->uobject) 1265 + rdma_rw_cleanup_mrs(qp); 1296 1266 1297 1267 ret = qp->device->destroy_qp(qp); 1298 1268 if (!ret) { ··· 1379 1343 mr->pd = pd; 1380 1344 mr->uobject = NULL; 1381 1345 atomic_inc(&pd->usecnt); 1346 + mr->need_inval = false; 1382 1347 } 1383 1348 1384 1349 return mr; ··· 1426 1389 mr->pd = pd; 1427 1390 mr->uobject = NULL; 1428 1391 atomic_inc(&pd->usecnt); 1392 + mr->need_inval = false; 1429 1393 } 1430 1394 1431 1395 return mr; ··· 1635 1597 * @mr: memory region 1636 1598 * @sg: dma mapped scatterlist 1637 1599 * @sg_nents: number of entries in sg 1600 + * @sg_offset: offset in bytes into sg 1638 1601 * @page_size: page vector desired page size 1639 1602 * 1640 1603 * Constraints: ··· 1654 1615 * After this completes successfully, the memory region 1655 1616 * is ready for registration. 1656 1617 */ 1657 - int ib_map_mr_sg(struct ib_mr *mr, 1658 - struct scatterlist *sg, 1659 - int sg_nents, 1660 - unsigned int page_size) 1618 + int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, 1619 + unsigned int *sg_offset, unsigned int page_size) 1661 1620 { 1662 1621 if (unlikely(!mr->device->map_mr_sg)) 1663 1622 return -ENOSYS; 1664 1623 1665 1624 mr->page_size = page_size; 1666 1625 1667 - return mr->device->map_mr_sg(mr, sg, sg_nents); 1626 + return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset); 1668 1627 } 1669 1628 EXPORT_SYMBOL(ib_map_mr_sg); 1670 1629 ··· 1672 1635 * @mr: memory region 1673 1636 * @sgl: dma mapped scatterlist 1674 1637 * @sg_nents: number of entries in sg 1638 + * @sg_offset_p: IN: start offset in bytes into sg 1639 + * OUT: offset in bytes for element n of the sg of the first 1640 + * byte that has not been processed where n is the return 1641 + * value of this function. 1675 1642 * @set_page: driver page assignment function pointer 1676 1643 * 1677 1644 * Core service helper for drivers to convert the largest ··· 1686 1645 * Returns the number of sg elements that were assigned to 1687 1646 * a page vector. 1688 1647 */ 1689 - int ib_sg_to_pages(struct ib_mr *mr, 1690 - struct scatterlist *sgl, 1691 - int sg_nents, 1692 - int (*set_page)(struct ib_mr *, u64)) 1648 + int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, 1649 + unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64)) 1693 1650 { 1694 1651 struct scatterlist *sg; 1695 1652 u64 last_end_dma_addr = 0; 1653 + unsigned int sg_offset = sg_offset_p ? 
*sg_offset_p : 0; 1696 1654 unsigned int last_page_off = 0; 1697 1655 u64 page_mask = ~((u64)mr->page_size - 1); 1698 1656 int i, ret; 1699 1657 1700 - mr->iova = sg_dma_address(&sgl[0]); 1658 + if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0]))) 1659 + return -EINVAL; 1660 + 1661 + mr->iova = sg_dma_address(&sgl[0]) + sg_offset; 1701 1662 mr->length = 0; 1702 1663 1703 1664 for_each_sg(sgl, sg, sg_nents, i) { 1704 - u64 dma_addr = sg_dma_address(sg); 1705 - unsigned int dma_len = sg_dma_len(sg); 1665 + u64 dma_addr = sg_dma_address(sg) + sg_offset; 1666 + u64 prev_addr = dma_addr; 1667 + unsigned int dma_len = sg_dma_len(sg) - sg_offset; 1706 1668 u64 end_dma_addr = dma_addr + dma_len; 1707 1669 u64 page_addr = dma_addr & page_mask; 1708 1670 ··· 1729 1685 1730 1686 do { 1731 1687 ret = set_page(mr, page_addr); 1732 - if (unlikely(ret < 0)) 1733 - return i ? : ret; 1688 + if (unlikely(ret < 0)) { 1689 + sg_offset = prev_addr - sg_dma_address(sg); 1690 + mr->length += prev_addr - dma_addr; 1691 + if (sg_offset_p) 1692 + *sg_offset_p = sg_offset; 1693 + return i || sg_offset ? i : ret; 1694 + } 1695 + prev_addr = page_addr; 1734 1696 next_page: 1735 1697 page_addr += mr->page_size; 1736 1698 } while (page_addr < end_dma_addr); ··· 1744 1694 mr->length += dma_len; 1745 1695 last_end_dma_addr = end_dma_addr; 1746 1696 last_page_off = end_dma_addr & ~page_mask; 1697 + 1698 + sg_offset = 0; 1747 1699 } 1748 1700 1701 + if (sg_offset_p) 1702 + *sg_offset_p = 0; 1749 1703 return i; 1750 1704 } 1751 1705 EXPORT_SYMBOL(ib_sg_to_pages);
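
On the allocation side, ib_create_qp() now sizes the send queue via rdma_rw_init_qp() and pre-allocates the MR pools via rdma_rw_init_mrs() whenever the caller asks for RDMA R/W contexts. A minimal sketch of what opting in might look like for an RDMA CM based ULP; the sizes and the example_ prefix are placeholders, not taken from this series:

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

/*
 * Create an RC QP able to back 'nr_ctxs' concurrent rdma_rw contexts.
 * The core adds the extra registration/invalidation WRs and allocates
 * MR pools as the transport requires.  Values are illustrative only.
 */
static int example_create_rw_qp(struct rdma_cm_id *cm_id, struct ib_pd *pd,
				struct ib_cq *cq, u32 nr_ctxs)
{
	struct ib_qp_init_attr attr = { };

	attr.qp_type = IB_QPT_RC;
	attr.send_cq = cq;
	attr.recv_cq = cq;
	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	attr.cap.max_send_wr = 16;	/* rdma_rw_init_qp() adds its share */
	attr.cap.max_recv_wr = 16;
	attr.cap.max_send_sge = 1;
	attr.cap.max_recv_sge = 1;
	attr.cap.max_rdma_ctxs = nr_ctxs;	/* new: opt in to the R/W API */

	/* rdma_create_qp() fills attr.port_num from the cm_id (see cma.c above). */
	return rdma_create_qp(cm_id, pd, &attr);
}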
+3 -4
drivers/infiniband/hw/cxgb3/iwch_provider.c
··· 783 783 return 0; 784 784 } 785 785 786 - static int iwch_map_mr_sg(struct ib_mr *ibmr, 787 - struct scatterlist *sg, 788 - int sg_nents) 786 + static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 787 + int sg_nents, unsigned int *sg_offset) 789 788 { 790 789 struct iwch_mr *mhp = to_iwch_mr(ibmr); 791 790 792 791 mhp->npages = 0; 793 792 794 - return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); 793 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); 795 794 } 796 795 797 796 static int iwch_destroy_qp(struct ib_qp *ib_qp)
+410 -199
drivers/infiniband/hw/cxgb4/cm.c
··· 119 119 static int mpa_rev = 2; 120 120 module_param(mpa_rev, int, 0644); 121 121 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " 122 - "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" 122 + "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft" 123 123 " compliant (default=2)"); 124 124 125 125 static int markers_enabled; ··· 145 145 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); 146 146 static void ep_timeout(unsigned long arg); 147 147 static void connect_reply_upcall(struct c4iw_ep *ep, int status); 148 + static int sched(struct c4iw_dev *dev, struct sk_buff *skb); 148 149 149 150 static LIST_HEAD(timeout_list); 150 151 static spinlock_t timeout_lock; 152 + 153 + static void deref_cm_id(struct c4iw_ep_common *epc) 154 + { 155 + epc->cm_id->rem_ref(epc->cm_id); 156 + epc->cm_id = NULL; 157 + set_bit(CM_ID_DEREFED, &epc->history); 158 + } 159 + 160 + static void ref_cm_id(struct c4iw_ep_common *epc) 161 + { 162 + set_bit(CM_ID_REFED, &epc->history); 163 + epc->cm_id->add_ref(epc->cm_id); 164 + } 151 165 152 166 static void deref_qp(struct c4iw_ep *ep) 153 167 { 154 168 c4iw_qp_rem_ref(&ep->com.qp->ibqp); 155 169 clear_bit(QP_REFERENCED, &ep->com.flags); 170 + set_bit(QP_DEREFED, &ep->com.history); 156 171 } 157 172 158 173 static void ref_qp(struct c4iw_ep *ep) 159 174 { 160 175 set_bit(QP_REFERENCED, &ep->com.flags); 176 + set_bit(QP_REFED, &ep->com.history); 161 177 c4iw_qp_add_ref(&ep->com.qp->ibqp); 162 178 } 163 179 ··· 217 201 error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); 218 202 if (error < 0) 219 203 kfree_skb(skb); 204 + else if (error == NET_XMIT_DROP) 205 + return -ENOMEM; 220 206 return error < 0 ? error : 0; 221 207 } 222 208 ··· 308 290 return epc; 309 291 } 310 292 293 + static void remove_ep_tid(struct c4iw_ep *ep) 294 + { 295 + unsigned long flags; 296 + 297 + spin_lock_irqsave(&ep->com.dev->lock, flags); 298 + _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); 299 + spin_unlock_irqrestore(&ep->com.dev->lock, flags); 300 + } 301 + 302 + static void insert_ep_tid(struct c4iw_ep *ep) 303 + { 304 + unsigned long flags; 305 + 306 + spin_lock_irqsave(&ep->com.dev->lock, flags); 307 + _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0); 308 + spin_unlock_irqrestore(&ep->com.dev->lock, flags); 309 + } 310 + 311 + /* 312 + * Atomically lookup the ep ptr given the tid and grab a reference on the ep. 313 + */ 314 + static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) 315 + { 316 + struct c4iw_ep *ep; 317 + unsigned long flags; 318 + 319 + spin_lock_irqsave(&dev->lock, flags); 320 + ep = idr_find(&dev->hwtid_idr, tid); 321 + if (ep) 322 + c4iw_get_ep(&ep->com); 323 + spin_unlock_irqrestore(&dev->lock, flags); 324 + return ep; 325 + } 326 + 327 + /* 328 + * Atomically lookup the ep ptr given the stid and grab a reference on the ep. 
329 + */ 330 + static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, 331 + unsigned int stid) 332 + { 333 + struct c4iw_listen_ep *ep; 334 + unsigned long flags; 335 + 336 + spin_lock_irqsave(&dev->lock, flags); 337 + ep = idr_find(&dev->stid_idr, stid); 338 + if (ep) 339 + c4iw_get_ep(&ep->com); 340 + spin_unlock_irqrestore(&dev->lock, flags); 341 + return ep; 342 + } 343 + 311 344 void _c4iw_free_ep(struct kref *kref) 312 345 { 313 346 struct c4iw_ep *ep; 314 347 315 348 ep = container_of(kref, struct c4iw_ep, com.kref); 316 - PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); 349 + PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]); 317 350 if (test_bit(QP_REFERENCED, &ep->com.flags)) 318 351 deref_qp(ep); 319 352 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { ··· 378 309 (const u32 *)&sin6->sin6_addr.s6_addr, 379 310 1); 380 311 } 381 - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); 382 312 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); 383 313 dst_release(ep->dst); 384 314 cxgb4_l2t_release(ep->l2t); 315 + if (ep->mpa_skb) 316 + kfree_skb(ep->mpa_skb); 385 317 } 386 318 kfree(ep); 387 319 } ··· 390 320 static void release_ep_resources(struct c4iw_ep *ep) 391 321 { 392 322 set_bit(RELEASE_RESOURCES, &ep->com.flags); 323 + 324 + /* 325 + * If we have a hwtid, then remove it from the idr table 326 + * so lookups will no longer find this endpoint. Otherwise 327 + * we have a race where one thread finds the ep ptr just 328 + * before the other thread is freeing the ep memory. 329 + */ 330 + if (ep->hwtid != -1) 331 + remove_ep_tid(ep); 393 332 c4iw_put_ep(&ep->com); 394 333 } 395 334 ··· 511 432 512 433 static void arp_failure_discard(void *handle, struct sk_buff *skb) 513 434 { 514 - PDBG("%s c4iw_dev %p\n", __func__, handle); 435 + pr_err(MOD "ARP failure\n"); 515 436 kfree_skb(skb); 437 + } 438 + 439 + static void mpa_start_arp_failure(void *handle, struct sk_buff *skb) 440 + { 441 + pr_err("ARP failure during MPA Negotiation - Closing Connection\n"); 442 + } 443 + 444 + enum { 445 + NUM_FAKE_CPLS = 2, 446 + FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, 447 + FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1, 448 + }; 449 + 450 + static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) 451 + { 452 + struct c4iw_ep *ep; 453 + 454 + ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); 455 + release_ep_resources(ep); 456 + return 0; 457 + } 458 + 459 + static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) 460 + { 461 + struct c4iw_ep *ep; 462 + 463 + ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); 464 + c4iw_put_ep(&ep->parent_ep->com); 465 + release_ep_resources(ep); 466 + return 0; 467 + } 468 + 469 + /* 470 + * Fake up a special CPL opcode and call sched() so process_work() will call 471 + * _put_ep_safe() in a safe context to free the ep resources. This is needed 472 + * because ARP error handlers are called in an ATOMIC context, and 473 + * _c4iw_free_ep() needs to block. 474 + */ 475 + static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb, 476 + int cpl) 477 + { 478 + struct cpl_act_establish *rpl = cplhdr(skb); 479 + 480 + /* Set our special ARP_FAILURE opcode */ 481 + rpl->ot.opcode = cpl; 482 + 483 + /* 484 + * Save ep in the skb->cb area, after where sched() will save the dev 485 + * ptr. 
486 + */ 487 + *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep; 488 + sched(ep->com.dev, skb); 489 + } 490 + 491 + /* Handle an ARP failure for an accept */ 492 + static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb) 493 + { 494 + struct c4iw_ep *ep = handle; 495 + 496 + pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n", 497 + ep->hwtid); 498 + 499 + __state_set(&ep->com, DEAD); 500 + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE); 516 501 } 517 502 518 503 /* ··· 587 444 struct c4iw_ep *ep = handle; 588 445 589 446 printk(KERN_ERR MOD "ARP failure during connect\n"); 590 - kfree_skb(skb); 591 447 connect_reply_upcall(ep, -EHOSTUNREACH); 592 - state_set(&ep->com, DEAD); 448 + __state_set(&ep->com, DEAD); 593 449 if (ep->com.remote_addr.ss_family == AF_INET6) { 594 450 struct sockaddr_in6 *sin6 = 595 451 (struct sockaddr_in6 *)&ep->com.local_addr; ··· 597 455 } 598 456 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); 599 457 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 600 - dst_release(ep->dst); 601 - cxgb4_l2t_release(ep->l2t); 602 - c4iw_put_ep(&ep->com); 458 + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); 603 459 } 604 460 605 461 /* ··· 606 466 */ 607 467 static void abort_arp_failure(void *handle, struct sk_buff *skb) 608 468 { 609 - struct c4iw_rdev *rdev = handle; 469 + int ret; 470 + struct c4iw_ep *ep = handle; 471 + struct c4iw_rdev *rdev = &ep->com.dev->rdev; 610 472 struct cpl_abort_req *req = cplhdr(skb); 611 473 612 474 PDBG("%s rdev %p\n", __func__, rdev); 613 475 req->cmd = CPL_ABORT_NO_RST; 614 - c4iw_ofld_send(rdev, skb); 476 + ret = c4iw_ofld_send(rdev, skb); 477 + if (ret) { 478 + __state_set(&ep->com, DEAD); 479 + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); 480 + } 615 481 } 616 482 617 - static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) 483 + static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) 618 484 { 619 485 unsigned int flowclen = 80; 620 486 struct fw_flowc_wr *flowc; ··· 676 530 } 677 531 678 532 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 679 - c4iw_ofld_send(&ep->com.dev->rdev, skb); 533 + return c4iw_ofld_send(&ep->com.dev->rdev, skb); 680 534 } 681 535 682 536 static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) ··· 714 568 return -ENOMEM; 715 569 } 716 570 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 717 - t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure); 571 + t4_set_arp_err_handler(skb, ep, abort_arp_failure); 718 572 req = (struct cpl_abort_req *) skb_put(skb, wrlen); 719 573 memset(req, 0, wrlen); 720 574 INIT_TP_WR(req, ep->hwtid); ··· 953 807 return ret; 954 808 } 955 809 956 - static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, 957 - u8 mpa_rev_to_use) 810 + static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, 811 + u8 mpa_rev_to_use) 958 812 { 959 - int mpalen, wrlen; 813 + int mpalen, wrlen, ret; 960 814 struct fw_ofld_tx_data_wr *req; 961 815 struct mpa_message *mpa; 962 816 struct mpa_v2_conn_params mpa_v2_params; ··· 972 826 skb = get_skb(skb, wrlen, GFP_KERNEL); 973 827 if (!skb) { 974 828 connect_reply_upcall(ep, -ENOMEM); 975 - return; 829 + return -ENOMEM; 976 830 } 977 831 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 978 832 ··· 1040 894 t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1041 895 BUG_ON(ep->mpa_skb); 1042 896 ep->mpa_skb = skb; 1043 - c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 897 + ret = c4iw_l2t_send(&ep->com.dev->rdev, 
skb, ep->l2t); 898 + if (ret) 899 + return ret; 1044 900 start_ep_timer(ep); 1045 901 __state_set(&ep->com, MPA_REQ_SENT); 1046 902 ep->mpa_attr.initiator = 1; 1047 903 ep->snd_seq += mpalen; 1048 - return; 904 + return ret; 1049 905 } 1050 906 1051 907 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) ··· 1123 975 */ 1124 976 skb_get(skb); 1125 977 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 1126 - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 978 + t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); 1127 979 BUG_ON(ep->mpa_skb); 1128 980 ep->mpa_skb = skb; 1129 981 ep->snd_seq += mpalen; ··· 1208 1060 * Function fw4_ack() will deref it. 1209 1061 */ 1210 1062 skb_get(skb); 1211 - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 1063 + t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); 1212 1064 ep->mpa_skb = skb; 1213 1065 __state_set(&ep->com, MPA_REP_SENT); 1214 1066 ep->snd_seq += mpalen; ··· 1222 1074 unsigned int tid = GET_TID(req); 1223 1075 unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); 1224 1076 struct tid_info *t = dev->rdev.lldi.tids; 1077 + int ret; 1225 1078 1226 1079 ep = lookup_atid(t, atid); 1227 1080 ··· 1235 1086 /* setup the hwtid for this connection */ 1236 1087 ep->hwtid = tid; 1237 1088 cxgb4_insert_tid(t, ep, tid); 1238 - insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); 1089 + insert_ep_tid(ep); 1239 1090 1240 1091 ep->snd_seq = be32_to_cpu(req->snd_isn); 1241 1092 ep->rcv_seq = be32_to_cpu(req->rcv_isn); ··· 1248 1099 set_bit(ACT_ESTAB, &ep->com.history); 1249 1100 1250 1101 /* start MPA negotiation */ 1251 - send_flowc(ep, NULL); 1102 + ret = send_flowc(ep, NULL); 1103 + if (ret) 1104 + goto err; 1252 1105 if (ep->retry_with_mpa_v1) 1253 - send_mpa_req(ep, skb, 1); 1106 + ret = send_mpa_req(ep, skb, 1); 1254 1107 else 1255 - send_mpa_req(ep, skb, mpa_rev); 1108 + ret = send_mpa_req(ep, skb, mpa_rev); 1109 + if (ret) 1110 + goto err; 1256 1111 mutex_unlock(&ep->com.mutex); 1112 + return 0; 1113 + err: 1114 + mutex_unlock(&ep->com.mutex); 1115 + connect_reply_upcall(ep, -ENOMEM); 1116 + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 1257 1117 return 0; 1258 1118 } 1259 1119 ··· 1278 1120 PDBG("close complete delivered ep %p cm_id %p tid %u\n", 1279 1121 ep, ep->com.cm_id, ep->hwtid); 1280 1122 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1281 - ep->com.cm_id->rem_ref(ep->com.cm_id); 1282 - ep->com.cm_id = NULL; 1123 + deref_cm_id(&ep->com); 1283 1124 set_bit(CLOSE_UPCALL, &ep->com.history); 1284 1125 } 1285 - } 1286 - 1287 - static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) 1288 - { 1289 - PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1290 - __state_set(&ep->com, ABORTING); 1291 - set_bit(ABORT_CONN, &ep->com.history); 1292 - return send_abort(ep, skb, gfp); 1293 1126 } 1294 1127 1295 1128 static void peer_close_upcall(struct c4iw_ep *ep) ··· 1310 1161 PDBG("abort delivered ep %p cm_id %p tid %u\n", ep, 1311 1162 ep->com.cm_id, ep->hwtid); 1312 1163 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1313 - ep->com.cm_id->rem_ref(ep->com.cm_id); 1314 - ep->com.cm_id = NULL; 1164 + deref_cm_id(&ep->com); 1315 1165 set_bit(ABORT_UPCALL, &ep->com.history); 1316 1166 } 1317 1167 } ··· 1353 1205 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1354 1206 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1355 1207 1356 - if (status < 0) { 1357 - ep->com.cm_id->rem_ref(ep->com.cm_id); 1358 - ep->com.cm_id = NULL; 1359 - } 1208 + if (status < 0) 1209 + 
deref_cm_id(&ep->com); 1360 1210 } 1361 1211 1362 1212 static int connect_request_upcall(struct c4iw_ep *ep) ··· 1447 1301 1448 1302 #define RELAXED_IRD_NEGOTIATION 1 1449 1303 1304 + /* 1305 + * process_mpa_reply - process streaming mode MPA reply 1306 + * 1307 + * Returns: 1308 + * 1309 + * 0 upon success indicating a connect request was delivered to the ULP 1310 + * or the mpa request is incomplete but valid so far. 1311 + * 1312 + * 1 if a failure requires the caller to close the connection. 1313 + * 1314 + * 2 if a failure requires the caller to abort the connection. 1315 + */ 1450 1316 static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) 1451 1317 { 1452 1318 struct mpa_message *mpa; ··· 1474 1316 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1475 1317 1476 1318 /* 1477 - * Stop mpa timer. If it expired, then 1478 - * we ignore the MPA reply. process_timeout() 1479 - * will abort the connection. 1480 - */ 1481 - if (stop_ep_timer(ep)) 1482 - return 0; 1483 - 1484 - /* 1485 1319 * If we get more than the supported amount of private data 1486 1320 * then we must fail this connection. 1487 1321 */ 1488 1322 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { 1489 1323 err = -EINVAL; 1490 - goto err; 1324 + goto err_stop_timer; 1491 1325 } 1492 1326 1493 1327 /* ··· 1501 1351 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," 1502 1352 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1503 1353 err = -EPROTO; 1504 - goto err; 1354 + goto err_stop_timer; 1505 1355 } 1506 1356 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 1507 1357 err = -EPROTO; 1508 - goto err; 1358 + goto err_stop_timer; 1509 1359 } 1510 1360 1511 1361 plen = ntohs(mpa->private_data_size); ··· 1515 1365 */ 1516 1366 if (plen > MPA_MAX_PRIVATE_DATA) { 1517 1367 err = -EPROTO; 1518 - goto err; 1368 + goto err_stop_timer; 1519 1369 } 1520 1370 1521 1371 /* ··· 1523 1373 */ 1524 1374 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1525 1375 err = -EPROTO; 1526 - goto err; 1376 + goto err_stop_timer; 1527 1377 } 1528 1378 1529 1379 ep->plen = (u8) plen; ··· 1537 1387 1538 1388 if (mpa->flags & MPA_REJECT) { 1539 1389 err = -ECONNREFUSED; 1540 - goto err; 1390 + goto err_stop_timer; 1541 1391 } 1392 + 1393 + /* 1394 + * Stop mpa timer. If it expired, then 1395 + * we ignore the MPA reply. process_timeout() 1396 + * will abort the connection. 1397 + */ 1398 + if (stop_ep_timer(ep)) 1399 + return 0; 1542 1400 1543 1401 /* 1544 1402 * If we get here we have accumulated the entire mpa ··· 1687 1529 goto out; 1688 1530 } 1689 1531 goto out; 1532 + err_stop_timer: 1533 + stop_ep_timer(ep); 1690 1534 err: 1691 - __state_set(&ep->com, ABORTING); 1692 - send_abort(ep, skb, GFP_KERNEL); 1535 + disconnect = 2; 1693 1536 out: 1694 1537 connect_reply_upcall(ep, err); 1695 1538 return disconnect; 1696 1539 } 1697 1540 1698 - static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) 1541 + /* 1542 + * process_mpa_request - process streaming mode MPA request 1543 + * 1544 + * Returns: 1545 + * 1546 + * 0 upon success indicating a connect request was delivered to the ULP 1547 + * or the mpa request is incomplete but valid so far. 1548 + * 1549 + * 1 if a failure requires the caller to close the connection. 1550 + * 1551 + * 2 if a failure requires the caller to abort the connection. 
1552 + */ 1553 + static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) 1699 1554 { 1700 1555 struct mpa_message *mpa; 1701 1556 struct mpa_v2_conn_params *mpa_v2_params; ··· 1720 1549 * If we get more than the supported amount of private data 1721 1550 * then we must fail this connection. 1722 1551 */ 1723 - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { 1724 - (void)stop_ep_timer(ep); 1725 - abort_connection(ep, skb, GFP_KERNEL); 1726 - return; 1727 - } 1552 + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) 1553 + goto err_stop_timer; 1728 1554 1729 1555 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); 1730 1556 ··· 1737 1569 * We'll continue process when more data arrives. 1738 1570 */ 1739 1571 if (ep->mpa_pkt_len < sizeof(*mpa)) 1740 - return; 1572 + return 0; 1741 1573 1742 1574 PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); 1743 1575 mpa = (struct mpa_message *) ep->mpa_pkt; ··· 1748 1580 if (mpa->revision > mpa_rev) { 1749 1581 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," 1750 1582 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1751 - (void)stop_ep_timer(ep); 1752 - abort_connection(ep, skb, GFP_KERNEL); 1753 - return; 1583 + goto err_stop_timer; 1754 1584 } 1755 1585 1756 - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { 1757 - (void)stop_ep_timer(ep); 1758 - abort_connection(ep, skb, GFP_KERNEL); 1759 - return; 1760 - } 1586 + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) 1587 + goto err_stop_timer; 1761 1588 1762 1589 plen = ntohs(mpa->private_data_size); 1763 1590 1764 1591 /* 1765 1592 * Fail if there's too much private data. 1766 1593 */ 1767 - if (plen > MPA_MAX_PRIVATE_DATA) { 1768 - (void)stop_ep_timer(ep); 1769 - abort_connection(ep, skb, GFP_KERNEL); 1770 - return; 1771 - } 1594 + if (plen > MPA_MAX_PRIVATE_DATA) 1595 + goto err_stop_timer; 1772 1596 1773 1597 /* 1774 1598 * If plen does not account for pkt size 1775 1599 */ 1776 - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1777 - (void)stop_ep_timer(ep); 1778 - abort_connection(ep, skb, GFP_KERNEL); 1779 - return; 1780 - } 1600 + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) 1601 + goto err_stop_timer; 1781 1602 ep->plen = (u8) plen; 1782 1603 1783 1604 /* 1784 1605 * If we don't have all the pdata yet, then bail. 1785 1606 */ 1786 1607 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1787 - return; 1608 + return 0; 1788 1609 1789 1610 /* 1790 1611 * If we get here we have accumulated the entire mpa ··· 1822 1665 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1823 1666 ep->mpa_attr.p2p_type); 1824 1667 1825 - /* 1826 - * If the endpoint timer already expired, then we ignore 1827 - * the start request. process_timeout() will abort 1828 - * the connection. 
1829 - */ 1830 - if (!stop_ep_timer(ep)) { 1831 - __state_set(&ep->com, MPA_REQ_RCVD); 1668 + __state_set(&ep->com, MPA_REQ_RCVD); 1832 1669 1833 - /* drive upcall */ 1834 - mutex_lock_nested(&ep->parent_ep->com.mutex, 1835 - SINGLE_DEPTH_NESTING); 1836 - if (ep->parent_ep->com.state != DEAD) { 1837 - if (connect_request_upcall(ep)) 1838 - abort_connection(ep, skb, GFP_KERNEL); 1839 - } else { 1840 - abort_connection(ep, skb, GFP_KERNEL); 1841 - } 1842 - mutex_unlock(&ep->parent_ep->com.mutex); 1670 + /* drive upcall */ 1671 + mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING); 1672 + if (ep->parent_ep->com.state != DEAD) { 1673 + if (connect_request_upcall(ep)) 1674 + goto err_unlock_parent; 1675 + } else { 1676 + goto err_unlock_parent; 1843 1677 } 1844 - return; 1678 + mutex_unlock(&ep->parent_ep->com.mutex); 1679 + return 0; 1680 + 1681 + err_unlock_parent: 1682 + mutex_unlock(&ep->parent_ep->com.mutex); 1683 + goto err_out; 1684 + err_stop_timer: 1685 + (void)stop_ep_timer(ep); 1686 + err_out: 1687 + return 2; 1845 1688 } 1846 1689 1847 1690 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) ··· 1850 1693 struct cpl_rx_data *hdr = cplhdr(skb); 1851 1694 unsigned int dlen = ntohs(hdr->len); 1852 1695 unsigned int tid = GET_TID(hdr); 1853 - struct tid_info *t = dev->rdev.lldi.tids; 1854 1696 __u8 status = hdr->status; 1855 1697 int disconnect = 0; 1856 1698 1857 - ep = lookup_tid(t, tid); 1699 + ep = get_ep_from_tid(dev, tid); 1858 1700 if (!ep) 1859 1701 return 0; 1860 1702 PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); ··· 1871 1715 break; 1872 1716 case MPA_REQ_WAIT: 1873 1717 ep->rcv_seq += dlen; 1874 - process_mpa_request(ep, skb); 1718 + disconnect = process_mpa_request(ep, skb); 1875 1719 break; 1876 1720 case FPDU_MODE: { 1877 1721 struct c4iw_qp_attributes attrs; ··· 1892 1736 } 1893 1737 mutex_unlock(&ep->com.mutex); 1894 1738 if (disconnect) 1895 - c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 1739 + c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); 1740 + c4iw_put_ep(&ep->com); 1896 1741 return 0; 1897 1742 } 1898 1743 ··· 1903 1746 struct cpl_abort_rpl_rss *rpl = cplhdr(skb); 1904 1747 int release = 0; 1905 1748 unsigned int tid = GET_TID(rpl); 1906 - struct tid_info *t = dev->rdev.lldi.tids; 1907 1749 1908 - ep = lookup_tid(t, tid); 1750 + ep = get_ep_from_tid(dev, tid); 1909 1751 if (!ep) { 1910 1752 printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); 1911 1753 return 0; ··· 1926 1770 1927 1771 if (release) 1928 1772 release_ep_resources(ep); 1773 + c4iw_put_ep(&ep->com); 1929 1774 return 0; 1930 1775 } 1931 1776 1932 - static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) 1777 + static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) 1933 1778 { 1934 1779 struct sk_buff *skb; 1935 1780 struct fw_ofld_connection_wr *req; ··· 2000 1843 req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); 2001 1844 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); 2002 1845 set_bit(ACT_OFLD_CONN, &ep->com.history); 2003 - c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 1846 + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2004 1847 } 2005 1848 2006 1849 /* ··· 2143 1986 2144 1987 PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); 2145 1988 init_timer(&ep->timer); 1989 + c4iw_init_wr_wait(&ep->com.wr_wait); 2146 1990 2147 1991 /* 2148 1992 * Allocate an active TID to initiate a TCP connection. 
··· 2227 2069 struct sockaddr_in *ra; 2228 2070 struct sockaddr_in6 *la6; 2229 2071 struct sockaddr_in6 *ra6; 2072 + int ret = 0; 2230 2073 2231 2074 ep = lookup_atid(t, atid); 2232 2075 la = (struct sockaddr_in *)&ep->com.local_addr; ··· 2263 2104 mutex_unlock(&dev->rdev.stats.lock); 2264 2105 if (ep->com.local_addr.ss_family == AF_INET && 2265 2106 dev->rdev.lldi.enable_fw_ofld_conn) { 2266 - send_fw_act_open_req(ep, 2267 - TID_TID_G(AOPEN_ATID_G( 2268 - ntohl(rpl->atid_status)))); 2107 + ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G( 2108 + ntohl(rpl->atid_status)))); 2109 + if (ret) 2110 + goto fail; 2269 2111 return 0; 2270 2112 } 2271 2113 break; ··· 2306 2146 break; 2307 2147 } 2308 2148 2149 + fail: 2309 2150 connect_reply_upcall(ep, status2errno(status)); 2310 2151 state_set(&ep->com, DEAD); 2311 2152 ··· 2331 2170 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2332 2171 { 2333 2172 struct cpl_pass_open_rpl *rpl = cplhdr(skb); 2334 - struct tid_info *t = dev->rdev.lldi.tids; 2335 2173 unsigned int stid = GET_TID(rpl); 2336 - struct c4iw_listen_ep *ep = lookup_stid(t, stid); 2174 + struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); 2337 2175 2338 2176 if (!ep) { 2339 2177 PDBG("%s stid %d lookup failure!\n", __func__, stid); ··· 2341 2181 PDBG("%s ep %p status %d error %d\n", __func__, ep, 2342 2182 rpl->status, status2errno(rpl->status)); 2343 2183 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); 2344 - 2184 + c4iw_put_ep(&ep->com); 2345 2185 out: 2346 2186 return 0; 2347 2187 } ··· 2349 2189 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 2350 2190 { 2351 2191 struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); 2352 - struct tid_info *t = dev->rdev.lldi.tids; 2353 2192 unsigned int stid = GET_TID(rpl); 2354 - struct c4iw_listen_ep *ep = lookup_stid(t, stid); 2193 + struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); 2355 2194 2356 2195 PDBG("%s ep %p\n", __func__, ep); 2357 2196 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); 2197 + c4iw_put_ep(&ep->com); 2358 2198 return 0; 2359 2199 } 2360 2200 2361 - static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, 2362 - struct cpl_pass_accept_req *req) 2201 + static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, 2202 + struct cpl_pass_accept_req *req) 2363 2203 { 2364 2204 struct cpl_pass_accept_rpl *rpl; 2365 2205 unsigned int mtu_idx; ··· 2447 2287 rpl->opt0 = cpu_to_be64(opt0); 2448 2288 rpl->opt2 = cpu_to_be32(opt2); 2449 2289 set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); 2450 - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); 2451 - c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2290 + t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure); 2452 2291 2453 - return; 2292 + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 2454 2293 } 2455 2294 2456 2295 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) ··· 2514 2355 unsigned short hdrs; 2515 2356 u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); 2516 2357 2517 - parent_ep = lookup_stid(t, stid); 2358 + parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); 2518 2359 if (!parent_ep) { 2519 2360 PDBG("%s connect request on invalid stid %d\n", __func__, stid); 2520 2361 goto reject; ··· 2627 2468 2628 2469 init_timer(&child_ep->timer); 2629 2470 cxgb4_insert_tid(t, child_ep, hwtid); 2630 - insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); 2631 - accept_cr(child_ep, skb, req); 2632 - set_bit(PASS_ACCEPT_REQ, 
&child_ep->com.history); 2471 + insert_ep_tid(child_ep); 2472 + if (accept_cr(child_ep, skb, req)) { 2473 + c4iw_put_ep(&parent_ep->com); 2474 + release_ep_resources(child_ep); 2475 + } else { 2476 + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); 2477 + } 2633 2478 if (iptype == 6) { 2634 2479 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; 2635 2480 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], ··· 2642 2479 goto out; 2643 2480 reject: 2644 2481 reject_cr(dev, hwtid, skb); 2482 + if (parent_ep) 2483 + c4iw_put_ep(&parent_ep->com); 2645 2484 out: 2646 2485 return 0; 2647 2486 } ··· 2652 2487 { 2653 2488 struct c4iw_ep *ep; 2654 2489 struct cpl_pass_establish *req = cplhdr(skb); 2655 - struct tid_info *t = dev->rdev.lldi.tids; 2656 2490 unsigned int tid = GET_TID(req); 2491 + int ret; 2657 2492 2658 - ep = lookup_tid(t, tid); 2493 + ep = get_ep_from_tid(dev, tid); 2659 2494 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2660 2495 ep->snd_seq = be32_to_cpu(req->snd_isn); 2661 2496 ep->rcv_seq = be32_to_cpu(req->rcv_isn); ··· 2666 2501 set_emss(ep, ntohs(req->tcp_opt)); 2667 2502 2668 2503 dst_confirm(ep->dst); 2669 - state_set(&ep->com, MPA_REQ_WAIT); 2504 + mutex_lock(&ep->com.mutex); 2505 + ep->com.state = MPA_REQ_WAIT; 2670 2506 start_ep_timer(ep); 2671 - send_flowc(ep, skb); 2672 2507 set_bit(PASS_ESTAB, &ep->com.history); 2508 + ret = send_flowc(ep, skb); 2509 + mutex_unlock(&ep->com.mutex); 2510 + if (ret) 2511 + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 2512 + c4iw_put_ep(&ep->com); 2673 2513 2674 2514 return 0; 2675 2515 } ··· 2686 2516 struct c4iw_qp_attributes attrs; 2687 2517 int disconnect = 1; 2688 2518 int release = 0; 2689 - struct tid_info *t = dev->rdev.lldi.tids; 2690 2519 unsigned int tid = GET_TID(hdr); 2691 2520 int ret; 2692 2521 2693 - ep = lookup_tid(t, tid); 2522 + ep = get_ep_from_tid(dev, tid); 2523 + if (!ep) 2524 + return 0; 2525 + 2694 2526 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2695 2527 dst_confirm(ep->dst); 2696 2528 ··· 2764 2592 c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 2765 2593 if (release) 2766 2594 release_ep_resources(ep); 2595 + c4iw_put_ep(&ep->com); 2767 2596 return 0; 2768 2597 } 2769 2598 ··· 2777 2604 struct c4iw_qp_attributes attrs; 2778 2605 int ret; 2779 2606 int release = 0; 2780 - struct tid_info *t = dev->rdev.lldi.tids; 2781 2607 unsigned int tid = GET_TID(req); 2782 2608 2783 - ep = lookup_tid(t, tid); 2609 + ep = get_ep_from_tid(dev, tid); 2610 + if (!ep) 2611 + return 0; 2612 + 2784 2613 if (is_neg_adv(req->status)) { 2785 2614 PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", 2786 2615 __func__, ep->hwtid, req->status, ··· 2791 2616 mutex_lock(&dev->rdev.stats.lock); 2792 2617 dev->rdev.stats.neg_adv++; 2793 2618 mutex_unlock(&dev->rdev.stats.lock); 2794 - return 0; 2619 + goto deref_ep; 2795 2620 } 2796 2621 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, 2797 2622 ep->com.state); ··· 2808 2633 mutex_lock(&ep->com.mutex); 2809 2634 switch (ep->com.state) { 2810 2635 case CONNECTING: 2636 + c4iw_put_ep(&ep->parent_ep->com); 2811 2637 break; 2812 2638 case MPA_REQ_WAIT: 2813 2639 (void)stop_ep_timer(ep); ··· 2857 2681 case DEAD: 2858 2682 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); 2859 2683 mutex_unlock(&ep->com.mutex); 2860 - return 0; 2684 + goto deref_ep; 2861 2685 default: 2862 2686 BUG_ON(1); 2863 2687 break; ··· 2904 2728 c4iw_reconnect(ep); 2905 2729 } 2906 2730 2731 + deref_ep: 2732 + c4iw_put_ep(&ep->com); 2733 + /* Dereferencing ep, referenced in 
peer_abort_intr() */ 2734 + c4iw_put_ep(&ep->com); 2907 2735 return 0; 2908 2736 } 2909 2737 ··· 2917 2737 struct c4iw_qp_attributes attrs; 2918 2738 struct cpl_close_con_rpl *rpl = cplhdr(skb); 2919 2739 int release = 0; 2920 - struct tid_info *t = dev->rdev.lldi.tids; 2921 2740 unsigned int tid = GET_TID(rpl); 2922 2741 2923 - ep = lookup_tid(t, tid); 2742 + ep = get_ep_from_tid(dev, tid); 2743 + if (!ep) 2744 + return 0; 2924 2745 2925 2746 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2926 2747 BUG_ON(!ep); 2927 2748 2928 2749 /* The cm_id may be null if we failed to connect */ 2929 2750 mutex_lock(&ep->com.mutex); 2751 + set_bit(CLOSE_CON_RPL, &ep->com.history); 2930 2752 switch (ep->com.state) { 2931 2753 case CLOSING: 2932 2754 __state_set(&ep->com, MORIBUND); ··· 2956 2774 mutex_unlock(&ep->com.mutex); 2957 2775 if (release) 2958 2776 release_ep_resources(ep); 2777 + c4iw_put_ep(&ep->com); 2959 2778 return 0; 2960 2779 } 2961 2780 2962 2781 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) 2963 2782 { 2964 2783 struct cpl_rdma_terminate *rpl = cplhdr(skb); 2965 - struct tid_info *t = dev->rdev.lldi.tids; 2966 2784 unsigned int tid = GET_TID(rpl); 2967 2785 struct c4iw_ep *ep; 2968 2786 struct c4iw_qp_attributes attrs; 2969 2787 2970 - ep = lookup_tid(t, tid); 2788 + ep = get_ep_from_tid(dev, tid); 2971 2789 BUG_ON(!ep); 2972 2790 2973 2791 if (ep && ep->com.qp) { ··· 2978 2796 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 2979 2797 } else 2980 2798 printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); 2799 + c4iw_put_ep(&ep->com); 2981 2800 2982 2801 return 0; 2983 2802 } ··· 2994 2811 struct cpl_fw4_ack *hdr = cplhdr(skb); 2995 2812 u8 credits = hdr->credits; 2996 2813 unsigned int tid = GET_TID(hdr); 2997 - struct tid_info *t = dev->rdev.lldi.tids; 2998 2814 2999 2815 3000 - ep = lookup_tid(t, tid); 2816 + ep = get_ep_from_tid(dev, tid); 2817 + if (!ep) 2818 + return 0; 3001 2819 PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); 3002 2820 if (credits == 0) { 3003 2821 PDBG("%s 0 credit ack ep %p tid %u state %u\n", 3004 2822 __func__, ep, ep->hwtid, state_read(&ep->com)); 3005 - return 0; 2823 + goto out; 3006 2824 } 3007 2825 3008 2826 dst_confirm(ep->dst); ··· 3013 2829 state_read(&ep->com), ep->mpa_attr.initiator ? 
1 : 0); 3014 2830 kfree_skb(ep->mpa_skb); 3015 2831 ep->mpa_skb = NULL; 2832 + mutex_lock(&ep->com.mutex); 2833 + if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) 2834 + stop_ep_timer(ep); 2835 + mutex_unlock(&ep->com.mutex); 3016 2836 } 2837 + out: 2838 + c4iw_put_ep(&ep->com); 3017 2839 return 0; 3018 2840 } 3019 2841 ··· 3031 2841 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 3032 2842 3033 2843 mutex_lock(&ep->com.mutex); 3034 - if (ep->com.state == DEAD) { 2844 + if (ep->com.state != MPA_REQ_RCVD) { 3035 2845 mutex_unlock(&ep->com.mutex); 3036 2846 c4iw_put_ep(&ep->com); 3037 2847 return -ECONNRESET; 3038 2848 } 3039 2849 set_bit(ULP_REJECT, &ep->com.history); 3040 - BUG_ON(ep->com.state != MPA_REQ_RCVD); 3041 2850 if (mpa_rev == 0) 3042 - abort_connection(ep, NULL, GFP_KERNEL); 2851 + disconnect = 2; 3043 2852 else { 3044 2853 err = send_mpa_reject(ep, pdata, pdata_len); 3045 2854 disconnect = 1; 3046 2855 } 3047 2856 mutex_unlock(&ep->com.mutex); 3048 - if (disconnect) 3049 - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); 2857 + if (disconnect) { 2858 + stop_ep_timer(ep); 2859 + err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); 2860 + } 3050 2861 c4iw_put_ep(&ep->com); 3051 2862 return 0; 3052 2863 } ··· 3060 2869 struct c4iw_ep *ep = to_ep(cm_id); 3061 2870 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 3062 2871 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); 2872 + int abort = 0; 3063 2873 3064 2874 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 3065 2875 3066 2876 mutex_lock(&ep->com.mutex); 3067 - if (ep->com.state == DEAD) { 2877 + if (ep->com.state != MPA_REQ_RCVD) { 3068 2878 err = -ECONNRESET; 3069 - goto err; 2879 + goto err_out; 3070 2880 } 3071 2881 3072 - BUG_ON(ep->com.state != MPA_REQ_RCVD); 3073 2882 BUG_ON(!qp); 3074 2883 3075 2884 set_bit(ULP_ACCEPT, &ep->com.history); 3076 2885 if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || 3077 2886 (conn_param->ird > cur_max_read_depth(ep->com.dev))) { 3078 - abort_connection(ep, NULL, GFP_KERNEL); 3079 2887 err = -EINVAL; 3080 - goto err; 2888 + goto err_abort; 3081 2889 } 3082 2890 3083 2891 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { ··· 3088 2898 ep->ord = conn_param->ord; 3089 2899 send_mpa_reject(ep, conn_param->private_data, 3090 2900 conn_param->private_data_len); 3091 - abort_connection(ep, NULL, GFP_KERNEL); 3092 2901 err = -ENOMEM; 3093 - goto err; 2902 + goto err_abort; 3094 2903 } 3095 2904 } 3096 2905 if (conn_param->ird < ep->ord) { ··· 3097 2908 ep->ord <= h->rdev.lldi.max_ordird_qp) { 3098 2909 conn_param->ird = ep->ord; 3099 2910 } else { 3100 - abort_connection(ep, NULL, GFP_KERNEL); 3101 2911 err = -ENOMEM; 3102 - goto err; 2912 + goto err_abort; 3103 2913 } 3104 2914 } 3105 2915 } ··· 3117 2929 3118 2930 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); 3119 2931 3120 - cm_id->add_ref(cm_id); 3121 2932 ep->com.cm_id = cm_id; 2933 + ref_cm_id(&ep->com); 3122 2934 ep->com.qp = qp; 3123 2935 ref_qp(ep); 3124 2936 ··· 3139 2951 err = c4iw_modify_qp(ep->com.qp->rhp, 3140 2952 ep->com.qp, mask, &attrs, 1); 3141 2953 if (err) 3142 - goto err1; 2954 + goto err_deref_cm_id; 2955 + 2956 + set_bit(STOP_MPA_TIMER, &ep->com.flags); 3143 2957 err = send_mpa_reply(ep, conn_param->private_data, 3144 2958 conn_param->private_data_len); 3145 2959 if (err) 3146 - goto err1; 2960 + goto err_deref_cm_id; 3147 2961 3148 2962 __state_set(&ep->com, FPDU_MODE); 3149 2963 established_upcall(ep); 3150 2964 mutex_unlock(&ep->com.mutex); 3151 2965 
c4iw_put_ep(&ep->com); 3152 2966 return 0; 3153 - err1: 3154 - ep->com.cm_id = NULL; 3155 - abort_connection(ep, NULL, GFP_KERNEL); 3156 - cm_id->rem_ref(cm_id); 3157 - err: 2967 + err_deref_cm_id: 2968 + deref_cm_id(&ep->com); 2969 + err_abort: 2970 + abort = 1; 2971 + err_out: 3158 2972 mutex_unlock(&ep->com.mutex); 2973 + if (abort) 2974 + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 3159 2975 c4iw_put_ep(&ep->com); 3160 2976 return err; 3161 2977 } ··· 3259 3067 if (peer2peer && ep->ord == 0) 3260 3068 ep->ord = 1; 3261 3069 3262 - cm_id->add_ref(cm_id); 3263 - ep->com.dev = dev; 3264 3070 ep->com.cm_id = cm_id; 3071 + ref_cm_id(&ep->com); 3072 + ep->com.dev = dev; 3265 3073 ep->com.qp = get_qhp(dev, conn_param->qpn); 3266 3074 if (!ep->com.qp) { 3267 3075 PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); ··· 3300 3108 /* 3301 3109 * Handle loopback requests to INADDR_ANY. 3302 3110 */ 3303 - if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) { 3111 + if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { 3304 3112 err = pick_local_ipaddrs(dev, cm_id); 3305 3113 if (err) 3306 3114 goto fail1; ··· 3368 3176 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); 3369 3177 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 3370 3178 fail1: 3371 - cm_id->rem_ref(cm_id); 3179 + deref_cm_id(&ep->com); 3372 3180 c4iw_put_ep(&ep->com); 3373 3181 out: 3374 3182 return err; ··· 3462 3270 goto fail1; 3463 3271 } 3464 3272 PDBG("%s ep %p\n", __func__, ep); 3465 - cm_id->add_ref(cm_id); 3466 3273 ep->com.cm_id = cm_id; 3274 + ref_cm_id(&ep->com); 3467 3275 ep->com.dev = dev; 3468 3276 ep->backlog = backlog; 3469 3277 memcpy(&ep->com.local_addr, &cm_id->m_local_addr, ··· 3503 3311 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3504 3312 ep->com.local_addr.ss_family); 3505 3313 fail2: 3506 - cm_id->rem_ref(cm_id); 3314 + deref_cm_id(&ep->com); 3507 3315 c4iw_put_ep(&ep->com); 3508 3316 fail1: 3509 3317 out: ··· 3542 3350 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3543 3351 ep->com.local_addr.ss_family); 3544 3352 done: 3545 - cm_id->rem_ref(cm_id); 3353 + deref_cm_id(&ep->com); 3546 3354 c4iw_put_ep(&ep->com); 3547 3355 return err; 3548 3356 } ··· 3558 3366 3559 3367 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, 3560 3368 states[ep->com.state], abrupt); 3369 + 3370 + /* 3371 + * Ref the ep here in case we have fatal errors causing the 3372 + * ep to be released and freed. 
3373 + */ 3374 + c4iw_get_ep(&ep->com); 3561 3375 3562 3376 rdev = &ep->com.dev->rdev; 3563 3377 if (c4iw_fatal_error(rdev)) { ··· 3616 3418 set_bit(EP_DISC_CLOSE, &ep->com.history); 3617 3419 ret = send_halfclose(ep, gfp); 3618 3420 } 3619 - if (ret) 3421 + if (ret) { 3422 + set_bit(EP_DISC_FAIL, &ep->com.history); 3423 + if (!abrupt) { 3424 + stop_ep_timer(ep); 3425 + close_complete_upcall(ep, -EIO); 3426 + } 3427 + if (ep->com.qp) { 3428 + struct c4iw_qp_attributes attrs; 3429 + 3430 + attrs.next_state = C4IW_QP_STATE_ERROR; 3431 + ret = c4iw_modify_qp(ep->com.qp->rhp, 3432 + ep->com.qp, 3433 + C4IW_QP_ATTR_NEXT_STATE, 3434 + &attrs, 1); 3435 + if (ret) 3436 + pr_err(MOD 3437 + "%s - qp <- error failed!\n", 3438 + __func__); 3439 + } 3620 3440 fatal = 1; 3441 + } 3621 3442 } 3622 3443 mutex_unlock(&ep->com.mutex); 3444 + c4iw_put_ep(&ep->com); 3623 3445 if (fatal) 3624 3446 release_ep_resources(ep); 3625 3447 return ret; ··· 3894 3676 struct cpl_pass_accept_req *req = (void *)(rss + 1); 3895 3677 struct l2t_entry *e; 3896 3678 struct dst_entry *dst; 3897 - struct c4iw_ep *lep; 3679 + struct c4iw_ep *lep = NULL; 3898 3680 u16 window; 3899 3681 struct port_info *pi; 3900 3682 struct net_device *pdev; ··· 3919 3701 */ 3920 3702 stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); 3921 3703 3922 - lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); 3704 + lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); 3923 3705 if (!lep) { 3924 3706 PDBG("%s connect request on invalid stid %d\n", __func__, stid); 3925 3707 goto reject; ··· 4020 3802 free_dst: 4021 3803 dst_release(dst); 4022 3804 reject: 3805 + if (lep) 3806 + c4iw_put_ep(&lep->com); 4023 3807 return 0; 4024 3808 } 4025 3809 ··· 4029 3809 * These are the real handlers that are called from a 4030 3810 * work queue. 
4031 3811 */ 4032 - static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { 3812 + static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { 4033 3813 [CPL_ACT_ESTABLISH] = act_establish, 4034 3814 [CPL_ACT_OPEN_RPL] = act_open_rpl, 4035 3815 [CPL_RX_DATA] = rx_data, ··· 4045 3825 [CPL_RDMA_TERMINATE] = terminate, 4046 3826 [CPL_FW4_ACK] = fw4_ack, 4047 3827 [CPL_FW6_MSG] = deferred_fw6_msg, 4048 - [CPL_RX_PKT] = rx_pkt 3828 + [CPL_RX_PKT] = rx_pkt, 3829 + [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe, 3830 + [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe 4049 3831 }; 4050 3832 4051 3833 static void process_timeout(struct c4iw_ep *ep) ··· 4061 3839 set_bit(TIMEDOUT, &ep->com.history); 4062 3840 switch (ep->com.state) { 4063 3841 case MPA_REQ_SENT: 4064 - __state_set(&ep->com, ABORTING); 4065 3842 connect_reply_upcall(ep, -ETIMEDOUT); 4066 3843 break; 4067 3844 case MPA_REQ_WAIT: 4068 - __state_set(&ep->com, ABORTING); 3845 + case MPA_REQ_RCVD: 3846 + case MPA_REP_SENT: 3847 + case FPDU_MODE: 4069 3848 break; 4070 3849 case CLOSING: 4071 3850 case MORIBUND: ··· 4076 3853 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 4077 3854 &attrs, 1); 4078 3855 } 4079 - __state_set(&ep->com, ABORTING); 4080 3856 close_complete_upcall(ep, -ETIMEDOUT); 4081 3857 break; 4082 3858 case ABORTING: ··· 4093 3871 __func__, ep, ep->hwtid, ep->com.state); 4094 3872 abort = 0; 4095 3873 } 4096 - if (abort) 4097 - abort_connection(ep, NULL, GFP_KERNEL); 4098 3874 mutex_unlock(&ep->com.mutex); 3875 + if (abort) 3876 + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); 4099 3877 c4iw_put_ep(&ep->com); 4100 3878 } 4101 3879 ··· 4228 4006 { 4229 4007 struct cpl_abort_req_rss *req = cplhdr(skb); 4230 4008 struct c4iw_ep *ep; 4231 - struct tid_info *t = dev->rdev.lldi.tids; 4232 4009 unsigned int tid = GET_TID(req); 4233 4010 4234 - ep = lookup_tid(t, tid); 4011 + ep = get_ep_from_tid(dev, tid); 4012 + /* This EP will be dereferenced in peer_abort() */ 4235 4013 if (!ep) { 4236 4014 printk(KERN_WARNING MOD 4237 4015 "Abort on non-existent endpoint, tid %d\n", tid); ··· 4242 4020 PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", 4243 4021 __func__, ep->hwtid, req->status, 4244 4022 neg_adv_str(req->status)); 4245 - ep->stats.abort_neg_adv++; 4246 - dev->rdev.stats.neg_adv++; 4247 - kfree_skb(skb); 4248 - return 0; 4023 + goto out; 4249 4024 } 4250 4025 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, 4251 4026 ep->com.state); 4252 4027 4253 - /* 4254 - * Wake up any threads in rdma_init() or rdma_fini(). 4255 - * However, if we are on MPAv2 and want to retry with MPAv1 4256 - * then, don't wake up yet. 4257 - */ 4258 - if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { 4259 - if (ep->com.state != MPA_REQ_SENT) 4260 - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); 4261 - } else 4262 - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); 4028 + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); 4029 + out: 4263 4030 sched(dev, skb); 4264 4031 return 0; 4265 4032 }
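Note on the cm.c hunks above: they apply one pattern throughout the file. CPL handlers now look the endpoint up with a reference already held (get_ep_from_tid()/get_ep_from_stid()) and drop that reference on every exit path, and the streaming-mode send helpers (send_flowc(), send_mpa_req(), accept_cr(), ...) return an error so the caller can disconnect instead of carrying on with a half-initialised endpoint. Below is a minimal, single-threaded sketch of that shape; struct ep, put_ep() and establish_handler() are simplified stand-ins, not the driver's real types, which use kref-style counting and locking.

	#include <stdio.h>
	#include <stdlib.h>

	struct ep {
		int refcnt;
		unsigned int tid;
	};

	/* Lookup hands back the object with an extra reference, or NULL. */
	static struct ep *get_ep_from_tid(struct ep **tbl, unsigned int tid)
	{
		struct ep *ep = tbl[tid];

		if (ep)
			ep->refcnt++;
		return ep;
	}

	static void put_ep(struct ep *ep)
	{
		if (--ep->refcnt == 0)
			free(ep);		/* last reference gone: safe to release */
	}

	/* Send helpers report failure instead of returning void. */
	static int send_flowc(struct ep *ep)
	{
		return ep ? 0 : -1;		/* pretend the post succeeded */
	}

	static int establish_handler(struct ep **tbl, unsigned int tid)
	{
		struct ep *ep = get_ep_from_tid(tbl, tid);

		if (!ep)
			return 0;		/* endpoint already torn down: nothing to do */
		if (send_flowc(ep))
			fprintf(stderr, "send failed, disconnect tid %u\n", ep->tid);
		put_ep(ep);			/* always drop the lookup reference */
		return 0;
	}

	int main(void)
	{
		struct ep *tbl[4] = { 0 };

		tbl[1] = calloc(1, sizeof(*tbl[1]));
		if (!tbl[1])
			return 1;
		tbl[1]->refcnt = 1;		/* the table's own reference */
		tbl[1]->tid = 1;

		establish_handler(tbl, 1);	/* found: handler uses it and puts its ref */
		establish_handler(tbl, 2);	/* not found: handler returns immediately */
		put_ep(tbl[1]);			/* drop the table reference, frees the ep */
		return 0;
	}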
+10 -4
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
··· 755 755 CLOSE_SENT = 3, 756 756 TIMEOUT = 4, 757 757 QP_REFERENCED = 5, 758 + STOP_MPA_TIMER = 7, 758 759 }; 759 760 760 761 enum c4iw_ep_history { ··· 780 779 EP_DISC_ABORT = 18, 781 780 CONN_RPL_UPCALL = 19, 782 781 ACT_RETRY_NOMEM = 20, 783 - ACT_RETRY_INUSE = 21 782 + ACT_RETRY_INUSE = 21, 783 + CLOSE_CON_RPL = 22, 784 + EP_DISC_FAIL = 24, 785 + QP_REFED = 25, 786 + QP_DEREFED = 26, 787 + CM_ID_REFED = 27, 788 + CM_ID_DEREFED = 28, 784 789 }; 785 790 786 791 struct c4iw_ep_common { ··· 924 917 struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, 925 918 enum ib_mr_type mr_type, 926 919 u32 max_num_sg); 927 - int c4iw_map_mr_sg(struct ib_mr *ibmr, 928 - struct scatterlist *sg, 929 - int sg_nents); 920 + int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 921 + unsigned int *sg_offset); 930 922 int c4iw_dealloc_mw(struct ib_mw *mw); 931 923 struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 932 924 struct ib_udata *udata);
+6 -6
drivers/infiniband/hw/cxgb4/mem.c
··· 86 86 (wait ? FW_WR_COMPL_F : 0)); 87 87 req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; 88 88 req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); 89 - req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); 90 - req->cmd |= cpu_to_be32(T5_ULP_MEMIO_ORDER_V(1)); 89 + req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) | 90 + T5_ULP_MEMIO_ORDER_V(1) | 91 + T5_ULP_MEMIO_FID_V(rdev->lldi.rxq_ids[0])); 91 92 req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5)); 92 93 req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); 93 94 req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr)); ··· 691 690 return 0; 692 691 } 693 692 694 - int c4iw_map_mr_sg(struct ib_mr *ibmr, 695 - struct scatterlist *sg, 696 - int sg_nents) 693 + int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 694 + unsigned int *sg_offset) 697 695 { 698 696 struct c4iw_mr *mhp = to_c4iw_mr(ibmr); 699 697 700 698 mhp->mpl_len = 0; 701 699 702 - return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); 700 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page); 703 701 } 704 702 705 703 int c4iw_dereg_mr(struct ib_mr *ib_mr)
+3 -4
drivers/infiniband/hw/i40iw/i40iw.h
··· 50 50 #include <rdma/ib_pack.h> 51 51 #include <rdma/rdma_cm.h> 52 52 #include <rdma/iw_cm.h> 53 - #include <rdma/iw_portmap.h> 54 - #include <rdma/rdma_netlink.h> 55 53 #include <crypto/hash.h> 56 54 57 55 #include "i40iw_status.h" ··· 252 254 u32 arp_table_size; 253 255 u32 next_arp_index; 254 256 spinlock_t resource_lock; /* hw resource access */ 257 + spinlock_t qptable_lock; 255 258 u32 vendor_id; 256 259 u32 vendor_part_id; 257 260 u32 of_device_registered; ··· 391 392 392 393 void i40iw_manage_arp_cache(struct i40iw_device *iwdev, 393 394 unsigned char *mac_addr, 394 - __be32 *ip_addr, 395 + u32 *ip_addr, 395 396 bool ipv4, 396 397 u32 action); 397 398 ··· 549 550 struct i40iw_qp_flush_info *info, 550 551 bool wait); 551 552 552 - void i40iw_copy_ip_ntohl(u32 *dst, u32 *src); 553 + void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src); 553 554 struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd, 554 555 u64 addr, 555 556 u64 size,
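The i40iw.h changes above are largely byte-order typing: addresses the driver handles internally become plain host-order u32, while the __be32 annotation moves to the helper that actually reads wire-order data (i40iw_copy_ip_ntohl() now takes __be32 *src). The conversion itself is one ntohl() per 32-bit word; a userspace equivalent of that helper, for illustration only (the kernel uses its own byteorder headers):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Four wire-order (big-endian) words in, host-order words out. */
	static void copy_ip_ntohl(uint32_t *dst, const uint32_t *src_be)
	{
		for (int i = 0; i < 4; i++)
			dst[i] = ntohl(src_be[i]);
	}

	int main(void)
	{
		uint32_t wire[4] = { htonl(0x20010db8), 0, 0, htonl(1) };
		uint32_t host[4];

		copy_ip_ntohl(host, wire);
		/* prints: 20010db8 00000000 00000000 00000001 */
		printf("%08x %08x %08x %08x\n", host[0], host[1], host[2], host[3]);
		return 0;
	}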
+77 -71
drivers/infiniband/hw/i40iw/i40iw_cm.c
··· 771 771 { 772 772 struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; 773 773 struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; 774 + u16 ctrl_ird, ctrl_ord; 774 775 775 776 /* initialize the upper 5 bytes of the frame */ 776 777 i40iw_build_mpa_v1(cm_node, start_addr, mpa_key); ··· 780 779 781 780 /* initialize RTR msg */ 782 781 if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { 783 - rtr_msg->ctrl_ird = IETF_NO_IRD_ORD; 784 - rtr_msg->ctrl_ord = IETF_NO_IRD_ORD; 782 + ctrl_ird = IETF_NO_IRD_ORD; 783 + ctrl_ord = IETF_NO_IRD_ORD; 785 784 } else { 786 - rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? 785 + ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? 787 786 IETF_NO_IRD_ORD : cm_node->ird_size; 788 - rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? 787 + ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? 789 788 IETF_NO_IRD_ORD : cm_node->ord_size; 790 789 } 791 790 792 - rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER; 793 - rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN; 791 + ctrl_ird |= IETF_PEER_TO_PEER; 792 + ctrl_ird |= IETF_FLPDU_ZERO_LEN; 794 793 795 794 switch (mpa_key) { 796 795 case MPA_KEY_REQUEST: 797 - rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; 798 - rtr_msg->ctrl_ord |= IETF_RDMA0_READ; 796 + ctrl_ord |= IETF_RDMA0_WRITE; 797 + ctrl_ord |= IETF_RDMA0_READ; 799 798 break; 800 799 case MPA_KEY_REPLY: 801 800 switch (cm_node->send_rdma0_op) { 802 801 case SEND_RDMA_WRITE_ZERO: 803 - rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; 802 + ctrl_ord |= IETF_RDMA0_WRITE; 804 803 break; 805 804 case SEND_RDMA_READ_ZERO: 806 - rtr_msg->ctrl_ord |= IETF_RDMA0_READ; 805 + ctrl_ord |= IETF_RDMA0_READ; 807 806 break; 808 807 } 809 808 break; 810 809 default: 811 810 break; 812 811 } 813 - rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird); 814 - rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord); 812 + rtr_msg->ctrl_ird = htons(ctrl_ird); 813 + rtr_msg->ctrl_ord = htons(ctrl_ord); 815 814 } 816 815 817 816 /** ··· 2108 2107 struct in6_addr raddr6; 2109 2108 2110 2109 i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr); 2111 - return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6)); 2110 + return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6); 2112 2111 } 2113 2112 2114 2113 /** ··· 2161 2160 cm_node->tcp_cntxt.rcv_wnd = 2162 2161 I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; 2163 2162 ts = current_kernel_time(); 2164 - cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); 2163 + cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; 2165 2164 cm_node->tcp_cntxt.mss = iwdev->mss; 2166 2165 2167 2166 cm_node->iwdev = iwdev; ··· 2235 2234 if (cm_node->listener) { 2236 2235 i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); 2237 2236 } else { 2238 - if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) && 2237 + if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) && 2239 2238 cm_node->apbvt_set && cm_node->iwdev) { 2240 2239 i40iw_manage_apbvt(cm_node->iwdev, 2241 2240 cm_node->loc_port, ··· 2853 2852 void *private_data, 2854 2853 struct i40iw_cm_info *cm_info) 2855 2854 { 2856 - int ret; 2857 2855 struct i40iw_cm_node *cm_node; 2858 2856 struct i40iw_cm_listener *loopback_remotelistener; 2859 2857 struct i40iw_cm_node *loopback_remotenode; ··· 2922 2922 memcpy(cm_node->pdata_buf, private_data, private_data_len); 2923 2923 2924 2924 cm_node->state = I40IW_CM_STATE_SYN_SENT; 2925 - ret = i40iw_send_syn(cm_node, 0); 2926 - 2927 - if (ret) { 2928 - if (cm_node->ipv4) 2929 - i40iw_debug(cm_node->dev, 2930 - 
I40IW_DEBUG_CM, 2931 - "Api - connect() FAILED: dest addr=%pI4", 2932 - cm_node->rem_addr); 2933 - else 2934 - i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, 2935 - "Api - connect() FAILED: dest addr=%pI6", 2936 - cm_node->rem_addr); 2937 - i40iw_rem_ref_cm_node(cm_node); 2938 - cm_node = NULL; 2939 - } 2940 - 2941 - if (cm_node) 2942 - i40iw_debug(cm_node->dev, 2943 - I40IW_DEBUG_CM, 2944 - "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n", 2945 - cm_node->rem_port, 2946 - cm_node, 2947 - cm_node->cm_id); 2948 - 2949 2925 return cm_node; 2950 2926 } 2951 2927 ··· 3242 3266 3243 3267 tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]); 3244 3268 tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]); 3245 - tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev, 3246 - &tcp_info->dest_ip_addr3, 3247 - true, 3248 - NULL, 3249 - I40IW_ARP_RESOLVE)); 3269 + tcp_info->arp_idx = 3270 + cpu_to_le16((u16)i40iw_arp_table( 3271 + iwqp->iwdev, 3272 + &tcp_info->dest_ip_addr3, 3273 + true, 3274 + NULL, 3275 + I40IW_ARP_RESOLVE)); 3250 3276 } else { 3251 3277 tcp_info->src_port = cpu_to_le16(cm_node->loc_port); 3252 3278 tcp_info->dst_port = cpu_to_le16(cm_node->rem_port); ··· 3260 3282 tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]); 3261 3283 tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]); 3262 3284 tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]); 3263 - tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table( 3264 - iwqp->iwdev, 3265 - &tcp_info->dest_ip_addr0, 3266 - false, 3267 - NULL, 3268 - I40IW_ARP_RESOLVE)); 3285 + tcp_info->arp_idx = 3286 + cpu_to_le16((u16)i40iw_arp_table( 3287 + iwqp->iwdev, 3288 + &tcp_info->dest_ip_addr0, 3289 + false, 3290 + NULL, 3291 + I40IW_ARP_RESOLVE)); 3269 3292 } 3270 3293 } 3271 3294 ··· 3543 3564 struct i40iw_cm_node *cm_node; 3544 3565 struct ib_qp_attr attr; 3545 3566 int passive_state; 3546 - struct i40iw_ib_device *iwibdev; 3547 3567 struct ib_mr *ibmr; 3548 3568 struct i40iw_pd *iwpd; 3549 3569 u16 buf_len = 0; ··· 3605 3627 !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) || 3606 3628 (!cm_node->ipv4 && 3607 3629 !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) { 3608 - iwibdev = iwdev->iwibdev; 3609 3630 iwpd = iwqp->iwpd; 3610 3631 tagged_offset = (uintptr_t)iwqp->ietf_mem.va; 3611 3632 ibmr = i40iw_reg_phys_mr(&iwpd->ibpd, ··· 3729 3752 struct sockaddr_in *raddr; 3730 3753 struct sockaddr_in6 *laddr6; 3731 3754 struct sockaddr_in6 *raddr6; 3755 + bool qhash_set = false; 3732 3756 int apbvt_set = 0; 3733 3757 enum i40iw_status_code status; 3734 3758 ··· 3788 3810 true); 3789 3811 if (status) 3790 3812 return -EINVAL; 3813 + qhash_set = true; 3791 3814 } 3792 3815 status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD); 3793 3816 if (status) { ··· 3807 3828 conn_param->private_data_len, 3808 3829 (void *)conn_param->private_data, 3809 3830 &cm_info); 3810 - if (!cm_node) { 3811 - i40iw_manage_qhash(iwdev, 3812 - &cm_info, 3813 - I40IW_QHASH_TYPE_TCP_ESTABLISHED, 3814 - I40IW_QHASH_MANAGE_TYPE_DELETE, 3815 - NULL, 3816 - false); 3817 - 3818 - if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, 3819 - cm_info.loc_port)) 3820 - i40iw_manage_apbvt(iwdev, 3821 - cm_info.loc_port, 3822 - I40IW_MANAGE_APBVT_DEL); 3823 - cm_id->rem_ref(cm_id); 3824 - iwdev->cm_core.stats_connect_errs++; 3825 - return -ENOMEM; 3826 - } 3831 + if (!cm_node) 3832 + goto err; 3827 3833 3828 3834 i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); 
3829 3835 if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && ··· 3816 3852 cm_node->ord_size = 1; 3817 3853 3818 3854 cm_node->apbvt_set = apbvt_set; 3819 - cm_node->qhash_set = true; 3855 + cm_node->qhash_set = qhash_set; 3820 3856 iwqp->cm_node = cm_node; 3821 3857 cm_node->iwqp = iwqp; 3822 3858 iwqp->cm_id = cm_id; 3823 3859 i40iw_add_ref(&iwqp->ibqp); 3860 + 3861 + if (cm_node->state == I40IW_CM_STATE_SYN_SENT) { 3862 + if (i40iw_send_syn(cm_node, 0)) { 3863 + i40iw_rem_ref_cm_node(cm_node); 3864 + goto err; 3865 + } 3866 + } 3867 + 3868 + i40iw_debug(cm_node->dev, 3869 + I40IW_DEBUG_CM, 3870 + "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n", 3871 + cm_node->rem_port, 3872 + cm_node, 3873 + cm_node->cm_id); 3824 3874 return 0; 3875 + 3876 + err: 3877 + if (cm_node) { 3878 + if (cm_node->ipv4) 3879 + i40iw_debug(cm_node->dev, 3880 + I40IW_DEBUG_CM, 3881 + "Api - connect() FAILED: dest addr=%pI4", 3882 + cm_node->rem_addr); 3883 + else 3884 + i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, 3885 + "Api - connect() FAILED: dest addr=%pI6", 3886 + cm_node->rem_addr); 3887 + } 3888 + i40iw_manage_qhash(iwdev, 3889 + &cm_info, 3890 + I40IW_QHASH_TYPE_TCP_ESTABLISHED, 3891 + I40IW_QHASH_MANAGE_TYPE_DELETE, 3892 + NULL, 3893 + false); 3894 + 3895 + if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, 3896 + cm_info.loc_port)) 3897 + i40iw_manage_apbvt(iwdev, 3898 + cm_info.loc_port, 3899 + I40IW_MANAGE_APBVT_DEL); 3900 + cm_id->rem_ref(cm_id); 3901 + iwdev->cm_core.stats_connect_errs++; 3902 + return -ENOMEM; 3825 3903 } 3826 3904 3827 3905 /**
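One of the i40iw_cm.c fixes above deserves a note: ctrl_ird/ctrl_ord used to be OR-ed together directly in the big-endian struct fields, so the flag arithmetic ran on already-swapped values. The patch accumulates them in local host-order u16 variables and converts exactly once with htons() at the end. The same idea in isolation (the flag values below are made up, not the IETF MPA ones):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PEER_TO_PEER   0x8000	/* illustrative flag values only */
	#define FLPDU_ZERO_LEN 0x4000

	int main(void)
	{
		uint16_t ctrl_ird = 32;		/* build the field in host order ... */

		ctrl_ird |= PEER_TO_PEER;
		ctrl_ird |= FLPDU_ZERO_LEN;

		uint16_t wire = htons(ctrl_ird);	/* ... and swap exactly once */

		printf("host %#06x wire %#06x\n", ctrl_ird, wire);
		return 0;
	}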
+1 -9
drivers/infiniband/hw/i40iw/i40iw_cm.h
··· 1 1 /******************************************************************************* 2 2 * 3 - * Copyright (c) 2015 Intel Corporation. All rights reserved. 3 + * Copyright (c) 2015-2016 Intel Corporation. All rights reserved. 4 4 * 5 5 * This software is available to you under a choice of one of two 6 6 * licenses. You may choose to be licensed under the terms of the GNU ··· 291 291 u8 loc_mac[ETH_ALEN]; 292 292 u32 loc_addr[4]; 293 293 u16 loc_port; 294 - u32 map_loc_addr[4]; 295 - u16 map_loc_port; 296 294 struct iw_cm_id *cm_id; 297 295 atomic_t ref_count; 298 296 struct i40iw_device *iwdev; ··· 315 317 struct i40iw_cm_node { 316 318 u32 loc_addr[4], rem_addr[4]; 317 319 u16 loc_port, rem_port; 318 - u32 map_loc_addr[4], map_rem_addr[4]; 319 - u16 map_loc_port, map_rem_port; 320 320 u16 vlan_id; 321 321 enum i40iw_cm_node_state state; 322 322 u8 loc_mac[ETH_ALEN]; ··· 366 370 u16 rem_port; 367 371 u32 loc_addr[4]; 368 372 u32 rem_addr[4]; 369 - u16 map_loc_port; 370 - u16 map_rem_port; 371 - u32 map_loc_addr[4]; 372 - u32 map_rem_addr[4]; 373 373 u16 vlan_id; 374 374 int backlog; 375 375 u16 user_pri;
+137 -48
drivers/infiniband/hw/i40iw/i40iw_ctrl.c
··· 114 114 * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer 115 115 * @buf: ptr to fpm commit buffer 116 116 * @info: ptr to i40iw_hmc_obj_info struct 117 + * @sd: number of SDs for HMC objects 117 118 * 118 119 * parses fpm commit info and copy base value 119 120 * of hmc objects in hmc_info 120 121 */ 121 122 static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( 122 123 u64 *buf, 123 - struct i40iw_hmc_obj_info *info) 124 + struct i40iw_hmc_obj_info *info, 125 + u32 *sd) 124 126 { 125 127 u64 temp; 128 + u64 size; 129 + u64 base = 0; 126 130 u32 i, j; 131 + u32 k = 0; 127 132 u32 low; 128 133 129 134 /* copy base values in obj_info */ ··· 136 131 i <= I40IW_HMC_IW_PBLE; i++, j += 8) { 137 132 get_64bit_val(buf, j, &temp); 138 133 info[i].base = RS_64_1(temp, 32) * 512; 134 + if (info[i].base > base) { 135 + base = info[i].base; 136 + k = i; 137 + } 139 138 low = (u32)(temp); 140 139 if (low) 141 140 info[i].cnt = low; 142 141 } 142 + size = info[k].cnt * info[k].size + info[k].base; 143 + if (size & 0x1FFFFF) 144 + *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */ 145 + else 146 + *sd = (u32)(size >> 21); 147 + 143 148 return 0; 144 149 } 145 150 ··· 2924 2909 } 2925 2910 2926 2911 /** 2912 + * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp 2913 + * @qp: sc qp struct 2914 + * @info: fast mr info 2915 + * @post_sq: flag for cqp db to ring 2916 + */ 2917 + enum i40iw_status_code i40iw_sc_mr_fast_register( 2918 + struct i40iw_sc_qp *qp, 2919 + struct i40iw_fast_reg_stag_info *info, 2920 + bool post_sq) 2921 + { 2922 + u64 temp, header; 2923 + u64 *wqe; 2924 + u32 wqe_idx; 2925 + 2926 + wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 2927 + 0, info->wr_id); 2928 + if (!wqe) 2929 + return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 2930 + 2931 + i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n", 2932 + __func__, info->wr_id, wqe_idx, 2933 + &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid); 2934 + temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 
(uintptr_t)info->va : info->fbo; 2935 + set_64bit_val(wqe, 0, temp); 2936 + 2937 + temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI); 2938 + set_64bit_val(wqe, 2939 + 8, 2940 + LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) | 2941 + LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR)); 2942 + 2943 + set_64bit_val(wqe, 2944 + 16, 2945 + info->total_len | 2946 + LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO)); 2947 + 2948 + header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) | 2949 + LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) | 2950 + LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) | 2951 + LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) | 2952 + LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) | 2953 + LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) | 2954 + LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) | 2955 + LS_64(info->read_fence, I40IWQPSQ_READFENCE) | 2956 + LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | 2957 + LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | 2958 + LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); 2959 + 2960 + i40iw_insert_wqe_hdr(wqe, header); 2961 + 2962 + i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE", 2963 + wqe, I40IW_QP_WQE_MIN_SIZE); 2964 + 2965 + if (post_sq) 2966 + i40iw_qp_post_wr(&qp->qp_uk); 2967 + return 0; 2968 + } 2969 + 2970 + /** 2927 2971 * i40iw_sc_send_lsmm - send last streaming mode message 2928 2972 * @qp: sc qp struct 2929 2973 * @lsmm_buf: buffer with lsmm message ··· 3221 3147 i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id); 3222 3148 3223 3149 /* parse the fpm_commit_buf and fill hmc obj info */ 3224 - i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj); 3150 + i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt); 3225 3151 mem_size = sizeof(struct i40iw_hmc_sd_entry) * 3226 3152 (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index); 3227 3153 ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size); ··· 3295 3221 3296 3222 /* parse the fpm_commit_buf and fill hmc obj info */ 3297 3223 if (!ret_code) 3298 - ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj); 3224 + ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, 3225 + hmc_info->hmc_obj, 3226 + &hmc_info->sd_table.sd_cnt); 3299 3227 3300 3228 i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER", 3301 3229 commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE); ··· 3545 3469 } 3546 3470 3547 3471 /** 3472 + * i40iw_est_sd - returns approximate number of SDs for HMC 3473 + * @dev: sc device struct 3474 + * @hmc_info: hmc structure, size and count for HMC objects 3475 + */ 3476 + static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info) 3477 + { 3478 + int i; 3479 + u64 size = 0; 3480 + u64 sd; 3481 + 3482 + for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++) 3483 + size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size; 3484 + 3485 + if (dev->is_pf) 3486 + size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size; 3487 + 3488 + if (size & 0x1FFFFF) 3489 + sd = (size >> 21) + 1; /* add 1 for remainder */ 3490 + else 3491 + sd = size >> 21; 3492 + 3493 + if (!dev->is_pf) { 3494 + /* 2MB alignment for VF PBLE HMC */ 3495 + size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size; 3496 + if (size & 0x1FFFFF) 3497 + sd += (size >> 21) + 1; /* add 1 for remainder */ 3498 + else 3499 + sd += size >> 21; 3500 + } 3501 + 3502 
+ return sd; 3503 + } 3504 + 3505 + /** 3548 3506 * i40iw_config_fpm_values - configure HMC objects 3549 3507 * @dev: sc device struct 3550 3508 * @qp_count: desired qp count ··· 3589 3479 u32 i, mem_size; 3590 3480 u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted; 3591 3481 u32 powerof2; 3592 - u64 sd_needed, bytes_needed; 3482 + u64 sd_needed; 3593 3483 u32 loop_count = 0; 3594 3484 3595 3485 struct i40iw_hmc_info *hmc_info; ··· 3607 3497 return ret_code; 3608 3498 } 3609 3499 3610 - bytes_needed = 0; 3611 - for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) { 3500 + for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) 3612 3501 hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; 3613 - bytes_needed += 3614 - (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size); 3615 - i40iw_debug(dev, I40IW_DEBUG_HMC, 3616 - "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n", 3617 - __func__, i, hmc_info->hmc_obj[i].max_cnt, 3618 - hmc_info->hmc_obj[i].size); 3619 - } 3620 - sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */ 3502 + sd_needed = i40iw_est_sd(dev, hmc_info); 3621 3503 i40iw_debug(dev, I40IW_DEBUG_HMC, 3622 3504 "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n", 3623 3505 __func__, sd_needed, hmc_info->first_sd_index); 3624 3506 i40iw_debug(dev, I40IW_DEBUG_HMC, 3625 - "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n", 3626 - __func__, bytes_needed, hmc_info->sd_table.sd_cnt, 3507 + "%s: sd count %d where max sd is %d\n", 3508 + __func__, hmc_info->sd_table.sd_cnt, 3627 3509 hmc_fpm_misc->max_sds); 3628 3510 3629 3511 qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt); ··· 3657 3555 hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted; 3658 3556 3659 3557 /* How much memory is needed for all the objects. */ 3660 - bytes_needed = 0; 3661 - for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) 3662 - bytes_needed += 3663 - (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size); 3664 - sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; 3558 + sd_needed = i40iw_est_sd(dev, hmc_info); 3665 3559 if ((loop_count > 1000) || 3666 3560 ((!(loop_count % 10)) && 3667 3561 (qpwanted > qpwantedoriginal * 2 / 3))) { ··· 3678 3580 pblewanted -= FPM_MULTIPLIER * 1000; 3679 3581 } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000); 3680 3582 3681 - bytes_needed = 0; 3682 - for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) { 3683 - bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size); 3684 - i40iw_debug(dev, I40IW_DEBUG_HMC, 3685 - "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n", 3686 - __func__, i, hmc_info->hmc_obj[i].cnt, 3687 - hmc_info->hmc_obj[i].size); 3688 - } 3689 - sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up not truncate. */ 3583 + sd_needed = i40iw_est_sd(dev, hmc_info); 3690 3584 3691 3585 i40iw_debug(dev, I40IW_DEBUG_HMC, 3692 3586 "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n", ··· 3695 3605 i40iw_rd32(dev->hw, dev->is_pf ? 
I40E_PFPE_CQPERRCODES : I40E_VFPE_CQPERRCODES1)); 3696 3606 return ret_code; 3697 3607 } 3698 - 3699 - hmc_info->sd_table.sd_cnt = (u32)sd_needed; 3700 3608 3701 3609 mem_size = sizeof(struct i40iw_hmc_sd_entry) * 3702 3610 (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1); ··· 3999 3911 */ 4000 3912 static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt) 4001 3913 { 4002 - u16 *mpa; 3914 + __be16 *mpa; 4003 3915 u32 opcode = 0xffffffff; 4004 3916 4005 3917 if (info->q2_data_written) { 4006 - mpa = (u16 *)pkt; 3918 + mpa = (__be16 *)pkt; 4007 3919 opcode = ntohs(mpa[1]) & 0xf; 4008 3920 } 4009 3921 return opcode; ··· 4065 3977 if (info->q2_data_written) { 4066 3978 /* Use data from offending packet to fill in ddp & rdma hdrs */ 4067 3979 pkt = i40iw_locate_mpa(pkt); 4068 - ddp_seg_len = ntohs(*(u16 *)pkt); 3980 + ddp_seg_len = ntohs(*(__be16 *)pkt); 4069 3981 if (ddp_seg_len) { 4070 3982 copy_len = 2; 4071 3983 termhdr->hdrct = DDP_LEN_FLAG; ··· 4276 4188 void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info) 4277 4189 { 4278 4190 u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET; 4279 - u32 *mpa; 4191 + __be32 *mpa; 4280 4192 u8 ddp_ctl; 4281 4193 u8 rdma_ctl; 4282 4194 u16 aeq_id = 0; 4283 4195 struct i40iw_terminate_hdr *termhdr; 4284 4196 4285 - mpa = (u32 *)i40iw_locate_mpa(pkt); 4197 + mpa = (__be32 *)i40iw_locate_mpa(pkt); 4286 4198 if (info->q2_data_written) { 4287 4199 /* did not validate the frame - do it now */ 4288 4200 ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff; ··· 4647 4559 }; 4648 4560 4649 4561 static struct i40iw_priv_qp_ops iw_priv_qp_ops = { 4650 - i40iw_sc_qp_init, 4651 - i40iw_sc_qp_create, 4652 - i40iw_sc_qp_modify, 4653 - i40iw_sc_qp_destroy, 4654 - i40iw_sc_qp_flush_wqes, 4655 - i40iw_sc_qp_upload_context, 4656 - i40iw_sc_qp_setctx, 4657 - i40iw_sc_send_lsmm, 4658 - i40iw_sc_send_lsmm_nostag, 4659 - i40iw_sc_send_rtt, 4660 - i40iw_sc_post_wqe0, 4562 + .qp_init = i40iw_sc_qp_init, 4563 + .qp_create = i40iw_sc_qp_create, 4564 + .qp_modify = i40iw_sc_qp_modify, 4565 + .qp_destroy = i40iw_sc_qp_destroy, 4566 + .qp_flush_wqes = i40iw_sc_qp_flush_wqes, 4567 + .qp_upload_context = i40iw_sc_qp_upload_context, 4568 + .qp_setctx = i40iw_sc_qp_setctx, 4569 + .qp_send_lsmm = i40iw_sc_send_lsmm, 4570 + .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag, 4571 + .qp_send_rtt = i40iw_sc_send_rtt, 4572 + .qp_post_wqe0 = i40iw_sc_post_wqe0, 4573 + .iw_mr_fast_register = i40iw_sc_mr_fast_register 4661 4574 }; 4662 4575 4663 4576 static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
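The new i40iw_est_sd() above boils down to a round-up division: HMC backing memory is carved into 2 MB segment descriptors, so the object byte total is shifted right by 21 and bumped by one when the low 21 bits (size & 0x1FFFFF) are non-zero. An equivalent, more explicit form of that arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	#define SD_SIZE (1ull << 21)	/* one segment descriptor backs 2 MB */

	static uint64_t est_sd(uint64_t bytes)
	{
		/* same result as (bytes >> 21) plus the remainder bump */
		return (bytes + SD_SIZE - 1) / SD_SIZE;
	}

	int main(void)
	{
		printf("%llu %llu\n",
		       (unsigned long long)est_sd(2 * SD_SIZE),		/* 2 */
		       (unsigned long long)est_sd(2 * SD_SIZE + 1));	/* 3 */
		return 0;
	}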
+3 -1
drivers/infiniband/hw/i40iw/i40iw_d.h
··· 1290 1290 1291 1291 /* wqe size considering 32 bytes per wqe*/ 1292 1292 #define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */ 1293 - #define I40IWQP_SW_MAX_WQSIZE 16384 /* 524288 bytes */ 1293 + #define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */ 1294 1294 1295 1295 #define I40IWQP_OP_RDMA_WRITE 0 1296 1296 #define I40IWQP_OP_RDMA_READ 1 ··· 1511 1511 I40IW_CQ0_ALIGNMENT = 0x100, 1512 1512 I40IW_SD_BUF_ALIGNMENT = 0x100 1513 1513 }; 1514 + 1515 + #define I40IW_WQE_SIZE_64 64 1514 1516 1515 1517 #define I40IW_QP_WQE_MIN_SIZE 32 1516 1518 #define I40IW_QP_WQE_MAX_SIZE 128
+11 -3
drivers/infiniband/hw/i40iw/i40iw_hw.c
··· 106 106 set_bit(2, iwdev->allocated_pds); 107 107 108 108 spin_lock_init(&iwdev->resource_lock); 109 - mrdrvbits = 24 - get_count_order(iwdev->max_mr); 109 + spin_lock_init(&iwdev->qptable_lock); 110 + /* stag index mask has a minimum of 14 bits */ 111 + mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14); 110 112 iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits)); 111 113 return 0; 112 114 } ··· 303 301 "%s ae_id = 0x%x bool qp=%d qp_id = %d\n", 304 302 __func__, info->ae_id, info->qp, info->qp_cq_id); 305 303 if (info->qp) { 304 + spin_lock_irqsave(&iwdev->qptable_lock, flags); 306 305 iwqp = iwdev->qp_table[info->qp_cq_id]; 307 306 if (!iwqp) { 307 + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); 308 308 i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id); 309 309 continue; 310 310 } 311 + i40iw_add_ref(&iwqp->ibqp); 312 + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); 311 313 qp = &iwqp->sc_qp; 312 314 spin_lock_irqsave(&iwqp->lock, flags); 313 315 iwqp->hw_tcp_state = info->tcp_state; ··· 417 411 i40iw_terminate_connection(qp, info); 418 412 break; 419 413 } 414 + if (info->qp) 415 + i40iw_rem_ref(&iwqp->ibqp); 420 416 } while (1); 421 417 422 418 if (aeqcnt) ··· 468 460 */ 469 461 void i40iw_manage_arp_cache(struct i40iw_device *iwdev, 470 462 unsigned char *mac_addr, 471 - __be32 *ip_addr, 463 + u32 *ip_addr, 472 464 bool ipv4, 473 465 u32 action) 474 466 { ··· 489 481 cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY; 490 482 info = &cqp_info->in.u.add_arp_cache_entry.info; 491 483 memset(info, 0, sizeof(*info)); 492 - info->arp_index = cpu_to_le32(arp_index); 484 + info->arp_index = cpu_to_le16((u16)arp_index); 493 485 info->permanent = true; 494 486 ether_addr_copy(info->mac_addr, mac_addr); 495 487 cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
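The resource-setup tweak in i40iw_hw.c above guarantees at least 14 bits of STag index: mrdrvbits = 24 - max(get_count_order(max_mr), 14), and the mask then clears the driver-reserved bits at the top of the 32-bit STag. Worked through for a small device limit, with a userspace stand-in for get_count_order():

	#include <stdio.h>

	/* Stand-in for get_count_order(): smallest x with 2^x >= n. */
	static int count_order(unsigned int n)
	{
		int x = 0;

		while ((1u << x) < n)
			x++;
		return x;
	}

	int main(void)
	{
		unsigned int max_mr = 4096;		/* example device MR limit */
		int idx_bits = count_order(max_mr);	/* 12 */

		if (idx_bits < 14)
			idx_bits = 14;			/* the fix: never fewer than 14 index bits */

		int mrdrvbits = 24 - idx_bits;		/* driver-owned bits at the top of the STag */
		unsigned int stagmask = ~(((1u << mrdrvbits) - 1) << (32 - mrdrvbits));

		/* prints: mrdrvbits=10 stagmask=0x003fffff */
		printf("mrdrvbits=%d stagmask=0x%08x\n", mrdrvbits, stagmask);
		return 0;
	}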
+42 -14
drivers/infiniband/hw/i40iw/i40iw_main.c
··· 270 270 i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); 271 271 else 272 272 i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); 273 - synchronize_irq(msix_vec->irq); 274 273 free_irq(msix_vec->irq, dev_id); 275 274 } 276 275 ··· 1146 1147 if (!status) { 1147 1148 status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr, 1148 1149 (u8)iwdev->mac_ip_table_idx); 1149 - if (!status) 1150 - status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr, 1151 - (u8)iwdev->mac_ip_table_idx); 1152 - else 1150 + if (status) 1153 1151 i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); 1154 1152 } 1155 1153 return status; ··· 1161 1165 struct net_device *ip_dev; 1162 1166 struct inet6_dev *idev; 1163 1167 struct inet6_ifaddr *ifp; 1164 - __be32 local_ipaddr6[4]; 1168 + u32 local_ipaddr6[4]; 1165 1169 1166 1170 rcu_read_lock(); 1167 1171 for_each_netdev_rcu(&init_net, ip_dev) { ··· 1508 1512 I40IW_HMC_PROFILE_DEFAULT; 1509 1513 iwdev->max_rdma_vfs = 1510 1514 (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0; 1515 + iwdev->max_enabled_vfs = iwdev->max_rdma_vfs; 1511 1516 iwdev->netdev = ldev->netdev; 1512 1517 hdl->client = client; 1513 1518 iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS; ··· 1528 1531 goto exit; 1529 1532 iwdev->obj_next = iwdev->obj_mem; 1530 1533 iwdev->push_mode = push_mode; 1534 + 1531 1535 init_waitqueue_head(&iwdev->vchnl_waitq); 1536 + init_waitqueue_head(&dev->vf_reqs); 1537 + 1532 1538 status = i40iw_initialize_dev(iwdev, ldev); 1533 1539 exit: 1534 1540 if (status) { ··· 1710 1710 for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) { 1711 1711 if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id)) 1712 1712 continue; 1713 - 1714 1713 /* free all resources allocated on behalf of vf */ 1715 1714 tmp_vfdev = dev->vf_dev[i]; 1716 1715 spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); ··· 1818 1819 dev = &hdl->device.sc_dev; 1819 1820 iwdev = dev->back_dev; 1820 1821 1821 - i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len); 1822 - 1823 1822 if (dev->vchnl_if.vchnl_recv) { 1824 1823 ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len); 1825 1824 if (!dev->is_pf) { ··· 1826 1829 } 1827 1830 } 1828 1831 return ret_code; 1832 + } 1833 + 1834 + /** 1835 + * i40iw_vf_clear_to_send - wait to send virtual channel message 1836 + * @dev: iwarp device * 1837 + * Wait for until virtual channel is clear 1838 + * before sending the next message 1839 + * 1840 + * Returns false if error 1841 + * Returns true if clear to send 1842 + */ 1843 + bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev) 1844 + { 1845 + struct i40iw_device *iwdev; 1846 + wait_queue_t wait; 1847 + 1848 + iwdev = dev->back_dev; 1849 + 1850 + if (!wq_has_sleeper(&dev->vf_reqs) && 1851 + (atomic_read(&iwdev->vchnl_msgs) == 0)) 1852 + return true; /* virtual channel is clear */ 1853 + 1854 + init_wait(&wait); 1855 + add_wait_queue_exclusive(&dev->vf_reqs, &wait); 1856 + 1857 + if (!wait_event_timeout(dev->vf_reqs, 1858 + (atomic_read(&iwdev->vchnl_msgs) == 0), 1859 + I40IW_VCHNL_EVENT_TIMEOUT)) 1860 + dev->vchnl_up = false; 1861 + 1862 + remove_wait_queue(&dev->vf_reqs, &wait); 1863 + 1864 + return dev->vchnl_up; 1829 1865 } 1830 1866 1831 1867 /** ··· 1878 1848 { 1879 1849 struct i40iw_device *iwdev; 1880 1850 struct i40e_info *ldev; 1881 - enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR; 1882 1851 1883 1852 if (!dev || !dev->back_dev) 1884 - return ret_code; 1853 + return I40IW_ERR_BAD_PTR; 
1885 1854 1886 1855 iwdev = dev->back_dev; 1887 1856 ldev = iwdev->ldev; 1888 1857 1889 1858 if (ldev && ldev->ops && ldev->ops->virtchnl_send) 1890 - ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len); 1891 - 1892 - return ret_code; 1859 + return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len); 1860 + return I40IW_ERR_BAD_PTR; 1893 1861 } 1894 1862 1895 1863 /* client interface functions */
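The new i40iw_vf_clear_to_send() gate is a small wait-queue handshake around an atomic message counter: a requester may only transmit on the virtual channel once no response is outstanding, and the response path wakes the next waiter. A minimal sketch of that pattern, using standard wait-queue primitives and illustrative demo_* names rather than the driver's symbols:

#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_vchnl_waitq);
static atomic_t demo_vchnl_msgs = ATOMIC_INIT(0);

/* Requester side: proceed if the channel is idle, otherwise sleep
 * (with a timeout) until the responder drains it. */
static bool demo_clear_to_send(void)
{
    /* Opportunistic fast path, mirroring the driver's initial check. */
    if (!waitqueue_active(&demo_vchnl_waitq) &&
        atomic_read(&demo_vchnl_msgs) == 0)
        return true;

    /* wait_event_timeout() returns 0 only when the timeout expired. */
    return wait_event_timeout(demo_vchnl_waitq,
                              atomic_read(&demo_vchnl_msgs) == 0,
                              msecs_to_jiffies(100)) != 0;
}

/* Responder side: mark the channel idle and release one waiter. */
static void demo_response_done(void)
{
    atomic_set(&demo_vchnl_msgs, 0);
    wake_up(&demo_vchnl_waitq);
}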
+1
drivers/infiniband/hw/i40iw/i40iw_osdep.h
··· 172 172 u8 __iomem *i40iw_get_hw_addr(void *dev); 173 173 void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); 174 174 enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev); 175 + bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev); 175 176 enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr, 176 177 u32 length, u32 value); 177 178 struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
+5 -4
drivers/infiniband/hw/i40iw/i40iw_pble.c
··· 404 404 sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; 405 405 if (sd_entry->valid) 406 406 return 0; 407 - if (dev->is_pf) 407 + if (dev->is_pf) { 408 408 ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id, 409 409 sd_reg_val, idx->sd_idx, 410 410 sd_entry->entry_type, true); 411 - if (ret_code) { 412 - i40iw_pr_err("cqp cmd failed for sd (pbles)\n"); 413 - goto error; 411 + if (ret_code) { 412 + i40iw_pr_err("cqp cmd failed for sd (pbles)\n"); 413 + goto error; 414 + } 414 415 } 415 416 416 417 sd_entry->valid = true;
+1 -1
drivers/infiniband/hw/i40iw/i40iw_puda.c
··· 1194 1194 1195 1195 ioffset = (u16)(buf->data - (u8 *)buf->mem.va); 1196 1196 while (datalen) { 1197 - fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap)); 1197 + fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap)); 1198 1198 if (fpdu_len > pfpdu->max_fpdu_data) { 1199 1199 i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ, 1200 1200 "%s: error bad fpdu_len\n", __func__);
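The i40iw_puda.c change only adjusts a cast so that sparse ("make C=1") sees a big-endian quantity being handed to ntohs(); the generated code is unchanged. In isolation the idiom looks like this (hedged sketch, reading a 16-bit wire-format field from a raw buffer):

#include <linux/types.h>
#include <asm/byteorder.h>

/* datap points at a big-endian 16-bit length field in a packet. */
static u16 demo_read_be16(const u8 *datap)
{
    /* Casting through __be16 keeps sparse's endianness checking honest;
     * a plain u16 cast would warn and could hide a missing byte swap. */
    return ntohs(*(const __be16 *)datap);
}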
+1
drivers/infiniband/hw/i40iw/i40iw_status.h
··· 95 95 I40IW_ERR_INVALID_MAC_ADDR = -65, 96 96 I40IW_ERR_BAD_STAG = -66, 97 97 I40IW_ERR_CQ_COMPL_ERROR = -67, 98 + I40IW_ERR_QUEUE_DESTROYED = -68 98 99 99 100 }; 100 101 #endif
+9 -5
drivers/infiniband/hw/i40iw/i40iw_type.h
··· 479 479 struct i40iw_virt_mem ieq_mem; 480 480 struct i40iw_puda_rsrc *ieq; 481 481 482 - struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; 482 + const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; 483 483 484 484 struct i40iw_hmc_fpm_misc hmc_fpm_misc; 485 485 u16 qs_handle; 486 - u32 debug_mask; 486 + u32 debug_mask; 487 487 u16 exception_lan_queue; 488 488 u8 hmc_fn_id; 489 489 bool is_pf; 490 490 bool vchnl_up; 491 491 u8 vf_id; 492 + wait_queue_head_t vf_reqs; 492 493 u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY]; 493 494 struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf; 494 495 u8 hw_rev; ··· 890 889 u32 qp_num; 891 890 u32 dest_ip[4]; 892 891 u32 src_ip[4]; 893 - u32 dest_port; 894 - u32 src_port; 892 + u16 dest_port; 893 + u16 src_port; 895 894 }; 896 895 897 896 struct i40iw_local_mac_ipaddr_entry_info { ··· 1041 1040 void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32); 1042 1041 void (*qp_send_rtt)(struct i40iw_sc_qp *, bool); 1043 1042 enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8); 1043 + enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *, 1044 + struct i40iw_fast_reg_stag_info *, 1045 + bool); 1044 1046 }; 1045 1047 1046 1048 struct i40iw_priv_cq_ops { ··· 1112 1108 enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *, 1113 1109 struct i40iw_hmc_fpm_misc *); 1114 1110 enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8); 1115 - enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *); 1111 + enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd); 1116 1112 enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev, 1117 1113 struct i40iw_hmc_create_obj_info *); 1118 1114 enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
+55 -51
drivers/infiniband/hw/i40iw/i40iw_uk.c
··· 56 56 57 57 wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); 58 58 wqe = qp->sq_base[wqe_idx].elem; 59 + 60 + qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE; 61 + 59 62 peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size; 60 63 wqe_0 = qp->sq_base[peek_head].elem; 61 64 if (peek_head) ··· 133 130 */ 134 131 u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, 135 132 u32 *wqe_idx, 136 - u8 wqe_size) 133 + u8 wqe_size, 134 + u32 total_size, 135 + u64 wr_id 136 + ) 137 137 { 138 138 u64 *wqe = NULL; 139 139 u64 wqe_ptr; ··· 165 159 if (!*wqe_idx) 166 160 qp->swqe_polarity = !qp->swqe_polarity; 167 161 } 162 + 163 + if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) { 164 + i40iw_nop_1(qp); 165 + I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); 166 + if (ret_code) 167 + return NULL; 168 + *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); 169 + if (!*wqe_idx) 170 + qp->swqe_polarity = !qp->swqe_polarity; 171 + } 172 + 168 173 for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) { 169 174 I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); 170 175 if (ret_code) ··· 186 169 187 170 peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); 188 171 wqe_0 = qp->sq_base[peek_head].elem; 189 - if (peek_head & 0x3) 190 - wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID); 172 + 173 + if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) { 174 + if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity) 175 + wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID); 176 + } 177 + 178 + qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id; 179 + qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; 180 + qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size; 191 181 return wqe; 192 182 } 193 183 ··· 273 249 if (ret_code) 274 250 return ret_code; 275 251 276 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 252 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id); 277 253 if (!wqe) 278 254 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 279 - 280 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 281 - qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; 282 255 set_64bit_val(wqe, 16, 283 256 LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); 284 257 if (!op_info->rem_addr.stag) ··· 330 309 ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size); 331 310 if (ret_code) 332 311 return ret_code; 333 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 312 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id); 334 313 if (!wqe) 335 314 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 336 - 337 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 338 - qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len; 339 315 local_fence |= info->local_fence; 340 316 341 317 set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); ··· 384 366 if (ret_code) 385 367 return ret_code; 386 368 387 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 369 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id); 388 370 if (!wqe) 389 371 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 390 372 391 373 read_fence |= info->read_fence; 392 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 393 - qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; 394 374 set_64bit_val(wqe, 16, 0); 395 375 header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) | 396 376 LS_64(info->op_type, I40IWQPSQ_OPCODE) | ··· 443 427 if (ret_code) 444 428 return ret_code; 445 429 446 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 430 + wqe = 
i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id); 447 431 if (!wqe) 448 432 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 449 433 450 434 read_fence |= info->read_fence; 451 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 452 - qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; 453 435 set_64bit_val(wqe, 16, 454 436 LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); 455 437 ··· 521 507 if (ret_code) 522 508 return ret_code; 523 509 524 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); 510 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id); 525 511 if (!wqe) 526 512 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 527 513 528 514 read_fence |= info->read_fence; 529 - 530 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 531 - qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; 532 515 header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) | 533 516 LS_64(info->op_type, I40IWQPSQ_OPCODE) | 534 517 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | ··· 585 574 op_info = &info->op.inv_local_stag; 586 575 local_fence = info->local_fence; 587 576 588 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); 577 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id); 589 578 if (!wqe) 590 579 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 591 - 592 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 593 - qp->sq_wrtrk_array[wqe_idx].wr_len = 0; 594 580 set_64bit_val(wqe, 0, 0); 595 581 set_64bit_val(wqe, 8, 596 582 LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG)); ··· 627 619 op_info = &info->op.bind_window; 628 620 629 621 local_fence |= info->local_fence; 630 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); 622 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id); 631 623 if (!wqe) 632 624 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 633 - 634 - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; 635 - qp->sq_wrtrk_array[wqe_idx].wr_len = 0; 636 625 set_64bit_val(wqe, 0, (uintptr_t)op_info->va); 637 626 set_64bit_val(wqe, 8, 638 627 LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) | ··· 765 760 enum i40iw_status_code ret_code2 = 0; 766 761 bool move_cq_head = true; 767 762 u8 polarity; 768 - u8 addl_frag_cnt, addl_wqes = 0; 763 + u8 addl_wqes = 0; 769 764 770 765 if (cq->avoid_mem_cflct) 771 766 cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq); ··· 802 797 info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ); 803 798 804 799 qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx; 800 + if (!qp) { 801 + ret_code = I40IW_ERR_QUEUE_DESTROYED; 802 + goto exit; 803 + } 805 804 wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX); 806 805 info->qp_handle = (i40iw_qp_handle)(unsigned long)qp; 807 806 ··· 836 827 info->op_type = (u8)RS_64(qword3, I40IWCQ_OP); 837 828 sw_wqe = qp->sq_base[wqe_idx].elem; 838 829 get_64bit_val(sw_wqe, 24, &wqe_qword); 839 - addl_frag_cnt = 840 - (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT); 841 - i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes); 842 830 843 - addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE); 831 + addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE; 844 832 I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes)); 845 833 } else { 846 834 do { ··· 849 843 get_64bit_val(sw_wqe, 24, &wqe_qword); 850 844 op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE); 851 845 info->op_type = op_type; 852 - addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT); 853 - i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, 
&addl_wqes); 854 - addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE); 846 + addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE; 855 847 I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes)); 856 848 if (op_type != I40IWQP_OP_NOP) { 857 849 info->wr_id = qp->sq_wrtrk_array[tail].wrid; ··· 863 859 864 860 ret_code = 0; 865 861 862 + exit: 866 863 if (!ret_code && 867 864 (info->comp_status == I40IW_COMPL_STATUS_FLUSHED)) 868 865 if (pring && (I40IW_RING_MORE_WORK(*pring))) ··· 898 893 * i40iw_get_wqe_shift - get shift count for maximum wqe size 899 894 * @wqdepth: depth of wq required. 900 895 * @sge: Maximum Scatter Gather Elements wqe 896 + * @inline_data: Maximum inline data size 901 897 * @shift: Returns the shift needed based on sge 902 898 * 903 - * Shift can be used to left shift the wqe size based on sge. 904 - * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1 905 - * (64 bytes wqes) and 2 otherwise (128 bytes wqe). 899 + * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size. 900 + * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes). 901 + * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes). 902 + * Shift of 2 otherwise (wqe size of 128 bytes). 906 903 */ 907 - enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift) 904 + enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift) 908 905 { 909 906 u32 size; 910 907 911 908 *shift = 0; 912 - if (sge > 1) 913 - *shift = (sge < 4) ? 1 : 2; 909 + if (sge > 1 || inline_data > 16) 910 + *shift = (sge < 4 && inline_data <= 48) ? 1 : 2; 914 911 915 912 /* check if wqdepth is multiple of 2 or not */ 916 913 ··· 975 968 976 969 if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT) 977 970 return I40IW_ERR_INVALID_FRAG_COUNT; 978 - ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift); 971 + ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift); 979 972 if (ret_code) 980 973 return ret_code; 981 974 982 - ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift); 975 + ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift); 983 976 if (ret_code) 984 977 return ret_code; 985 978 ··· 1104 1097 u64 header, *wqe; 1105 1098 u32 wqe_idx; 1106 1099 1107 - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); 1100 + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id); 1108 1101 if (!wqe) 1109 1102 return I40IW_ERR_QP_TOOMANY_WRS_POSTED; 1110 - 1111 - qp->sq_wrtrk_array[wqe_idx].wrid = wr_id; 1112 - qp->sq_wrtrk_array[wqe_idx].wr_len = 0; 1113 1103 set_64bit_val(wqe, 0, 0); 1114 1104 set_64bit_val(wqe, 8, 0); 1115 1105 set_64bit_val(wqe, 16, 0); ··· 1129 1125 * @frag_cnt: number of fragments 1130 1126 * @wqe_size: size of sq wqe returned 1131 1127 */ 1132 - enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size) 1128 + enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size) 1133 1129 { 1134 1130 switch (frag_cnt) { 1135 1131 case 0: ··· 1160 1156 * @frag_cnt: number of fragments 1161 1157 * @wqe_size: size of rq wqe returned 1162 1158 */ 1163 - enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size) 1159 + enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size) 1164 1160 { 1165 1161 switch (frag_cnt) { 1166 1162 case 0:
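The reworked i40iw_get_wqe_shift() now factors the maximum inline data size into the WQE quanta selection, matching its updated comment. Stripped of the ring bookkeeping, the selection rule reduces to the following (sketch, equivalent to the documented logic but not the driver function itself):

#include <linux/types.h>

/* WQE size in 32-byte quanta, chosen from SGE count and inline size. */
static u8 demo_wqe_shift(u32 sge, u32 inline_data)
{
    if (sge <= 1 && inline_data <= 16)
        return 0;    /* 32-byte WQE  */
    if (sge < 4 && inline_data <= 48)
        return 1;    /* 64-byte WQE  */
    return 2;        /* 128-byte WQE */
}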
+22 -14
drivers/infiniband/hw/i40iw/i40iw_user.h
··· 61 61 I40IW_MAX_CQ_SIZE = 1048575, 62 62 I40IW_MAX_AEQ_ALLOCATE_COUNT = 255, 63 63 I40IW_DB_ID_ZERO = 0, 64 - I40IW_MAX_WQ_FRAGMENT_COUNT = 6, 64 + I40IW_MAX_WQ_FRAGMENT_COUNT = 3, 65 65 I40IW_MAX_SGE_RD = 1, 66 66 I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, 67 67 I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, ··· 70 70 I40IW_MAX_VF_FPM_ID = 47, 71 71 I40IW_MAX_VF_PER_PF = 127, 72 72 I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, 73 - I40IW_MAX_INLINE_DATA_SIZE = 112, 74 - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 112, 73 + I40IW_MAX_INLINE_DATA_SIZE = 48, 74 + I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, 75 75 I40IW_MAX_IRD_SIZE = 32, 76 76 I40IW_QPCTX_ENCD_MAXIRD = 3, 77 77 I40IW_MAX_WQ_ENTRIES = 2048, ··· 101 101 #define I40IW_STAG_KEY_FROM_STAG(stag) ((stag) && 0x000000FF) 102 102 103 103 #define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8) 104 + 105 + #define I40IW_MAX_MR_SIZE 0x10000000000L 104 106 105 107 struct i40iw_qp_uk; 106 108 struct i40iw_cq_uk; ··· 200 198 201 199 struct i40iw_post_send { 202 200 i40iw_sgl sg_list; 203 - u8 num_sges; 201 + u32 num_sges; 204 202 }; 205 203 206 204 struct i40iw_post_inline_send { ··· 222 220 223 221 struct i40iw_rdma_write { 224 222 i40iw_sgl lo_sg_list; 225 - u8 num_lo_sges; 223 + u32 num_lo_sges; 226 224 struct i40iw_sge rem_addr; 227 225 }; 228 226 ··· 347 345 348 346 struct i40iw_sq_uk_wr_trk_info { 349 347 u64 wrid; 350 - u64 wr_len; 348 + u32 wr_len; 349 + u8 wqe_size; 350 + u8 reserved[3]; 351 351 }; 352 352 353 353 struct i40iw_qp_quanta { ··· 371 367 u32 qp_id; 372 368 u32 sq_size; 373 369 u32 rq_size; 370 + u32 max_sq_frag_cnt; 371 + u32 max_rq_frag_cnt; 374 372 struct i40iw_qp_uk_ops ops; 375 373 bool use_srq; 376 374 u8 swqe_polarity; ··· 380 374 u8 rwqe_polarity; 381 375 u8 rq_wqe_size; 382 376 u8 rq_wqe_size_multiplier; 383 - u8 max_sq_frag_cnt; 384 - u8 max_rq_frag_cnt; 385 377 bool deferred_flag; 386 378 }; 387 379 ··· 408 404 u32 qp_id; 409 405 u32 sq_size; 410 406 u32 rq_size; 411 - u8 max_sq_frag_cnt; 412 - u8 max_rq_frag_cnt; 407 + u32 max_sq_frag_cnt; 408 + u32 max_rq_frag_cnt; 409 + u32 max_inline_data; 413 410 414 411 }; 415 412 ··· 427 422 428 423 void i40iw_qp_post_wr(struct i40iw_qp_uk *qp); 429 424 u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, 430 - u8 wqe_size); 425 + u8 wqe_size, 426 + u32 total_size, 427 + u64 wr_id 428 + ); 431 429 u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx); 432 430 u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx); 433 431 ··· 442 434 void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq); 443 435 enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id, 444 436 bool signaled, bool post_sq); 445 - enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size); 446 - enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size); 437 + enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size); 438 + enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size); 447 439 enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, 448 440 u8 *wqe_size); 449 - enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift); 441 + enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift); 450 442 #endif
+27 -22
drivers/infiniband/hw/i40iw/i40iw_utils.c
··· 59 59 * @action: modify, delete or add 60 60 */ 61 61 int i40iw_arp_table(struct i40iw_device *iwdev, 62 - __be32 *ip_addr, 62 + u32 *ip_addr, 63 63 bool ipv4, 64 64 u8 *mac_addr, 65 65 u32 action) ··· 152 152 struct net_device *upper_dev; 153 153 struct i40iw_device *iwdev; 154 154 struct i40iw_handler *hdl; 155 - __be32 local_ipaddr; 155 + u32 local_ipaddr; 156 156 157 157 hdl = i40iw_find_netdev(event_netdev); 158 158 if (!hdl) ··· 167 167 switch (event) { 168 168 case NETDEV_DOWN: 169 169 if (upper_dev) 170 - local_ipaddr = 171 - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; 170 + local_ipaddr = ntohl( 171 + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); 172 172 else 173 - local_ipaddr = ifa->ifa_address; 174 - local_ipaddr = ntohl(local_ipaddr); 173 + local_ipaddr = ntohl(ifa->ifa_address); 175 174 i40iw_manage_arp_cache(iwdev, 176 175 netdev->dev_addr, 177 176 &local_ipaddr, ··· 179 180 return NOTIFY_OK; 180 181 case NETDEV_UP: 181 182 if (upper_dev) 182 - local_ipaddr = 183 - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; 183 + local_ipaddr = ntohl( 184 + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); 184 185 else 185 - local_ipaddr = ifa->ifa_address; 186 - local_ipaddr = ntohl(local_ipaddr); 186 + local_ipaddr = ntohl(ifa->ifa_address); 187 187 i40iw_manage_arp_cache(iwdev, 188 188 netdev->dev_addr, 189 189 &local_ipaddr, ··· 192 194 case NETDEV_CHANGEADDR: 193 195 /* Add the address to the IP table */ 194 196 if (upper_dev) 195 - local_ipaddr = 196 - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; 197 + local_ipaddr = ntohl( 198 + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); 197 199 else 198 - local_ipaddr = ifa->ifa_address; 200 + local_ipaddr = ntohl(ifa->ifa_address); 199 201 200 - local_ipaddr = ntohl(local_ipaddr); 201 202 i40iw_manage_arp_cache(iwdev, 202 203 netdev->dev_addr, 203 204 &local_ipaddr, ··· 224 227 struct net_device *netdev; 225 228 struct i40iw_device *iwdev; 226 229 struct i40iw_handler *hdl; 227 - __be32 local_ipaddr6[4]; 230 + u32 local_ipaddr6[4]; 228 231 229 232 hdl = i40iw_find_netdev(event_netdev); 230 233 if (!hdl) ··· 503 506 struct cqp_commands_info *cqp_info; 504 507 struct i40iw_device *iwdev; 505 508 u32 qp_num; 509 + unsigned long flags; 506 510 507 511 iwqp = to_iwqp(ibqp); 508 - if (!atomic_dec_and_test(&iwqp->refcount)) 509 - return; 510 - 511 512 iwdev = iwqp->iwdev; 513 + spin_lock_irqsave(&iwdev->qptable_lock, flags); 514 + if (!atomic_dec_and_test(&iwqp->refcount)) { 515 + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); 516 + return; 517 + } 518 + 512 519 qp_num = iwqp->ibqp.qp_num; 513 520 iwdev->qp_table[qp_num] = NULL; 521 + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); 514 522 cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); 515 523 if (!cqp_request) 516 524 return; ··· 987 985 enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) 988 986 { 989 987 struct i40iw_device *iwdev = dev->back_dev; 990 - enum i40iw_status_code err_code = 0; 991 988 int timeout_ret; 992 989 993 990 i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n", 994 991 __func__, __LINE__, dev, iwdev); 995 - atomic_add(2, &iwdev->vchnl_msgs); 992 + 993 + atomic_set(&iwdev->vchnl_msgs, 2); 996 994 timeout_ret = wait_event_timeout(iwdev->vchnl_waitq, 997 995 (atomic_read(&iwdev->vchnl_msgs) == 1), 998 996 I40IW_VCHNL_EVENT_TIMEOUT); 999 997 atomic_dec(&iwdev->vchnl_msgs); 1000 998 if (!timeout_ret) { 1001 999 
i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret); 1002 - err_code = I40IW_ERR_TIMEOUT; 1000 + atomic_set(&iwdev->vchnl_msgs, 0); 1001 + dev->vchnl_up = false; 1002 + return I40IW_ERR_TIMEOUT; 1003 1003 } 1004 - return err_code; 1004 + wake_up(&dev->vf_reqs); 1005 + return 0; 1005 1006 } 1006 1007 1007 1008 /**
+263 -31
drivers/infiniband/hw/i40iw/i40iw_verbs.c
··· 63 63 ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr); 64 64 props->fw_ver = I40IW_FW_VERSION; 65 65 props->device_cap_flags = iwdev->device_cap_flags; 66 - props->vendor_id = iwdev->vendor_id; 67 - props->vendor_part_id = iwdev->vendor_part_id; 66 + props->vendor_id = iwdev->ldev->pcidev->vendor; 67 + props->vendor_part_id = iwdev->ldev->pcidev->device; 68 68 props->hw_ver = (u32)iwdev->sc_dev.hw_rev; 69 69 props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; 70 70 props->max_qp = iwdev->max_qp; ··· 74 74 props->max_cqe = iwdev->max_cqe; 75 75 props->max_mr = iwdev->max_mr; 76 76 props->max_pd = iwdev->max_pd; 77 - props->max_sge_rd = 1; 77 + props->max_sge_rd = I40IW_MAX_SGE_RD; 78 78 props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; 79 79 props->max_qp_init_rd_atom = props->max_qp_rd_atom; 80 80 props->atomic_cap = IB_ATOMIC_NONE; ··· 120 120 props->pkey_tbl_len = 1; 121 121 props->active_width = IB_WIDTH_4X; 122 122 props->active_speed = 1; 123 - props->max_msg_sz = 0x80000000; 123 + props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; 124 124 return 0; 125 125 } 126 126 ··· 437 437 kfree(iwqp->kqp.wrid_mem); 438 438 iwqp->kqp.wrid_mem = NULL; 439 439 kfree(iwqp->allocated_buffer); 440 - iwqp->allocated_buffer = NULL; 441 440 } 442 441 443 442 /** ··· 520 521 enum i40iw_status_code status; 521 522 struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; 522 523 523 - ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT; 524 - 525 524 sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1); 526 525 rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1); 527 526 528 - status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift); 527 + status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift); 529 528 if (!status) 530 - status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift); 529 + status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift); 531 530 532 531 if (status) 533 532 return -ENOSYS; ··· 606 609 if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) 607 610 init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; 608 611 612 + if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT) 613 + init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; 614 + 609 615 memset(&init_info, 0, sizeof(init_info)); 610 616 611 617 sq_size = init_attr->cap.max_send_wr; ··· 618 618 init_info.qp_uk_init_info.rq_size = rq_size; 619 619 init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; 620 620 init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; 621 + init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; 621 622 622 623 mem = kzalloc(sizeof(*iwqp), GFP_KERNEL); 623 624 if (!mem) ··· 723 722 iwarp_info = &iwqp->iwarp_info; 724 723 iwarp_info->rd_enable = true; 725 724 iwarp_info->wr_rdresp_en = true; 726 - if (!iwqp->user_mode) 725 + if (!iwqp->user_mode) { 726 + iwarp_info->fast_reg_en = true; 727 727 iwarp_info->priv_mode_en = true; 728 + } 728 729 iwarp_info->ddp_ver = 1; 729 730 iwarp_info->rdmap_ver = 1; 730 731 ··· 787 784 return ERR_PTR(err_code); 788 785 } 789 786 } 787 + init_completion(&iwqp->sq_drained); 788 + init_completion(&iwqp->rq_drained); 790 789 791 790 return &iwqp->ibqp; 792 791 error: ··· 1449 1444 } 1450 1445 1451 1446 /** 1447 + * i40iw_hw_alloc_stag - cqp command to allocate stag 1448 + * @iwdev: iwarp device 1449 + * @iwmr: iwarp mr pointer 1450 + */ 1451 + static int 
i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr) 1452 + { 1453 + struct i40iw_allocate_stag_info *info; 1454 + struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd); 1455 + enum i40iw_status_code status; 1456 + int err = 0; 1457 + struct i40iw_cqp_request *cqp_request; 1458 + struct cqp_commands_info *cqp_info; 1459 + 1460 + cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); 1461 + if (!cqp_request) 1462 + return -ENOMEM; 1463 + 1464 + cqp_info = &cqp_request->info; 1465 + info = &cqp_info->in.u.alloc_stag.info; 1466 + memset(info, 0, sizeof(*info)); 1467 + info->page_size = PAGE_SIZE; 1468 + info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT; 1469 + info->pd_id = iwpd->sc_pd.pd_id; 1470 + info->total_len = iwmr->length; 1471 + cqp_info->cqp_cmd = OP_ALLOC_STAG; 1472 + cqp_info->post_sq = 1; 1473 + cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev; 1474 + cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; 1475 + 1476 + status = i40iw_handle_cqp_op(iwdev, cqp_request); 1477 + if (status) { 1478 + err = -ENOMEM; 1479 + i40iw_pr_err("CQP-OP MR Reg fail"); 1480 + } 1481 + return err; 1482 + } 1483 + 1484 + /** 1485 + * i40iw_alloc_mr - register stag for fast memory registration 1486 + * @pd: ibpd pointer 1487 + * @mr_type: memory for stag registrion 1488 + * @max_num_sg: man number of pages 1489 + */ 1490 + static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, 1491 + enum ib_mr_type mr_type, 1492 + u32 max_num_sg) 1493 + { 1494 + struct i40iw_pd *iwpd = to_iwpd(pd); 1495 + struct i40iw_device *iwdev = to_iwdev(pd->device); 1496 + struct i40iw_pble_alloc *palloc; 1497 + struct i40iw_pbl *iwpbl; 1498 + struct i40iw_mr *iwmr; 1499 + enum i40iw_status_code status; 1500 + u32 stag; 1501 + int err_code = -ENOMEM; 1502 + 1503 + iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); 1504 + if (!iwmr) 1505 + return ERR_PTR(-ENOMEM); 1506 + 1507 + stag = i40iw_create_stag(iwdev); 1508 + if (!stag) { 1509 + err_code = -EOVERFLOW; 1510 + goto err; 1511 + } 1512 + iwmr->stag = stag; 1513 + iwmr->ibmr.rkey = stag; 1514 + iwmr->ibmr.lkey = stag; 1515 + iwmr->ibmr.pd = pd; 1516 + iwmr->ibmr.device = pd->device; 1517 + iwpbl = &iwmr->iwpbl; 1518 + iwpbl->iwmr = iwmr; 1519 + iwmr->type = IW_MEMREG_TYPE_MEM; 1520 + palloc = &iwpbl->pble_alloc; 1521 + iwmr->page_cnt = max_num_sg; 1522 + mutex_lock(&iwdev->pbl_mutex); 1523 + status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); 1524 + mutex_unlock(&iwdev->pbl_mutex); 1525 + if (!status) 1526 + goto err1; 1527 + 1528 + if (palloc->level != I40IW_LEVEL_1) 1529 + goto err2; 1530 + err_code = i40iw_hw_alloc_stag(iwdev, iwmr); 1531 + if (err_code) 1532 + goto err2; 1533 + iwpbl->pbl_allocated = true; 1534 + i40iw_add_pdusecount(iwpd); 1535 + return &iwmr->ibmr; 1536 + err2: 1537 + i40iw_free_pble(iwdev->pble_rsrc, palloc); 1538 + err1: 1539 + i40iw_free_stag(iwdev, stag); 1540 + err: 1541 + kfree(iwmr); 1542 + return ERR_PTR(err_code); 1543 + } 1544 + 1545 + /** 1546 + * i40iw_set_page - populate pbl list for fmr 1547 + * @ibmr: ib mem to access iwarp mr pointer 1548 + * @addr: page dma address fro pbl list 1549 + */ 1550 + static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) 1551 + { 1552 + struct i40iw_mr *iwmr = to_iwmr(ibmr); 1553 + struct i40iw_pbl *iwpbl = &iwmr->iwpbl; 1554 + struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; 1555 + u64 *pbl; 1556 + 1557 + if (unlikely(iwmr->npages == iwmr->page_cnt)) 1558 + return -ENOMEM; 1559 + 1560 + pbl = (u64 *)palloc->level1.addr; 1561 + pbl[iwmr->npages++] = 
cpu_to_le64(addr); 1562 + return 0; 1563 + } 1564 + 1565 + /** 1566 + * i40iw_map_mr_sg - map of sg list for fmr 1567 + * @ibmr: ib mem to access iwarp mr pointer 1568 + * @sg: scatter gather list for fmr 1569 + * @sg_nents: number of sg pages 1570 + */ 1571 + static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 1572 + int sg_nents, unsigned int *sg_offset) 1573 + { 1574 + struct i40iw_mr *iwmr = to_iwmr(ibmr); 1575 + 1576 + iwmr->npages = 0; 1577 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page); 1578 + } 1579 + 1580 + /** 1581 + * i40iw_drain_sq - drain the send queue 1582 + * @ibqp: ib qp pointer 1583 + */ 1584 + static void i40iw_drain_sq(struct ib_qp *ibqp) 1585 + { 1586 + struct i40iw_qp *iwqp = to_iwqp(ibqp); 1587 + struct i40iw_sc_qp *qp = &iwqp->sc_qp; 1588 + 1589 + if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring)) 1590 + wait_for_completion(&iwqp->sq_drained); 1591 + } 1592 + 1593 + /** 1594 + * i40iw_drain_rq - drain the receive queue 1595 + * @ibqp: ib qp pointer 1596 + */ 1597 + static void i40iw_drain_rq(struct ib_qp *ibqp) 1598 + { 1599 + struct i40iw_qp *iwqp = to_iwqp(ibqp); 1600 + struct i40iw_sc_qp *qp = &iwqp->sc_qp; 1601 + 1602 + if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring)) 1603 + wait_for_completion(&iwqp->rq_drained); 1604 + } 1605 + 1606 + /** 1452 1607 * i40iw_hwreg_mr - send cqp command for memory registration 1453 1608 * @iwdev: iwarp device 1454 1609 * @iwmr: iwarp mr pointer ··· 1691 1526 struct i40iw_mr *iwmr; 1692 1527 struct ib_umem *region; 1693 1528 struct i40iw_mem_reg_req req; 1694 - u32 pbl_depth = 0; 1529 + u64 pbl_depth = 0; 1695 1530 u32 stag = 0; 1696 1531 u16 access; 1697 - u32 region_length; 1532 + u64 region_length; 1698 1533 bool use_pbles = false; 1699 1534 unsigned long flags; 1700 1535 int err = -ENOSYS; 1701 1536 1537 + if (length > I40IW_MAX_MR_SIZE) 1538 + return ERR_PTR(-EINVAL); 1702 1539 region = ib_umem_get(pd->uobject->context, start, length, acc, 0); 1703 1540 if (IS_ERR(region)) 1704 1541 return (struct ib_mr *)region; ··· 1731 1564 palloc = &iwpbl->pble_alloc; 1732 1565 1733 1566 iwmr->type = req.reg_type; 1734 - iwmr->page_cnt = pbl_depth; 1567 + iwmr->page_cnt = (u32)pbl_depth; 1735 1568 1736 1569 switch (req.reg_type) { 1737 1570 case IW_MEMREG_TYPE_QP: ··· 2048 1881 enum i40iw_status_code ret; 2049 1882 int err = 0; 2050 1883 unsigned long flags; 1884 + bool inv_stag; 2051 1885 2052 1886 iwqp = (struct i40iw_qp *)ibqp; 2053 1887 ukqp = &iwqp->sc_qp.qp_uk; 2054 1888 2055 1889 spin_lock_irqsave(&iwqp->lock, flags); 2056 1890 while (ib_wr) { 1891 + inv_stag = false; 2057 1892 memset(&info, 0, sizeof(info)); 2058 1893 info.wr_id = (u64)(ib_wr->wr_id); 2059 1894 if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all) ··· 2065 1896 2066 1897 switch (ib_wr->opcode) { 2067 1898 case IB_WR_SEND: 2068 - if (ib_wr->send_flags & IB_SEND_SOLICITED) 2069 - info.op_type = I40IW_OP_TYPE_SEND_SOL; 2070 - else 2071 - info.op_type = I40IW_OP_TYPE_SEND; 1899 + /* fall-through */ 1900 + case IB_WR_SEND_WITH_INV: 1901 + if (ib_wr->opcode == IB_WR_SEND) { 1902 + if (ib_wr->send_flags & IB_SEND_SOLICITED) 1903 + info.op_type = I40IW_OP_TYPE_SEND_SOL; 1904 + else 1905 + info.op_type = I40IW_OP_TYPE_SEND; 1906 + } else { 1907 + if (ib_wr->send_flags & IB_SEND_SOLICITED) 1908 + info.op_type = I40IW_OP_TYPE_SEND_SOL_INV; 1909 + else 1910 + info.op_type = I40IW_OP_TYPE_SEND_INV; 1911 + } 2072 1912 2073 1913 if (ib_wr->send_flags & IB_SEND_INLINE) { 2074 1914 info.op.inline_send.data = (void *)(unsigned 
long)ib_wr->sg_list[0].addr; 2075 1915 info.op.inline_send.len = ib_wr->sg_list[0].length; 2076 - ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false); 1916 + ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); 2077 1917 } else { 2078 1918 info.op.send.num_sges = ib_wr->num_sge; 2079 1919 info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list; 2080 - ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false); 1920 + ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); 2081 1921 } 2082 1922 2083 1923 if (ret) ··· 2114 1936 if (ret) 2115 1937 err = -EIO; 2116 1938 break; 1939 + case IB_WR_RDMA_READ_WITH_INV: 1940 + inv_stag = true; 1941 + /* fall-through*/ 2117 1942 case IB_WR_RDMA_READ: 1943 + if (ib_wr->num_sge > I40IW_MAX_SGE_RD) { 1944 + err = -EINVAL; 1945 + break; 1946 + } 2118 1947 info.op_type = I40IW_OP_TYPE_RDMA_READ; 2119 1948 info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 2120 1949 info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; ··· 2129 1944 info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr; 2130 1945 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; 2131 1946 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; 2132 - ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false); 1947 + ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false); 2133 1948 if (ret) 2134 1949 err = -EIO; 2135 1950 break; 1951 + case IB_WR_LOCAL_INV: 1952 + info.op_type = I40IW_OP_TYPE_INV_STAG; 1953 + info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; 1954 + ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true); 1955 + if (ret) 1956 + err = -EIO; 1957 + break; 1958 + case IB_WR_REG_MR: 1959 + { 1960 + struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); 1961 + int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); 1962 + int flags = reg_wr(ib_wr)->access; 1963 + struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc; 1964 + struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; 1965 + struct i40iw_fast_reg_stag_info info; 1966 + 1967 + info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD; 1968 + info.access_rights |= i40iw_get_user_access(flags); 1969 + info.stag_key = reg_wr(ib_wr)->key & 0xff; 1970 + info.stag_idx = reg_wr(ib_wr)->key >> 8; 1971 + info.wr_id = ib_wr->wr_id; 1972 + 1973 + info.addr_type = I40IW_ADDR_TYPE_VA_BASED; 1974 + info.va = (void *)(uintptr_t)iwmr->ibmr.iova; 1975 + info.total_len = iwmr->ibmr.length; 1976 + info.first_pm_pbl_index = palloc->level1.idx; 1977 + info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; 1978 + info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED; 1979 + 1980 + if (page_shift == 21) 1981 + info.page_size = 1; /* 2M page */ 1982 + 1983 + ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); 1984 + if (ret) 1985 + err = -EIO; 1986 + break; 1987 + } 2136 1988 default: 2137 1989 err = -EINVAL; 2138 1990 i40iw_pr_err(" upost_send bad opcode = 0x%x\n", ··· 2249 2027 enum i40iw_status_code ret; 2250 2028 struct i40iw_cq_uk *ukcq; 2251 2029 struct i40iw_sc_qp *qp; 2030 + struct i40iw_qp *iwqp; 2252 2031 unsigned long flags; 2253 2032 2254 2033 iwcq = (struct i40iw_cq *)ibcq; ··· 2260 2037 ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true); 2261 2038 if (ret == I40IW_ERR_QUEUE_EMPTY) { 2262 2039 break; 2040 + } else if (ret == I40IW_ERR_QUEUE_DESTROYED) { 2041 + continue; 2263 2042 } else if (ret) { 2264 2043 if (!cqe_count) 2265 2044 cqe_count = -1; ··· 2269 2044 } 2270 2045 entry->wc_flags = 0; 
2271 2046 entry->wr_id = cq_poll_info.wr_id; 2272 - if (!cq_poll_info.error) 2273 - entry->status = IB_WC_SUCCESS; 2274 - else 2047 + if (cq_poll_info.error) { 2275 2048 entry->status = IB_WC_WR_FLUSH_ERR; 2049 + entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err; 2050 + } else { 2051 + entry->status = IB_WC_SUCCESS; 2052 + } 2276 2053 2277 2054 switch (cq_poll_info.op_type) { 2278 2055 case I40IW_OP_TYPE_RDMA_WRITE: ··· 2298 2071 break; 2299 2072 } 2300 2073 2301 - entry->vendor_err = 2302 - cq_poll_info.major_err << 16 | cq_poll_info.minor_err; 2303 2074 entry->ex.imm_data = 0; 2304 2075 qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle; 2305 2076 entry->qp = (struct ib_qp *)qp->back_qp; 2306 2077 entry->src_qp = cq_poll_info.qp_id; 2078 + iwqp = (struct i40iw_qp *)qp->back_qp; 2079 + if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) { 2080 + if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring)) 2081 + complete(&iwqp->sq_drained); 2082 + if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring)) 2083 + complete(&iwqp->rq_drained); 2084 + } 2307 2085 entry->byte_len = cq_poll_info.bytes_xfered; 2308 2086 entry++; 2309 2087 cqe_count++; ··· 2375 2143 struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; 2376 2144 struct timespec curr_time; 2377 2145 static struct timespec last_rd_time = {0, 0}; 2378 - enum i40iw_status_code status = 0; 2379 2146 unsigned long flags; 2380 2147 2381 2148 curr_time = current_kernel_time(); ··· 2387 2156 spin_unlock_irqrestore(&devstat->stats_lock, flags); 2388 2157 } else { 2389 2158 if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1) 2390 - status = i40iw_vchnl_vf_get_pe_stats(dev, 2391 - &devstat->hw_stats); 2392 - 2393 - if (status) 2394 - return -ENOSYS; 2159 + if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) 2160 + return -ENOSYS; 2395 2161 } 2396 2162 2397 2163 stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] + ··· 2555 2327 iwibdev->ibdev.query_device = i40iw_query_device; 2556 2328 iwibdev->ibdev.create_ah = i40iw_create_ah; 2557 2329 iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; 2330 + iwibdev->ibdev.drain_sq = i40iw_drain_sq; 2331 + iwibdev->ibdev.drain_rq = i40iw_drain_rq; 2332 + iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; 2333 + iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; 2558 2334 iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); 2559 2335 if (!iwibdev->ibdev.iwcm) { 2560 2336 ib_dealloc_device(&iwibdev->ibdev);
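With alloc_mr, map_mr_sg and IB_WR_REG_MR support in place, kernel ULPs can fast-register memory on i40iw through the generic verbs instead of driver-private paths. A rough consumer-side sketch (error unwinding elided; signatures as of this merge window, so treat the details as indicative rather than authoritative):

#include <rdma/ib_verbs.h>

/* Register sg_nents DMA-mapped pages and post the registration WR. */
static int demo_fast_reg(struct ib_pd *pd, struct ib_qp *qp,
                         struct scatterlist *sg, int sg_nents)
{
    struct ib_reg_wr reg_wr = {};
    struct ib_send_wr *bad_wr;
    struct ib_mr *mr;
    int n;

    mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
    if (IS_ERR(mr))
        return PTR_ERR(mr);

    n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
    if (n != sg_nents)
        return -EINVAL;    /* real code would free the MR here */

    reg_wr.wr.opcode = IB_WR_REG_MR;
    reg_wr.wr.send_flags = IB_SEND_SIGNALED;
    reg_wr.mr = mr;
    reg_wr.key = mr->rkey;
    reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

    return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}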
+3
drivers/infiniband/hw/i40iw/i40iw_verbs.h
··· 92 92 struct ib_umem *region; 93 93 u16 type; 94 94 u32 page_cnt; 95 + u32 npages; 95 96 u32 stag; 96 97 u64 length; 97 98 u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS]; ··· 170 169 struct i40iw_pbl *iwpbl; 171 170 struct i40iw_dma_mem q2_ctx_mem; 172 171 struct i40iw_dma_mem ietf_mem; 172 + struct completion sq_drained; 173 + struct completion rq_drained; 173 174 }; 174 175 #endif
+1 -1
drivers/infiniband/hw/i40iw/i40iw_vf.c
··· 80 80 return 0; 81 81 } 82 82 83 - struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { 83 + const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { 84 84 i40iw_manage_vf_pble_bp 85 85 };
+1 -1
drivers/infiniband/hw/i40iw/i40iw_vf.h
··· 57 57 u64 scratch, 58 58 bool post_sq); 59 59 60 - extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops; 60 + extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops; 61 61 62 62 #endif
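Constifying iw_vf_cqp_ops is a small hardening: the function-pointer table moves into read-only data and the type system documents that it is never patched at runtime. The general idiom, with hypothetical demo_* names:

struct demo_ops {
    int (*do_op)(int arg);
};

static int demo_do_op(int arg)
{
    return arg + 1;
}

/* const places the table in .rodata; users hold a const pointer. */
static const struct demo_ops demo_ops_table = {
    .do_op = demo_do_op,
};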
+55 -47
drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
··· 254 254 static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, 255 255 u32 vf_id, 256 256 struct i40iw_virtchnl_op_buf *vchnl_msg, 257 - struct i40iw_dev_hw_stats hw_stats) 257 + struct i40iw_dev_hw_stats *hw_stats) 258 258 { 259 259 enum i40iw_status_code ret_code; 260 260 u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1]; ··· 264 264 vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx; 265 265 vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer); 266 266 vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS; 267 - *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats; 267 + *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats; 268 268 ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer)); 269 269 if (ret_code) 270 270 i40iw_debug(dev, I40IW_DEBUG_VIRT, ··· 437 437 vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); 438 438 return I40IW_SUCCESS; 439 439 } 440 - for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; 441 - iw_vf_idx++) { 440 + for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) { 442 441 if (!dev->vf_dev[iw_vf_idx]) { 443 - if (first_avail_iw_vf == 444 - I40IW_MAX_PE_ENABLED_VF_COUNT) 442 + if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT) 445 443 first_avail_iw_vf = iw_vf_idx; 446 444 continue; 447 445 } ··· 539 541 devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); 540 542 spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); 541 543 vf_dev->msg_count--; 542 - vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats); 544 + vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats); 543 545 break; 544 546 default: 545 547 i40iw_debug(dev, I40IW_DEBUG_VIRT, ··· 594 596 struct i40iw_virtchnl_req vchnl_req; 595 597 enum i40iw_status_code ret_code; 596 598 599 + if (!i40iw_vf_clear_to_send(dev)) 600 + return I40IW_ERR_TIMEOUT; 597 601 memset(&vchnl_req, 0, sizeof(vchnl_req)); 598 602 vchnl_req.dev = dev; 599 603 vchnl_req.parm = vchnl_ver; 600 604 vchnl_req.parm_len = sizeof(*vchnl_ver); 601 605 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 606 + 602 607 ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req); 603 - if (!ret_code) { 604 - ret_code = i40iw_vf_wait_vchnl_resp(dev); 605 - if (!ret_code) 606 - ret_code = vchnl_req.ret_code; 607 - else 608 - dev->vchnl_up = false; 609 - } else { 608 + if (ret_code) { 610 609 i40iw_debug(dev, I40IW_DEBUG_VIRT, 611 610 "%s Send message failed 0x%0x\n", __func__, ret_code); 611 + return ret_code; 612 612 } 613 - return ret_code; 613 + ret_code = i40iw_vf_wait_vchnl_resp(dev); 614 + if (ret_code) 615 + return ret_code; 616 + else 617 + return vchnl_req.ret_code; 614 618 } 615 619 616 620 /** ··· 626 626 struct i40iw_virtchnl_req vchnl_req; 627 627 enum i40iw_status_code ret_code; 628 628 629 + if (!i40iw_vf_clear_to_send(dev)) 630 + return I40IW_ERR_TIMEOUT; 629 631 memset(&vchnl_req, 0, sizeof(vchnl_req)); 630 632 vchnl_req.dev = dev; 631 633 vchnl_req.parm = hmc_fcn; 632 634 vchnl_req.parm_len = sizeof(*hmc_fcn); 633 635 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 636 + 634 637 ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req); 635 - if (!ret_code) { 636 - ret_code = i40iw_vf_wait_vchnl_resp(dev); 637 - if (!ret_code) 638 - ret_code = vchnl_req.ret_code; 639 - else 640 - dev->vchnl_up = false; 641 - } else { 638 + if (ret_code) { 642 639 i40iw_debug(dev, I40IW_DEBUG_VIRT, 643 640 "%s Send 
message failed 0x%0x\n", __func__, ret_code); 641 + return ret_code; 644 642 } 645 - return ret_code; 643 + ret_code = i40iw_vf_wait_vchnl_resp(dev); 644 + if (ret_code) 645 + return ret_code; 646 + else 647 + return vchnl_req.ret_code; 646 648 } 647 649 648 650 /** ··· 662 660 struct i40iw_virtchnl_req vchnl_req; 663 661 enum i40iw_status_code ret_code; 664 662 663 + if (!i40iw_vf_clear_to_send(dev)) 664 + return I40IW_ERR_TIMEOUT; 665 665 memset(&vchnl_req, 0, sizeof(vchnl_req)); 666 666 vchnl_req.dev = dev; 667 667 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 668 + 668 669 ret_code = vchnl_vf_send_add_hmc_objs_req(dev, 669 670 &vchnl_req, 670 671 rsrc_type, 671 672 start_index, 672 673 rsrc_count); 673 - if (!ret_code) { 674 - ret_code = i40iw_vf_wait_vchnl_resp(dev); 675 - if (!ret_code) 676 - ret_code = vchnl_req.ret_code; 677 - else 678 - dev->vchnl_up = false; 679 - } else { 674 + if (ret_code) { 680 675 i40iw_debug(dev, I40IW_DEBUG_VIRT, 681 676 "%s Send message failed 0x%0x\n", __func__, ret_code); 677 + return ret_code; 682 678 } 683 - return ret_code; 679 + ret_code = i40iw_vf_wait_vchnl_resp(dev); 680 + if (ret_code) 681 + return ret_code; 682 + else 683 + return vchnl_req.ret_code; 684 684 } 685 685 686 686 /** ··· 700 696 struct i40iw_virtchnl_req vchnl_req; 701 697 enum i40iw_status_code ret_code; 702 698 699 + if (!i40iw_vf_clear_to_send(dev)) 700 + return I40IW_ERR_TIMEOUT; 703 701 memset(&vchnl_req, 0, sizeof(vchnl_req)); 704 702 vchnl_req.dev = dev; 705 703 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 704 + 706 705 ret_code = vchnl_vf_send_del_hmc_objs_req(dev, 707 706 &vchnl_req, 708 707 rsrc_type, 709 708 start_index, 710 709 rsrc_count); 711 - if (!ret_code) { 712 - ret_code = i40iw_vf_wait_vchnl_resp(dev); 713 - if (!ret_code) 714 - ret_code = vchnl_req.ret_code; 715 - else 716 - dev->vchnl_up = false; 717 - } else { 710 + if (ret_code) { 718 711 i40iw_debug(dev, I40IW_DEBUG_VIRT, 719 712 "%s Send message failed 0x%0x\n", __func__, ret_code); 713 + return ret_code; 720 714 } 721 - return ret_code; 715 + ret_code = i40iw_vf_wait_vchnl_resp(dev); 716 + if (ret_code) 717 + return ret_code; 718 + else 719 + return vchnl_req.ret_code; 722 720 } 723 721 724 722 /** ··· 734 728 struct i40iw_virtchnl_req vchnl_req; 735 729 enum i40iw_status_code ret_code; 736 730 731 + if (!i40iw_vf_clear_to_send(dev)) 732 + return I40IW_ERR_TIMEOUT; 737 733 memset(&vchnl_req, 0, sizeof(vchnl_req)); 738 734 vchnl_req.dev = dev; 739 735 vchnl_req.parm = hw_stats; 740 736 vchnl_req.parm_len = sizeof(*hw_stats); 741 737 vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; 738 + 742 739 ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req); 743 - if (!ret_code) { 744 - ret_code = i40iw_vf_wait_vchnl_resp(dev); 745 - if (!ret_code) 746 - ret_code = vchnl_req.ret_code; 747 - else 748 - dev->vchnl_up = false; 749 - } else { 740 + if (ret_code) { 750 741 i40iw_debug(dev, I40IW_DEBUG_VIRT, 751 742 "%s Send message failed 0x%0x\n", __func__, ret_code); 743 + return ret_code; 752 744 } 753 - return ret_code; 745 + ret_code = i40iw_vf_wait_vchnl_resp(dev); 746 + if (ret_code) 747 + return ret_code; 748 + else 749 + return vchnl_req.ret_code; 754 750 }
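Two patterns recur through the i40iw_virtchnl.c rework: the PF stats response helper now takes the large hw_stats structure by pointer instead of by value, and the VF request helpers bail out early on failure instead of nesting the success path. The by-pointer half in miniature (hypothetical demo types):

#include <linux/types.h>
#include <linux/string.h>

struct demo_hw_stats {
    u64 counter[128];    /* about 1 KB: too big to copy per call */
};

/* Passing a pointer avoids pushing the whole structure onto the stack
 * at every call site; the single copy happens here, into the response
 * buffer. */
static void demo_fill_stats_resp(void *resp_buf,
                                 const struct demo_hw_stats *stats)
{
    memcpy(resp_buf, stats, sizeof(*stats));
}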
+3 -6
drivers/infiniband/hw/mlx4/mcg.c
··· 96 96 u8 scope_join_state; 97 97 u8 proxy_join; 98 98 u8 reserved[2]; 99 - }; 99 + } __packed __aligned(4); 100 100 101 101 struct mcast_group { 102 102 struct ib_sa_mcmember_data rec; ··· 747 747 __be64 tid, 748 748 union ib_gid *new_mgid) 749 749 { 750 - struct mcast_group *group = NULL, *cur_group; 750 + struct mcast_group *group = NULL, *cur_group, *n; 751 751 struct mcast_req *req; 752 - struct list_head *pos; 753 - struct list_head *n; 754 752 755 753 mutex_lock(&ctx->mcg_table_lock); 756 - list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) { 757 - group = list_entry(pos, struct mcast_group, mgid0_list); 754 + list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) { 758 755 mutex_lock(&group->lock); 759 756 if (group->last_req_tid == tid) { 760 757 if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
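The mcg.c loop is the routine conversion from list_for_each_safe() plus list_entry() to list_for_each_entry_safe(), dropping the separate list_head cursor. The idiom, for reference, with an illustrative item type:

#include <linux/list.h>
#include <linux/slab.h>

struct demo_item {
    struct list_head node;
    int value;
};

/* The 'tmp' cursor caches the next entry, so the current one may be
 * unlinked and freed from inside the loop body. */
static void demo_purge(struct list_head *head)
{
    struct demo_item *item, *tmp;

    list_for_each_entry_safe(item, tmp, head, node) {
        list_del(&item->node);
        kfree(item);
    }
}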
+2 -3
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 717 717 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, 718 718 enum ib_mr_type mr_type, 719 719 u32 max_num_sg); 720 - int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, 721 - struct scatterlist *sg, 722 - int sg_nents); 720 + int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 721 + unsigned int *sg_offset); 723 722 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); 724 723 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); 725 724 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
+3 -4
drivers/infiniband/hw/mlx4/mr.c
··· 528 528 return 0; 529 529 } 530 530 531 - int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, 532 - struct scatterlist *sg, 533 - int sg_nents) 531 + int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 532 + unsigned int *sg_offset) 534 533 { 535 534 struct mlx4_ib_mr *mr = to_mmr(ibmr); 536 535 int rc; ··· 540 541 sizeof(u64) * mr->max_pages, 541 542 DMA_TO_DEVICE); 542 543 543 - rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page); 544 + rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); 544 545 545 546 ib_dma_sync_single_for_device(ibmr->device, mr->page_map, 546 547 sizeof(u64) * mr->max_pages,
+4 -1
drivers/infiniband/hw/mlx5/cq.c
··· 879 879 880 880 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); 881 881 cq->mcq.irqn = irqn; 882 - cq->mcq.comp = mlx5_ib_cq_comp; 882 + if (context) 883 + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; 884 + else 885 + cq->mcq.comp = mlx5_ib_cq_comp; 883 886 cq->mcq.event = mlx5_ib_cq_event; 884 887 885 888 INIT_LIST_HEAD(&cq->wc_list);
+80 -22
drivers/infiniband/hw/mlx5/main.c
··· 38 38 #include <linux/dma-mapping.h> 39 39 #include <linux/slab.h> 40 40 #include <linux/io-mapping.h> 41 + #if defined(CONFIG_X86) 42 + #include <asm/pat.h> 43 + #endif 41 44 #include <linux/sched.h> 42 45 #include <rdma/ib_user_verbs.h> 43 46 #include <rdma/ib_addr.h> ··· 519 516 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 520 517 props->device_cap_flags |= IB_DEVICE_UD_TSO; 521 518 } 519 + 520 + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 521 + MLX5_CAP_ETH(dev->mdev, scatter_fcs)) 522 + props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; 522 523 523 524 props->vendor_part_id = mdev->pdev->device; 524 525 props->hw_ver = mdev->pdev->revision; ··· 1075 1068 return get_arg(offset); 1076 1069 } 1077 1070 1071 + static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) 1072 + { 1073 + switch (cmd) { 1074 + case MLX5_IB_MMAP_WC_PAGE: 1075 + return "WC"; 1076 + case MLX5_IB_MMAP_REGULAR_PAGE: 1077 + return "best effort WC"; 1078 + case MLX5_IB_MMAP_NC_PAGE: 1079 + return "NC"; 1080 + default: 1081 + return NULL; 1082 + } 1083 + } 1084 + 1085 + static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 1086 + struct vm_area_struct *vma, struct mlx5_uuar_info *uuari) 1087 + { 1088 + int err; 1089 + unsigned long idx; 1090 + phys_addr_t pfn, pa; 1091 + pgprot_t prot; 1092 + 1093 + switch (cmd) { 1094 + case MLX5_IB_MMAP_WC_PAGE: 1095 + /* Some architectures don't support WC memory */ 1096 + #if defined(CONFIG_X86) 1097 + if (!pat_enabled()) 1098 + return -EPERM; 1099 + #elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) 1100 + return -EPERM; 1101 + #endif 1102 + /* fall through */ 1103 + case MLX5_IB_MMAP_REGULAR_PAGE: 1104 + /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ 1105 + prot = pgprot_writecombine(vma->vm_page_prot); 1106 + break; 1107 + case MLX5_IB_MMAP_NC_PAGE: 1108 + prot = pgprot_noncached(vma->vm_page_prot); 1109 + break; 1110 + default: 1111 + return -EINVAL; 1112 + } 1113 + 1114 + if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1115 + return -EINVAL; 1116 + 1117 + idx = get_index(vma->vm_pgoff); 1118 + if (idx >= uuari->num_uars) 1119 + return -EINVAL; 1120 + 1121 + pfn = uar_index2pfn(dev, uuari->uars[idx].index); 1122 + mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 1123 + 1124 + vma->vm_page_prot = prot; 1125 + err = io_remap_pfn_range(vma, vma->vm_start, pfn, 1126 + PAGE_SIZE, vma->vm_page_prot); 1127 + if (err) { 1128 + mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", 1129 + err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); 1130 + return -EAGAIN; 1131 + } 1132 + 1133 + pa = pfn << PAGE_SHIFT; 1134 + mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), 1135 + vma->vm_start, &pa); 1136 + 1137 + return 0; 1138 + } 1139 + 1078 1140 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 1079 1141 { 1080 1142 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1081 1143 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1082 1144 struct mlx5_uuar_info *uuari = &context->uuari; 1083 1145 unsigned long command; 1084 - unsigned long idx; 1085 1146 phys_addr_t pfn; 1086 1147 1087 1148 command = get_command(vma->vm_pgoff); 1088 1149 switch (command) { 1150 + case MLX5_IB_MMAP_WC_PAGE: 1151 + case MLX5_IB_MMAP_NC_PAGE: 1089 1152 case MLX5_IB_MMAP_REGULAR_PAGE: 1090 - if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1091 - return -EINVAL; 1092 - 1093 - idx = get_index(vma->vm_pgoff); 1094 - if (idx >= 
uuari->num_uars) 1095 - return -EINVAL; 1096 - 1097 - pfn = uar_index2pfn(dev, uuari->uars[idx].index); 1098 - mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, 1099 - (unsigned long long)pfn); 1100 - 1101 - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 1102 - if (io_remap_pfn_range(vma, vma->vm_start, pfn, 1103 - PAGE_SIZE, vma->vm_page_prot)) 1104 - return -EAGAIN; 1105 - 1106 - mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n", 1107 - vma->vm_start, 1108 - (unsigned long long)pfn << PAGE_SHIFT); 1109 - break; 1153 + return uar_mmap(dev, command, vma, uuari); 1110 1154 1111 1155 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 1112 1156 return -ENOSYS; ··· 1166 1108 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1167 1109 return -EINVAL; 1168 1110 1169 - if (vma->vm_flags & (VM_WRITE | VM_EXEC)) 1111 + if (vma->vm_flags & VM_WRITE) 1170 1112 return -EPERM; 1171 1113 1172 1114 /* Don't expose to user-space information it shouldn't have */
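The new uar_mmap() helper centralizes the page-protection choice (write-combining, best-effort WC, or non-cached) ahead of a common io_remap_pfn_range() call. The core of such a handler, independent of the mlx5 specifics (sketch):

#include <linux/mm.h>
#include <linux/io.h>

/* Map a single page of device registers into user space with
 * write-combining; pfn identifies the BAR page to expose. */
static int demo_mmap_wc_page(struct vm_area_struct *vma, unsigned long pfn)
{
    if (vma->vm_end - vma->vm_start != PAGE_SIZE)
        return -EINVAL;

    vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

    return io_remap_pfn_range(vma, vma->vm_start, pfn,
                              PAGE_SIZE, vma->vm_page_prot);
}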
+5 -3
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 70 70 enum mlx5_ib_mmap_cmd { 71 71 MLX5_IB_MMAP_REGULAR_PAGE = 0, 72 72 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, 73 + MLX5_IB_MMAP_WC_PAGE = 2, 74 + MLX5_IB_MMAP_NC_PAGE = 3, 73 75 /* 5 is chosen in order to be compatible with old versions of libmlx5 */ 74 76 MLX5_IB_MMAP_CORE_CLOCK = 5, 75 77 }; ··· 358 356 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, 359 357 /* QP uses 1 as its source QP number */ 360 358 MLX5_IB_QP_SQPN_QP1 = 1 << 6, 359 + MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, 361 360 }; 362 361 363 362 struct mlx5_umr_wr { ··· 715 712 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, 716 713 enum ib_mr_type mr_type, 717 714 u32 max_num_sg); 718 - int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, 719 - struct scatterlist *sg, 720 - int sg_nents); 715 + int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 716 + unsigned int *sg_offset); 721 717 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 722 718 const struct ib_wc *in_wc, const struct ib_grh *in_grh, 723 719 const struct ib_mad_hdr *in, size_t in_mad_size,
+16 -9
drivers/infiniband/hw/mlx5/mr.c
··· 1751 1751 static int 1752 1752 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 1753 1753 struct scatterlist *sgl, 1754 - unsigned short sg_nents) 1754 + unsigned short sg_nents, 1755 + unsigned int *sg_offset_p) 1755 1756 { 1756 1757 struct scatterlist *sg = sgl; 1757 1758 struct mlx5_klm *klms = mr->descs; 1759 + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1758 1760 u32 lkey = mr->ibmr.pd->local_dma_lkey; 1759 1761 int i; 1760 1762 1761 - mr->ibmr.iova = sg_dma_address(sg); 1763 + mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 1762 1764 mr->ibmr.length = 0; 1763 1765 mr->ndescs = sg_nents; 1764 1766 1765 1767 for_each_sg(sgl, sg, sg_nents, i) { 1766 1768 if (unlikely(i > mr->max_descs)) 1767 1769 break; 1768 - klms[i].va = cpu_to_be64(sg_dma_address(sg)); 1769 - klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); 1770 + klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 1771 + klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 1770 1772 klms[i].key = cpu_to_be32(lkey); 1771 1773 mr->ibmr.length += sg_dma_len(sg); 1774 + 1775 + sg_offset = 0; 1772 1776 } 1777 + 1778 + if (sg_offset_p) 1779 + *sg_offset_p = sg_offset; 1773 1780 1774 1781 return i; 1775 1782 } ··· 1795 1788 return 0; 1796 1789 } 1797 1790 1798 - int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, 1799 - struct scatterlist *sg, 1800 - int sg_nents) 1791 + int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 1792 + unsigned int *sg_offset) 1801 1793 { 1802 1794 struct mlx5_ib_mr *mr = to_mmr(ibmr); 1803 1795 int n; ··· 1808 1802 DMA_TO_DEVICE); 1809 1803 1810 1804 if (mr->access_mode == MLX5_ACCESS_MODE_KLM) 1811 - n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); 1805 + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); 1812 1806 else 1813 - n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); 1807 + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, 1808 + mlx5_set_page); 1814 1809 1815 1810 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 1816 1811 mr->desc_size * mr->max_descs,
+19 -1
drivers/infiniband/hw/mlx5/qp.c
··· 1028 1028 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 1029 1029 struct mlx5_ib_rq *rq, void *qpin) 1030 1030 { 1031 + struct mlx5_ib_qp *mqp = rq->base.container_mibqp; 1031 1032 __be64 *pas; 1032 1033 __be64 *qp_pas; 1033 1034 void *in; ··· 1051 1050 MLX5_SET(rqc, rqc, flush_in_error_en, 1); 1052 1051 MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); 1053 1052 MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); 1053 + 1054 + if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS) 1055 + MLX5_SET(rqc, rqc, scatter_fcs, 1); 1054 1056 1055 1057 wq = MLX5_ADDR_OF(rqc, rqc, wq); 1056 1058 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); ··· 1140 1136 } 1141 1137 1142 1138 if (qp->rq.wqe_cnt) { 1139 + rq->base.container_mibqp = qp; 1140 + 1143 1141 err = create_raw_packet_qp_rq(dev, rq, in); 1144 1142 if (err) 1145 1143 goto err_destroy_sq; 1146 1144 1147 - rq->base.container_mibqp = qp; 1148 1145 1149 1146 err = create_raw_packet_qp_tir(dev, rq, tdn); 1150 1147 if (err) ··· 1256 1251 mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); 1257 1252 return -EOPNOTSUPP; 1258 1253 } 1254 + 1255 + if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { 1256 + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { 1257 + mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs"); 1258 + return -EOPNOTSUPP; 1259 + } 1260 + if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) || 1261 + !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { 1262 + mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n"); 1263 + return -EOPNOTSUPP; 1264 + } 1265 + qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS; 1266 + } 1259 1267 1260 1268 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 1261 1269 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
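Scatter FCS ends up as a single bit plumbed from the QP create flags into the RQ context, so the consumer-visible surface is just the new create flag. A caller would request it roughly like this (only valid for raw packet QPs on devices that advertise IB_DEVICE_RAW_SCATTER_FCS; sketch, error handling left to the caller):

#include <rdma/ib_verbs.h>

static struct ib_qp *demo_create_scatter_fcs_qp(struct ib_pd *pd,
                                                struct ib_qp_init_attr *attr)
{
    attr->qp_type = IB_QPT_RAW_PACKET;
    attr->create_flags |= IB_QP_CREATE_SCATTER_FCS;

    return ib_create_qp(pd, attr);    /* ERR_PTR on failure */
}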
+3 -57
drivers/infiniband/hw/nes/nes_utils.c
··· 44 44 #include <linux/ip.h> 45 45 #include <linux/tcp.h> 46 46 #include <linux/init.h> 47 + #include <linux/kernel.h> 47 48 48 49 #include <asm/io.h> 49 50 #include <asm/irq.h> ··· 904 903 */ 905 904 void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) 906 905 { 907 - char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 908 - 'a', 'b', 'c', 'd', 'e', 'f'}; 909 - char *ptr; 910 - char hex_buf[80]; 911 - char ascii_buf[20]; 912 - int num_char; 913 - int num_ascii; 914 - int num_hex; 915 - 916 906 if (!(nes_debug_level & dump_debug_level)) { 917 907 return; 918 908 } 919 909 920 - ptr = addr; 921 910 if (length > 0x100) { 922 911 nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); 923 912 length = 0x100; 924 913 } 925 - nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length); 914 + nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length); 926 915 927 - memset(ascii_buf, 0, 20); 928 - memset(hex_buf, 0, 80); 929 - 930 - num_ascii = 0; 931 - num_hex = 0; 932 - for (num_char = 0; num_char < length; num_char++) { 933 - if (num_ascii == 8) { 934 - ascii_buf[num_ascii++] = ' '; 935 - hex_buf[num_hex++] = '-'; 936 - hex_buf[num_hex++] = ' '; 937 - } 938 - 939 - if (*ptr < 0x20 || *ptr > 0x7e) 940 - ascii_buf[num_ascii++] = '.'; 941 - else 942 - ascii_buf[num_ascii++] = *ptr; 943 - hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)]; 944 - hex_buf[num_hex++] = xlate[*ptr & 0x0f]; 945 - hex_buf[num_hex++] = ' '; 946 - ptr++; 947 - 948 - if (num_ascii >= 17) { 949 - /* output line and reset */ 950 - nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); 951 - memset(ascii_buf, 0, 20); 952 - memset(hex_buf, 0, 80); 953 - num_ascii = 0; 954 - num_hex = 0; 955 - } 956 - } 957 - 958 - /* output the rest */ 959 - if (num_ascii) { 960 - while (num_ascii < 17) { 961 - if (num_ascii == 8) { 962 - hex_buf[num_hex++] = ' '; 963 - hex_buf[num_hex++] = ' '; 964 - } 965 - hex_buf[num_hex++] = ' '; 966 - hex_buf[num_hex++] = ' '; 967 - hex_buf[num_hex++] = ' '; 968 - num_ascii++; 969 - } 970 - 971 - nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); 972 - } 916 + print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true); 973 917 }
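nes_dump_mem now defers to print_hex_dump() with 16 bytes per row, byte granularity and an ASCII column instead of assembling the rows by hand. For reference, a tiny standalone program producing a comparable layout (an illustration of the output format, not the kernel helper itself):

```c
#include <stdio.h>
#include <ctype.h>
#include <stddef.h>

/* Print 16 bytes per row: a hex column followed by an ASCII column,
 * roughly what print_hex_dump(..., 16, 1, buf, len, true) emits. */
static void hex_dump(const void *buf, size_t len)
{
	const unsigned char *p = buf;

	for (size_t i = 0; i < len; i += 16) {
		for (size_t j = 0; j < 16; j++) {
			if (i + j < len)
				printf("%02x ", p[i + j]);
			else
				printf("   ");
		}
		printf(" ");
		for (size_t j = 0; j < 16 && i + j < len; j++)
			putchar(isprint(p[i + j]) ? p[i + j] : '.');
		putchar('\n');
	}
}

int main(void)
{
	char msg[] = "replace custom print_hex_dump()";
	hex_dump(msg, sizeof(msg));
	return 0;
}
```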
+38 -5
drivers/infiniband/hw/nes/nes_verbs.c
··· 402 402 return 0; 403 403 } 404 404 405 - static int nes_map_mr_sg(struct ib_mr *ibmr, 406 - struct scatterlist *sg, 407 - int sg_nents) 405 + static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 406 + int sg_nents, unsigned int *sg_offset) 408 407 { 409 408 struct nes_mr *nesmr = to_nesmr(ibmr); 410 409 411 410 nesmr->npages = 0; 412 411 413 - return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); 412 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page); 414 413 } 415 414 416 415 /** ··· 980 981 /** 981 982 * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. 982 983 */ 983 - static inline void nes_free_qp_mem(struct nes_device *nesdev, 984 + static void nes_free_qp_mem(struct nes_device *nesdev, 984 985 struct nes_qp *nesqp, int virt_wqs) 985 986 { 986 987 unsigned long flags; ··· 1314 1315 nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); 1315 1316 return ERR_PTR(-EINVAL); 1316 1317 } 1318 + init_completion(&nesqp->sq_drained); 1319 + init_completion(&nesqp->rq_drained); 1317 1320 1318 1321 nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); 1319 1322 init_timer(&nesqp->terminate_timer); ··· 3453 3452 return err; 3454 3453 } 3455 3454 3455 + /** 3456 + * nes_drain_sq - drain sq 3457 + * @ibqp: pointer to ibqp 3458 + */ 3459 + static void nes_drain_sq(struct ib_qp *ibqp) 3460 + { 3461 + struct nes_qp *nesqp = to_nesqp(ibqp); 3462 + 3463 + if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head) 3464 + wait_for_completion(&nesqp->sq_drained); 3465 + } 3466 + 3467 + /** 3468 + * nes_drain_rq - drain rq 3469 + * @ibqp: pointer to ibqp 3470 + */ 3471 + static void nes_drain_rq(struct ib_qp *ibqp) 3472 + { 3473 + struct nes_qp *nesqp = to_nesqp(ibqp); 3474 + 3475 + if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head) 3476 + wait_for_completion(&nesqp->rq_drained); 3477 + } 3456 3478 3457 3479 /** 3458 3480 * nes_poll_cq ··· 3604 3580 move_cq_head = 0; 3605 3581 wq_tail = nesqp->hwqp.rq_tail; 3606 3582 } 3583 + } 3584 + 3585 + if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) { 3586 + if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head) 3587 + complete(&nesqp->sq_drained); 3588 + if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head) 3589 + complete(&nesqp->rq_drained); 3607 3590 } 3608 3591 3609 3592 entry->wr_id = wrid; ··· 3785 3754 nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; 3786 3755 nesibdev->ibdev.post_send = nes_post_send; 3787 3756 nesibdev->ibdev.post_recv = nes_post_recv; 3757 + nesibdev->ibdev.drain_sq = nes_drain_sq; 3758 + nesibdev->ibdev.drain_rq = nes_drain_rq; 3788 3759 3789 3760 nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); 3790 3761 if (nesibdev->ibdev.iwcm == NULL) {
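The new drain callbacks block only while the queue still has outstanding work (tail != head) and are completed from the poll path once the queue empties after the QP has left RTS. A toy pthread model of that wait-for-drain pattern; the types and the poller here are invented for illustration and have nothing to do with the nes hardware queues:

```c
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Invented queue state: head is how much work was posted, tail how much
 * the poller has retired so far. */
static struct {
	unsigned int head, tail;
	pthread_mutex_t lock;
	pthread_cond_t drained;
} q = {
	.head = 8, .tail = 0,
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.drained = PTHREAD_COND_INITIALIZER,
};

/* Like nes_drain_sq()/nes_drain_rq(): wait only if work is outstanding. */
static void toy_drain(void)
{
	pthread_mutex_lock(&q.lock);
	while (q.tail != q.head)
		pthread_cond_wait(&q.drained, &q.lock);
	pthread_mutex_unlock(&q.lock);
}

/* Like the poll path: retire work, then signal once tail catches head. */
static void *toy_poller(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&q.lock);
	while (q.tail != q.head) {
		q.tail++;
		usleep(1000);
	}
	pthread_cond_broadcast(&q.drained);
	pthread_mutex_unlock(&q.lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, toy_poller, NULL);
	toy_drain();
	printf("drained: tail=%u head=%u\n", q.tail, q.head);
	pthread_join(&t, NULL);
	return 0;
}
```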
+2
drivers/infiniband/hw/nes/nes_verbs.h
··· 189 189 u8 pau_pending; 190 190 u8 pau_state; 191 191 __u64 nesuqp_addr; 192 + struct completion sq_drained; 193 + struct completion rq_drained; 192 194 }; 193 195 194 196 struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+3 -4
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 3081 3081 return 0; 3082 3082 } 3083 3083 3084 - int ocrdma_map_mr_sg(struct ib_mr *ibmr, 3085 - struct scatterlist *sg, 3086 - int sg_nents) 3084 + int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 3085 + unsigned int *sg_offset) 3087 3086 { 3088 3087 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); 3089 3088 3090 3089 mr->npages = 0; 3091 3090 3092 - return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); 3091 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page); 3093 3092 }
+2 -3
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
··· 122 122 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, 123 123 enum ib_mr_type mr_type, 124 124 u32 max_num_sg); 125 - int ocrdma_map_mr_sg(struct ib_mr *ibmr, 126 - struct scatterlist *sg, 127 - int sg_nents); 125 + int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 126 + unsigned int *sg_offset); 128 127 129 128 #endif /* __OCRDMA_VERBS_H__ */
+2 -2
drivers/infiniband/hw/qib/qib_init.c
··· 1090 1090 qib_dbg_ibdev_exit(&dd->verbs_dev); 1091 1091 #endif 1092 1092 free_percpu(dd->int_counter); 1093 - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1093 + rvt_dealloc_device(&dd->verbs_dev.rdi); 1094 1094 } 1095 1095 1096 1096 u64 qib_int_counter(struct qib_devdata *dd) ··· 1183 1183 bail: 1184 1184 if (!list_empty(&dd->list)) 1185 1185 list_del_init(&dd->list); 1186 - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1186 + rvt_dealloc_device(&dd->verbs_dev.rdi); 1187 1187 return ERR_PTR(ret); 1188 1188 } 1189 1189
+1 -1
drivers/infiniband/hw/qib/qib_rc.c
··· 230 230 * 231 231 * Return 1 if constructed; otherwise, return 0. 232 232 */ 233 - int qib_make_rc_req(struct rvt_qp *qp) 233 + int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) 234 234 { 235 235 struct qib_qp_priv *priv = qp->priv; 236 236 struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+2 -2
drivers/infiniband/hw/qib/qib_ruc.c
··· 739 739 struct qib_qp_priv *priv = qp->priv; 740 740 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 741 741 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 742 - int (*make_req)(struct rvt_qp *qp); 742 + int (*make_req)(struct rvt_qp *qp, unsigned long *flags); 743 743 unsigned long flags; 744 744 745 745 if ((qp->ibqp.qp_type == IB_QPT_RC || ··· 781 781 qp->s_hdrwords = 0; 782 782 spin_lock_irqsave(&qp->s_lock, flags); 783 783 } 784 - } while (make_req(qp)); 784 + } while (make_req(qp, &flags)); 785 785 786 786 spin_unlock_irqrestore(&qp->s_lock, flags); 787 787 }
+1 -1
drivers/infiniband/hw/qib/qib_uc.c
··· 45 45 * 46 46 * Return 1 if constructed; otherwise, return 0. 47 47 */ 48 - int qib_make_uc_req(struct rvt_qp *qp) 48 + int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) 49 49 { 50 50 struct qib_qp_priv *priv = qp->priv; 51 51 struct qib_other_headers *ohdr;
+5 -5
drivers/infiniband/hw/qib/qib_ud.c
··· 238 238 * 239 239 * Return 1 if constructed; otherwise, return 0. 240 240 */ 241 - int qib_make_ud_req(struct rvt_qp *qp) 241 + int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) 242 242 { 243 243 struct qib_qp_priv *priv = qp->priv; 244 244 struct qib_other_headers *ohdr; ··· 294 294 this_cpu_inc(ibp->pmastats->n_unicast_xmit); 295 295 lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); 296 296 if (unlikely(lid == ppd->lid)) { 297 - unsigned long flags; 297 + unsigned long tflags = *flags; 298 298 /* 299 299 * If DMAs are in progress, we can't generate 300 300 * a completion for the loopback packet since ··· 307 307 goto bail; 308 308 } 309 309 qp->s_cur = next_cur; 310 - local_irq_save(flags); 311 - spin_unlock_irqrestore(&qp->s_lock, flags); 310 + spin_unlock_irqrestore(&qp->s_lock, tflags); 312 311 qib_ud_loopback(qp, wqe); 313 - spin_lock_irqsave(&qp->s_lock, flags); 312 + spin_lock_irqsave(&qp->s_lock, tflags); 313 + *flags = tflags; 314 314 qib_send_complete(qp, wqe, IB_WC_SUCCESS); 315 315 goto done; 316 316 }
+3 -3
drivers/infiniband/hw/qib/qib_verbs.h
··· 430 430 431 431 void qib_send_rc_ack(struct rvt_qp *qp); 432 432 433 - int qib_make_rc_req(struct rvt_qp *qp); 433 + int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags); 434 434 435 - int qib_make_uc_req(struct rvt_qp *qp); 435 + int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags); 436 436 437 - int qib_make_ud_req(struct rvt_qp *qp); 437 + int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags); 438 438 439 439 int qib_register_ib_device(struct qib_devdata *); 440 440
+3 -3
drivers/infiniband/sw/rdmavt/qp.c
··· 829 829 case IB_QPT_SMI: 830 830 case IB_QPT_GSI: 831 831 case IB_QPT_UD: 832 - qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; 832 + qp->allowed_ops = IB_OPCODE_UD; 833 833 break; 834 834 case IB_QPT_RC: 835 - qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; 835 + qp->allowed_ops = IB_OPCODE_RC; 836 836 break; 837 837 case IB_QPT_UC: 838 - qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; 838 + qp->allowed_ops = IB_OPCODE_UC; 839 839 break; 840 840 default: 841 841 ret = ERR_PTR(-EINVAL);
+13
drivers/infiniband/sw/rdmavt/vt.c
··· 106 106 } 107 107 EXPORT_SYMBOL(rvt_alloc_device); 108 108 109 + /** 110 + * rvt_dealloc_device - deallocate rdi 111 + * @rdi: structure to free 112 + * 113 + * Free a structure allocated with rvt_alloc_device() 114 + */ 115 + void rvt_dealloc_device(struct rvt_dev_info *rdi) 116 + { 117 + kfree(rdi->ports); 118 + ib_dealloc_device(&rdi->ibdev); 119 + } 120 + EXPORT_SYMBOL(rvt_dealloc_device); 121 + 109 122 static int rvt_query_device(struct ib_device *ibdev, 110 123 struct ib_device_attr *props, 111 124 struct ib_udata *uhw)
+67
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
··· 36 36 37 37 #include "ipoib.h" 38 38 39 + struct ipoib_stats { 40 + char stat_string[ETH_GSTRING_LEN]; 41 + int stat_offset; 42 + }; 43 + 44 + #define IPOIB_NETDEV_STAT(m) { \ 45 + .stat_string = #m, \ 46 + .stat_offset = offsetof(struct rtnl_link_stats64, m) } 47 + 48 + static const struct ipoib_stats ipoib_gstrings_stats[] = { 49 + IPOIB_NETDEV_STAT(rx_packets), 50 + IPOIB_NETDEV_STAT(tx_packets), 51 + IPOIB_NETDEV_STAT(rx_bytes), 52 + IPOIB_NETDEV_STAT(tx_bytes), 53 + IPOIB_NETDEV_STAT(tx_errors), 54 + IPOIB_NETDEV_STAT(rx_dropped), 55 + IPOIB_NETDEV_STAT(tx_dropped) 56 + }; 57 + 58 + #define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats) 59 + 39 60 static void ipoib_get_drvinfo(struct net_device *netdev, 40 61 struct ethtool_drvinfo *drvinfo) 41 62 { ··· 113 92 114 93 return 0; 115 94 } 95 + static void ipoib_get_ethtool_stats(struct net_device *dev, 96 + struct ethtool_stats __always_unused *stats, 97 + u64 *data) 98 + { 99 + int i; 100 + struct net_device_stats *net_stats = &dev->stats; 101 + u8 *p = (u8 *)net_stats; 102 + 103 + for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) 104 + data[i] = *(u64 *)(p + ipoib_gstrings_stats[i].stat_offset); 105 + 106 + } 107 + static void ipoib_get_strings(struct net_device __always_unused *dev, 108 + u32 stringset, u8 *data) 109 + { 110 + u8 *p = data; 111 + int i; 112 + 113 + switch (stringset) { 114 + case ETH_SS_STATS: 115 + for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) { 116 + memcpy(p, ipoib_gstrings_stats[i].stat_string, 117 + ETH_GSTRING_LEN); 118 + p += ETH_GSTRING_LEN; 119 + } 120 + break; 121 + case ETH_SS_TEST: 122 + default: 123 + break; 124 + } 125 + } 126 + static int ipoib_get_sset_count(struct net_device __always_unused *dev, 127 + int sset) 128 + { 129 + switch (sset) { 130 + case ETH_SS_STATS: 131 + return IPOIB_GLOBAL_STATS_LEN; 132 + case ETH_SS_TEST: 133 + default: 134 + break; 135 + } 136 + return -EOPNOTSUPP; 137 + } 116 138 117 139 static const struct ethtool_ops ipoib_ethtool_ops = { 118 140 .get_drvinfo = ipoib_get_drvinfo, 119 141 .get_coalesce = ipoib_get_coalesce, 120 142 .set_coalesce = ipoib_set_coalesce, 143 + .get_strings = ipoib_get_strings, 144 + .get_ethtool_stats = ipoib_get_ethtool_stats, 145 + .get_sset_count = ipoib_get_sset_count, 121 146 }; 122 147 123 148 void ipoib_set_ethtool_ops(struct net_device *dev)
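The new ipoib ethtool hooks are driven by one table of {name, offsetof} pairs, so the string list and the value list cannot drift apart. A standalone model of the same table-driven scheme, using a hypothetical counters structure in place of rtnl_link_stats64:

```c
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Hypothetical device counters standing in for rtnl_link_stats64. */
struct toy_stats {
	uint64_t rx_packets;
	uint64_t tx_packets;
	uint64_t tx_errors;
};

struct stat_desc {
	const char *name;
	size_t offset;
};

/* One macro keeps the label and the field offset in sync, mirroring
 * the IPOIB_NETDEV_STAT() helper above. */
#define TOY_STAT(m) { #m, offsetof(struct toy_stats, m) }

static const struct stat_desc toy_gstrings[] = {
	TOY_STAT(rx_packets),
	TOY_STAT(tx_packets),
	TOY_STAT(tx_errors),
};
#define TOY_STATS_LEN (sizeof(toy_gstrings) / sizeof(toy_gstrings[0]))

int main(void)
{
	struct toy_stats stats = { .rx_packets = 42, .tx_packets = 17, .tx_errors = 1 };
	const uint8_t *base = (const uint8_t *)&stats;

	/* The same table drives both the names (get_strings) and the
	 * values (get_ethtool_stats). */
	for (size_t i = 0; i < TOY_STATS_LEN; i++)
		printf("%-12s %llu\n", toy_gstrings[i].name,
		       (unsigned long long)*(const uint64_t *)(base + toy_gstrings[i].offset));
	return 0;
}
```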
-2
drivers/infiniband/ulp/ipoib/ipoib_ib.c
··· 51 51 "Enable data path debug tracing if > 0"); 52 52 #endif 53 53 54 - static DEFINE_MUTEX(pkey_mutex); 55 - 56 54 struct ipoib_ah *ipoib_create_ah(struct net_device *dev, 57 55 struct ib_pd *pd, struct ib_ah_attr *attr) 58 56 {
+2 -2
drivers/infiniband/ulp/iser/iser_memory.c
··· 236 236 page_vec->npages = 0; 237 237 page_vec->fake_mr.page_size = SIZE_4K; 238 238 plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, 239 - mem->size, iser_set_page); 239 + mem->size, NULL, iser_set_page); 240 240 if (unlikely(plen < mem->size)) { 241 241 iser_err("page vec too short to hold this SG\n"); 242 242 iser_data_buf_dump(mem, device->ib_device); ··· 446 446 447 447 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); 448 448 449 - n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); 449 + n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K); 450 450 if (unlikely(n != mem->size)) { 451 451 iser_err("failed to map sg (%d/%d)\n", 452 452 n, mem->size);
+82 -759
drivers/infiniband/ulp/isert/ib_isert.c
··· 33 33 34 34 #define ISERT_MAX_CONN 8 35 35 #define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN) 36 - #define ISER_MAX_TX_CQ_LEN (ISERT_QP_MAX_REQ_DTOS * ISERT_MAX_CONN) 36 + #define ISER_MAX_TX_CQ_LEN \ 37 + ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN) 37 38 #define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \ 38 39 ISERT_MAX_CONN) 39 40 ··· 47 46 static struct workqueue_struct *isert_comp_wq; 48 47 static struct workqueue_struct *isert_release_wq; 49 48 50 - static void 51 - isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); 52 - static int 53 - isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn); 54 - static void 55 - isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); 56 - static int 57 - isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn); 58 49 static int 59 50 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd); 60 51 static int ··· 135 142 attr.recv_cq = comp->cq; 136 143 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; 137 144 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 145 + attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; 138 146 attr.cap.max_send_sge = device->ib_device->attrs.max_sge; 139 147 isert_conn->max_sge = min(device->ib_device->attrs.max_sge, 140 148 device->ib_device->attrs.max_sge_rd); ··· 264 270 device->ib_device->num_comp_vectors)); 265 271 266 272 isert_info("Using %d CQs, %s supports %d vectors support " 267 - "Fast registration %d pi_capable %d\n", 273 + "pi_capable %d\n", 268 274 device->comps_used, device->ib_device->name, 269 - device->ib_device->num_comp_vectors, device->use_fastreg, 275 + device->ib_device->num_comp_vectors, 270 276 device->pi_capable); 271 277 272 278 device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), ··· 306 312 307 313 isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); 308 314 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); 309 - 310 - /* asign function handlers */ 311 - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && 312 - ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) { 313 - device->use_fastreg = 1; 314 - device->reg_rdma_mem = isert_reg_rdma; 315 - device->unreg_rdma_mem = isert_unreg_rdma; 316 - } else { 317 - device->use_fastreg = 0; 318 - device->reg_rdma_mem = isert_map_rdma; 319 - device->unreg_rdma_mem = isert_unmap_cmd; 320 - } 321 315 322 316 ret = isert_alloc_comps(device); 323 317 if (ret) ··· 399 417 } 400 418 401 419 static void 402 - isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) 403 - { 404 - struct fast_reg_descriptor *fr_desc, *tmp; 405 - int i = 0; 406 - 407 - if (list_empty(&isert_conn->fr_pool)) 408 - return; 409 - 410 - isert_info("Freeing conn %p fastreg pool", isert_conn); 411 - 412 - list_for_each_entry_safe(fr_desc, tmp, 413 - &isert_conn->fr_pool, list) { 414 - list_del(&fr_desc->list); 415 - ib_dereg_mr(fr_desc->data_mr); 416 - if (fr_desc->pi_ctx) { 417 - ib_dereg_mr(fr_desc->pi_ctx->prot_mr); 418 - ib_dereg_mr(fr_desc->pi_ctx->sig_mr); 419 - kfree(fr_desc->pi_ctx); 420 - } 421 - kfree(fr_desc); 422 - ++i; 423 - } 424 - 425 - if (i < isert_conn->fr_pool_size) 426 - isert_warn("Pool still has %d regions registered\n", 427 - isert_conn->fr_pool_size - i); 428 - } 429 - 430 - static int 431 - isert_create_pi_ctx(struct fast_reg_descriptor *desc, 432 - struct ib_device *device, 433 - struct ib_pd *pd) 434 - { 435 - struct pi_context *pi_ctx; 436 - 
int ret; 437 - 438 - pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); 439 - if (!pi_ctx) { 440 - isert_err("Failed to allocate pi context\n"); 441 - return -ENOMEM; 442 - } 443 - 444 - pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 445 - ISCSI_ISER_SG_TABLESIZE); 446 - if (IS_ERR(pi_ctx->prot_mr)) { 447 - isert_err("Failed to allocate prot frmr err=%ld\n", 448 - PTR_ERR(pi_ctx->prot_mr)); 449 - ret = PTR_ERR(pi_ctx->prot_mr); 450 - goto err_pi_ctx; 451 - } 452 - desc->ind |= ISERT_PROT_KEY_VALID; 453 - 454 - pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2); 455 - if (IS_ERR(pi_ctx->sig_mr)) { 456 - isert_err("Failed to allocate signature enabled mr err=%ld\n", 457 - PTR_ERR(pi_ctx->sig_mr)); 458 - ret = PTR_ERR(pi_ctx->sig_mr); 459 - goto err_prot_mr; 460 - } 461 - 462 - desc->pi_ctx = pi_ctx; 463 - desc->ind |= ISERT_SIG_KEY_VALID; 464 - desc->ind &= ~ISERT_PROTECTED; 465 - 466 - return 0; 467 - 468 - err_prot_mr: 469 - ib_dereg_mr(pi_ctx->prot_mr); 470 - err_pi_ctx: 471 - kfree(pi_ctx); 472 - 473 - return ret; 474 - } 475 - 476 - static int 477 - isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, 478 - struct fast_reg_descriptor *fr_desc) 479 - { 480 - fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 481 - ISCSI_ISER_SG_TABLESIZE); 482 - if (IS_ERR(fr_desc->data_mr)) { 483 - isert_err("Failed to allocate data frmr err=%ld\n", 484 - PTR_ERR(fr_desc->data_mr)); 485 - return PTR_ERR(fr_desc->data_mr); 486 - } 487 - fr_desc->ind |= ISERT_DATA_KEY_VALID; 488 - 489 - isert_dbg("Created fr_desc %p\n", fr_desc); 490 - 491 - return 0; 492 - } 493 - 494 - static int 495 - isert_conn_create_fastreg_pool(struct isert_conn *isert_conn) 496 - { 497 - struct fast_reg_descriptor *fr_desc; 498 - struct isert_device *device = isert_conn->device; 499 - struct se_session *se_sess = isert_conn->conn->sess->se_sess; 500 - struct se_node_acl *se_nacl = se_sess->se_node_acl; 501 - int i, ret, tag_num; 502 - /* 503 - * Setup the number of FRMRs based upon the number of tags 504 - * available to session in iscsi_target_locate_portal(). 
505 - */ 506 - tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth); 507 - tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS; 508 - 509 - isert_conn->fr_pool_size = 0; 510 - for (i = 0; i < tag_num; i++) { 511 - fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); 512 - if (!fr_desc) { 513 - isert_err("Failed to allocate fast_reg descriptor\n"); 514 - ret = -ENOMEM; 515 - goto err; 516 - } 517 - 518 - ret = isert_create_fr_desc(device->ib_device, 519 - device->pd, fr_desc); 520 - if (ret) { 521 - isert_err("Failed to create fastreg descriptor err=%d\n", 522 - ret); 523 - kfree(fr_desc); 524 - goto err; 525 - } 526 - 527 - list_add_tail(&fr_desc->list, &isert_conn->fr_pool); 528 - isert_conn->fr_pool_size++; 529 - } 530 - 531 - isert_dbg("Creating conn %p fastreg pool size=%d", 532 - isert_conn, isert_conn->fr_pool_size); 533 - 534 - return 0; 535 - 536 - err: 537 - isert_conn_free_fastreg_pool(isert_conn); 538 - return ret; 539 - } 540 - 541 - static void 542 420 isert_init_conn(struct isert_conn *isert_conn) 543 421 { 544 422 isert_conn->state = ISER_CONN_INIT; ··· 407 565 init_completion(&isert_conn->login_req_comp); 408 566 kref_init(&isert_conn->kref); 409 567 mutex_init(&isert_conn->mutex); 410 - spin_lock_init(&isert_conn->pool_lock); 411 - INIT_LIST_HEAD(&isert_conn->fr_pool); 412 568 INIT_WORK(&isert_conn->release_work, isert_release_work); 413 569 } 414 570 ··· 578 738 isert_dbg("conn %p\n", isert_conn); 579 739 580 740 BUG_ON(!device); 581 - 582 - if (device->use_fastreg) 583 - isert_conn_free_fastreg_pool(isert_conn); 584 741 585 742 isert_free_rx_descriptors(isert_conn); 586 743 if (isert_conn->cm_id) ··· 917 1080 { 918 1081 struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc; 919 1082 920 - isert_cmd->iser_ib_op = ISER_IB_SEND; 921 1083 tx_desc->tx_cqe.done = isert_send_done; 922 1084 send_wr->wr_cqe = &tx_desc->tx_cqe; 923 1085 ··· 996 1160 } 997 1161 if (!login->login_failed) { 998 1162 if (login->login_complete) { 999 - if (!conn->sess->sess_ops->SessionType && 1000 - isert_conn->device->use_fastreg) { 1001 - ret = isert_conn_create_fastreg_pool(isert_conn); 1002 - if (ret) { 1003 - isert_err("Conn: %p failed to create" 1004 - " fastreg pool\n", isert_conn); 1005 - return ret; 1006 - } 1007 - } 1008 - 1009 1163 ret = isert_alloc_rx_descriptors(isert_conn); 1010 1164 if (ret) 1011 1165 return ret; ··· 1459 1633 ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); 1460 1634 } 1461 1635 1462 - static int 1463 - isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, 1464 - struct scatterlist *sg, u32 nents, u32 length, u32 offset, 1465 - enum iser_ib_op_code op, struct isert_data_buf *data) 1466 - { 1467 - struct ib_device *ib_dev = isert_conn->cm_id->device; 1468 - 1469 - data->dma_dir = op == ISER_IB_RDMA_WRITE ? 
1470 - DMA_TO_DEVICE : DMA_FROM_DEVICE; 1471 - 1472 - data->len = length - offset; 1473 - data->offset = offset; 1474 - data->sg_off = data->offset / PAGE_SIZE; 1475 - 1476 - data->sg = &sg[data->sg_off]; 1477 - data->nents = min_t(unsigned int, nents - data->sg_off, 1478 - ISCSI_ISER_SG_TABLESIZE); 1479 - data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE * 1480 - PAGE_SIZE); 1481 - 1482 - data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents, 1483 - data->dma_dir); 1484 - if (unlikely(!data->dma_nents)) { 1485 - isert_err("Cmd: unable to dma map SGs %p\n", sg); 1486 - return -EINVAL; 1487 - } 1488 - 1489 - isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", 1490 - isert_cmd, data->dma_nents, data->sg, data->nents, data->len); 1491 - 1492 - return 0; 1493 - } 1494 - 1495 1636 static void 1496 - isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data) 1637 + isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn) 1497 1638 { 1498 - struct ib_device *ib_dev = isert_conn->cm_id->device; 1639 + struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd; 1640 + enum dma_data_direction dir = target_reverse_dma_direction(se_cmd); 1499 1641 1500 - ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir); 1501 - memset(data, 0, sizeof(*data)); 1502 - } 1642 + if (!cmd->rw.nr_ops) 1643 + return; 1503 1644 1504 - 1505 - 1506 - static void 1507 - isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) 1508 - { 1509 - isert_dbg("Cmd %p\n", isert_cmd); 1510 - 1511 - if (isert_cmd->data.sg) { 1512 - isert_dbg("Cmd %p unmap_sg op\n", isert_cmd); 1513 - isert_unmap_data_buf(isert_conn, &isert_cmd->data); 1645 + if (isert_prot_cmd(conn, se_cmd)) { 1646 + rdma_rw_ctx_destroy_signature(&cmd->rw, conn->qp, 1647 + conn->cm_id->port_num, se_cmd->t_data_sg, 1648 + se_cmd->t_data_nents, se_cmd->t_prot_sg, 1649 + se_cmd->t_prot_nents, dir); 1650 + } else { 1651 + rdma_rw_ctx_destroy(&cmd->rw, conn->qp, conn->cm_id->port_num, 1652 + se_cmd->t_data_sg, se_cmd->t_data_nents, dir); 1514 1653 } 1515 1654 1516 - if (isert_cmd->rdma_wr) { 1517 - isert_dbg("Cmd %p free send_wr\n", isert_cmd); 1518 - kfree(isert_cmd->rdma_wr); 1519 - isert_cmd->rdma_wr = NULL; 1520 - } 1521 - 1522 - if (isert_cmd->ib_sge) { 1523 - isert_dbg("Cmd %p free ib_sge\n", isert_cmd); 1524 - kfree(isert_cmd->ib_sge); 1525 - isert_cmd->ib_sge = NULL; 1526 - } 1527 - } 1528 - 1529 - static void 1530 - isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) 1531 - { 1532 - isert_dbg("Cmd %p\n", isert_cmd); 1533 - 1534 - if (isert_cmd->fr_desc) { 1535 - isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, isert_cmd->fr_desc); 1536 - if (isert_cmd->fr_desc->ind & ISERT_PROTECTED) { 1537 - isert_unmap_data_buf(isert_conn, &isert_cmd->prot); 1538 - isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED; 1539 - } 1540 - spin_lock_bh(&isert_conn->pool_lock); 1541 - list_add_tail(&isert_cmd->fr_desc->list, &isert_conn->fr_pool); 1542 - spin_unlock_bh(&isert_conn->pool_lock); 1543 - isert_cmd->fr_desc = NULL; 1544 - } 1545 - 1546 - if (isert_cmd->data.sg) { 1547 - isert_dbg("Cmd %p unmap_sg op\n", isert_cmd); 1548 - isert_unmap_data_buf(isert_conn, &isert_cmd->data); 1549 - } 1550 - 1551 - isert_cmd->ib_sge = NULL; 1552 - isert_cmd->rdma_wr = NULL; 1655 + cmd->rw.nr_ops = 0; 1553 1656 } 1554 1657 1555 1658 static void ··· 1487 1732 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; 1488 1733 struct isert_conn *isert_conn = isert_cmd->conn; 1489 1734 struct 
iscsi_conn *conn = isert_conn->conn; 1490 - struct isert_device *device = isert_conn->device; 1491 1735 struct iscsi_text_rsp *hdr; 1492 1736 1493 1737 isert_dbg("Cmd %p\n", isert_cmd); ··· 1514 1760 } 1515 1761 } 1516 1762 1517 - device->unreg_rdma_mem(isert_cmd, isert_conn); 1763 + isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); 1518 1764 transport_generic_free_cmd(&cmd->se_cmd, 0); 1519 1765 break; 1520 1766 case ISCSI_OP_SCSI_TMFUNC: ··· 1648 1894 1649 1895 isert_dbg("Cmd %p\n", isert_cmd); 1650 1896 1651 - if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) { 1652 - ret = isert_check_pi_status(cmd, 1653 - isert_cmd->fr_desc->pi_ctx->sig_mr); 1654 - isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED; 1655 - } 1897 + ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr); 1898 + isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); 1656 1899 1657 - device->unreg_rdma_mem(isert_cmd, isert_conn); 1658 - isert_cmd->rdma_wr_num = 0; 1659 1900 if (ret) 1660 1901 transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0); 1661 1902 else ··· 1678 1929 1679 1930 isert_dbg("Cmd %p\n", isert_cmd); 1680 1931 1681 - if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) { 1682 - ret = isert_check_pi_status(se_cmd, 1683 - isert_cmd->fr_desc->pi_ctx->sig_mr); 1684 - isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED; 1685 - } 1686 - 1687 1932 iscsit_stop_dataout_timer(cmd); 1688 - device->unreg_rdma_mem(isert_cmd, isert_conn); 1689 - cmd->write_data_done = isert_cmd->data.len; 1690 - isert_cmd->rdma_wr_num = 0; 1933 + 1934 + if (isert_prot_cmd(isert_conn, se_cmd)) 1935 + ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr); 1936 + isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); 1937 + cmd->write_data_done = 0; 1691 1938 1692 1939 isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); 1693 1940 spin_lock_bh(&cmd->istate_lock); ··· 1856 2111 { 1857 2112 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 1858 2113 struct isert_conn *isert_conn = conn->context; 1859 - struct isert_device *device = isert_conn->device; 1860 2114 1861 2115 spin_lock_bh(&conn->cmd_lock); 1862 2116 if (!list_empty(&cmd->i_conn_node)) ··· 1864 2120 1865 2121 if (cmd->data_direction == DMA_TO_DEVICE) 1866 2122 iscsit_stop_dataout_timer(cmd); 1867 - 1868 - device->unreg_rdma_mem(isert_cmd, isert_conn); 2123 + isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); 1869 2124 } 1870 2125 1871 2126 static enum target_prot_op ··· 2017 2274 return isert_post_response(isert_conn, isert_cmd); 2018 2275 } 2019 2276 2020 - static int 2021 - isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, 2022 - struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr, 2023 - u32 data_left, u32 offset) 2024 - { 2025 - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; 2026 - struct scatterlist *sg_start, *tmp_sg; 2027 - struct isert_device *device = isert_conn->device; 2028 - struct ib_device *ib_dev = device->ib_device; 2029 - u32 sg_off, page_off; 2030 - int i = 0, sg_nents; 2031 - 2032 - sg_off = offset / PAGE_SIZE; 2033 - sg_start = &cmd->se_cmd.t_data_sg[sg_off]; 2034 - sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); 2035 - page_off = offset % PAGE_SIZE; 2036 - 2037 - rdma_wr->wr.sg_list = ib_sge; 2038 - rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe; 2039 - 2040 - /* 2041 - * Perform mapping of TCM scatterlist memory ib_sge dma_addr. 
2042 - */ 2043 - for_each_sg(sg_start, tmp_sg, sg_nents, i) { 2044 - isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, " 2045 - "page_off: %u\n", 2046 - (unsigned long long)tmp_sg->dma_address, 2047 - tmp_sg->length, page_off); 2048 - 2049 - ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off; 2050 - ib_sge->length = min_t(u32, data_left, 2051 - ib_sg_dma_len(ib_dev, tmp_sg) - page_off); 2052 - ib_sge->lkey = device->pd->local_dma_lkey; 2053 - 2054 - isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n", 2055 - ib_sge->addr, ib_sge->length, ib_sge->lkey); 2056 - page_off = 0; 2057 - data_left -= ib_sge->length; 2058 - if (!data_left) 2059 - break; 2060 - ib_sge++; 2061 - isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); 2062 - } 2063 - 2064 - rdma_wr->wr.num_sge = ++i; 2065 - isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", 2066 - rdma_wr->wr.sg_list, rdma_wr->wr.num_sge); 2067 - 2068 - return rdma_wr->wr.num_sge; 2069 - } 2070 - 2071 - static int 2072 - isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn) 2073 - { 2074 - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; 2075 - struct se_cmd *se_cmd = &cmd->se_cmd; 2076 - struct isert_conn *isert_conn = conn->context; 2077 - struct isert_data_buf *data = &isert_cmd->data; 2078 - struct ib_rdma_wr *rdma_wr; 2079 - struct ib_sge *ib_sge; 2080 - u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; 2081 - int ret = 0, i, ib_sge_cnt; 2082 - 2083 - offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ? 2084 - cmd->write_data_done : 0; 2085 - ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg, 2086 - se_cmd->t_data_nents, se_cmd->data_length, 2087 - offset, isert_cmd->iser_ib_op, 2088 - &isert_cmd->data); 2089 - if (ret) 2090 - return ret; 2091 - 2092 - data_left = data->len; 2093 - offset = data->offset; 2094 - 2095 - ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL); 2096 - if (!ib_sge) { 2097 - isert_warn("Unable to allocate ib_sge\n"); 2098 - ret = -ENOMEM; 2099 - goto unmap_cmd; 2100 - } 2101 - isert_cmd->ib_sge = ib_sge; 2102 - 2103 - isert_cmd->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); 2104 - isert_cmd->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * 2105 - isert_cmd->rdma_wr_num, GFP_KERNEL); 2106 - if (!isert_cmd->rdma_wr) { 2107 - isert_dbg("Unable to allocate isert_cmd->rdma_wr\n"); 2108 - ret = -ENOMEM; 2109 - goto unmap_cmd; 2110 - } 2111 - 2112 - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; 2113 - 2114 - for (i = 0; i < isert_cmd->rdma_wr_num; i++) { 2115 - rdma_wr = &isert_cmd->rdma_wr[i]; 2116 - data_len = min(data_left, rdma_write_max); 2117 - 2118 - rdma_wr->wr.send_flags = 0; 2119 - if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) { 2120 - isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done; 2121 - 2122 - rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 2123 - rdma_wr->remote_addr = isert_cmd->read_va + offset; 2124 - rdma_wr->rkey = isert_cmd->read_stag; 2125 - if (i + 1 == isert_cmd->rdma_wr_num) 2126 - rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr; 2127 - else 2128 - rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr; 2129 - } else { 2130 - isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done; 2131 - 2132 - rdma_wr->wr.opcode = IB_WR_RDMA_READ; 2133 - rdma_wr->remote_addr = isert_cmd->write_va + va_offset; 2134 - rdma_wr->rkey = isert_cmd->write_stag; 2135 - if (i + 1 == isert_cmd->rdma_wr_num) 2136 - rdma_wr->wr.send_flags = IB_SEND_SIGNALED; 2137 - else 2138 - rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr; 
2139 - } 2140 - 2141 - ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, 2142 - rdma_wr, data_len, offset); 2143 - ib_sge += ib_sge_cnt; 2144 - 2145 - offset += data_len; 2146 - va_offset += data_len; 2147 - data_left -= data_len; 2148 - } 2149 - 2150 - return 0; 2151 - unmap_cmd: 2152 - isert_unmap_data_buf(isert_conn, data); 2153 - 2154 - return ret; 2155 - } 2156 - 2157 - static inline void 2158 - isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) 2159 - { 2160 - u32 rkey; 2161 - 2162 - memset(inv_wr, 0, sizeof(*inv_wr)); 2163 - inv_wr->wr_cqe = NULL; 2164 - inv_wr->opcode = IB_WR_LOCAL_INV; 2165 - inv_wr->ex.invalidate_rkey = mr->rkey; 2166 - 2167 - /* Bump the key */ 2168 - rkey = ib_inc_rkey(mr->rkey); 2169 - ib_update_fast_reg_key(mr, rkey); 2170 - } 2171 - 2172 - static int 2173 - isert_fast_reg_mr(struct isert_conn *isert_conn, 2174 - struct fast_reg_descriptor *fr_desc, 2175 - struct isert_data_buf *mem, 2176 - enum isert_indicator ind, 2177 - struct ib_sge *sge) 2178 - { 2179 - struct isert_device *device = isert_conn->device; 2180 - struct ib_device *ib_dev = device->ib_device; 2181 - struct ib_mr *mr; 2182 - struct ib_reg_wr reg_wr; 2183 - struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; 2184 - int ret, n; 2185 - 2186 - if (mem->dma_nents == 1) { 2187 - sge->lkey = device->pd->local_dma_lkey; 2188 - sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]); 2189 - sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]); 2190 - isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", 2191 - sge->addr, sge->length, sge->lkey); 2192 - return 0; 2193 - } 2194 - 2195 - if (ind == ISERT_DATA_KEY_VALID) 2196 - /* Registering data buffer */ 2197 - mr = fr_desc->data_mr; 2198 - else 2199 - /* Registering protection buffer */ 2200 - mr = fr_desc->pi_ctx->prot_mr; 2201 - 2202 - if (!(fr_desc->ind & ind)) { 2203 - isert_inv_rkey(&inv_wr, mr); 2204 - wr = &inv_wr; 2205 - } 2206 - 2207 - n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE); 2208 - if (unlikely(n != mem->nents)) { 2209 - isert_err("failed to map mr sg (%d/%d)\n", 2210 - n, mem->nents); 2211 - return n < 0 ? 
n : -EINVAL; 2212 - } 2213 - 2214 - isert_dbg("Use fr_desc %p sg_nents %d offset %u\n", 2215 - fr_desc, mem->nents, mem->offset); 2216 - 2217 - reg_wr.wr.next = NULL; 2218 - reg_wr.wr.opcode = IB_WR_REG_MR; 2219 - reg_wr.wr.wr_cqe = NULL; 2220 - reg_wr.wr.send_flags = 0; 2221 - reg_wr.wr.num_sge = 0; 2222 - reg_wr.mr = mr; 2223 - reg_wr.key = mr->lkey; 2224 - reg_wr.access = IB_ACCESS_LOCAL_WRITE; 2225 - 2226 - if (!wr) 2227 - wr = &reg_wr.wr; 2228 - else 2229 - wr->next = &reg_wr.wr; 2230 - 2231 - ret = ib_post_send(isert_conn->qp, wr, &bad_wr); 2232 - if (ret) { 2233 - isert_err("fast registration failed, ret:%d\n", ret); 2234 - return ret; 2235 - } 2236 - fr_desc->ind &= ~ind; 2237 - 2238 - sge->lkey = mr->lkey; 2239 - sge->addr = mr->iova; 2240 - sge->length = mr->length; 2241 - 2242 - isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", 2243 - sge->addr, sge->length, sge->lkey); 2244 - 2245 - return ret; 2246 - } 2247 - 2248 2277 static inline void 2249 2278 isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs, 2250 2279 struct ib_sig_domain *domain) ··· 2041 2526 static int 2042 2527 isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) 2043 2528 { 2529 + memset(sig_attrs, 0, sizeof(*sig_attrs)); 2530 + 2044 2531 switch (se_cmd->prot_op) { 2045 2532 case TARGET_PROT_DIN_INSERT: 2046 2533 case TARGET_PROT_DOUT_STRIP: ··· 2064 2547 return -EINVAL; 2065 2548 } 2066 2549 2550 + sig_attrs->check_mask = 2551 + (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) | 2552 + (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) | 2553 + (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0); 2067 2554 return 0; 2068 2555 } 2069 2556 2070 - static inline u8 2071 - isert_set_prot_checks(u8 prot_checks) 2072 - { 2073 - return (prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) | 2074 - (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) | 2075 - (prot_checks & TARGET_DIF_CHECK_REFTAG ? 
0x0f : 0); 2076 - } 2077 - 2078 2557 static int 2079 - isert_reg_sig_mr(struct isert_conn *isert_conn, 2080 - struct isert_cmd *isert_cmd, 2081 - struct fast_reg_descriptor *fr_desc) 2558 + isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn, 2559 + struct ib_cqe *cqe, struct ib_send_wr *chain_wr) 2082 2560 { 2083 - struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd; 2084 - struct ib_sig_handover_wr sig_wr; 2085 - struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; 2086 - struct pi_context *pi_ctx = fr_desc->pi_ctx; 2087 - struct ib_sig_attrs sig_attrs; 2561 + struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd; 2562 + enum dma_data_direction dir = target_reverse_dma_direction(se_cmd); 2563 + u8 port_num = conn->cm_id->port_num; 2564 + u64 addr; 2565 + u32 rkey, offset; 2088 2566 int ret; 2089 2567 2090 - memset(&sig_attrs, 0, sizeof(sig_attrs)); 2091 - ret = isert_set_sig_attrs(se_cmd, &sig_attrs); 2092 - if (ret) 2093 - goto err; 2094 - 2095 - sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks); 2096 - 2097 - if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) { 2098 - isert_inv_rkey(&inv_wr, pi_ctx->sig_mr); 2099 - wr = &inv_wr; 2568 + if (dir == DMA_FROM_DEVICE) { 2569 + addr = cmd->write_va; 2570 + rkey = cmd->write_stag; 2571 + offset = cmd->iscsi_cmd->write_data_done; 2572 + } else { 2573 + addr = cmd->read_va; 2574 + rkey = cmd->read_stag; 2575 + offset = 0; 2100 2576 } 2101 2577 2102 - memset(&sig_wr, 0, sizeof(sig_wr)); 2103 - sig_wr.wr.opcode = IB_WR_REG_SIG_MR; 2104 - sig_wr.wr.wr_cqe = NULL; 2105 - sig_wr.wr.sg_list = &isert_cmd->ib_sg[DATA]; 2106 - sig_wr.wr.num_sge = 1; 2107 - sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; 2108 - sig_wr.sig_attrs = &sig_attrs; 2109 - sig_wr.sig_mr = pi_ctx->sig_mr; 2110 - if (se_cmd->t_prot_sg) 2111 - sig_wr.prot = &isert_cmd->ib_sg[PROT]; 2578 + if (isert_prot_cmd(conn, se_cmd)) { 2579 + struct ib_sig_attrs sig_attrs; 2112 2580 2113 - if (!wr) 2114 - wr = &sig_wr.wr; 2115 - else 2116 - wr->next = &sig_wr.wr; 2117 - 2118 - ret = ib_post_send(isert_conn->qp, wr, &bad_wr); 2119 - if (ret) { 2120 - isert_err("fast registration failed, ret:%d\n", ret); 2121 - goto err; 2122 - } 2123 - fr_desc->ind &= ~ISERT_SIG_KEY_VALID; 2124 - 2125 - isert_cmd->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey; 2126 - isert_cmd->ib_sg[SIG].addr = 0; 2127 - isert_cmd->ib_sg[SIG].length = se_cmd->data_length; 2128 - if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP && 2129 - se_cmd->prot_op != TARGET_PROT_DOUT_INSERT) 2130 - /* 2131 - * We have protection guards on the wire 2132 - * so we need to set a larget transfer 2133 - */ 2134 - isert_cmd->ib_sg[SIG].length += se_cmd->prot_length; 2135 - 2136 - isert_dbg("sig_sge: addr: 0x%llx length: %u lkey: %x\n", 2137 - isert_cmd->ib_sg[SIG].addr, isert_cmd->ib_sg[SIG].length, 2138 - isert_cmd->ib_sg[SIG].lkey); 2139 - err: 2140 - return ret; 2141 - } 2142 - 2143 - static int 2144 - isert_handle_prot_cmd(struct isert_conn *isert_conn, 2145 - struct isert_cmd *isert_cmd) 2146 - { 2147 - struct isert_device *device = isert_conn->device; 2148 - struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd; 2149 - int ret; 2150 - 2151 - if (!isert_cmd->fr_desc->pi_ctx) { 2152 - ret = isert_create_pi_ctx(isert_cmd->fr_desc, 2153 - device->ib_device, 2154 - device->pd); 2155 - if (ret) { 2156 - isert_err("conn %p failed to allocate pi_ctx\n", 2157 - isert_conn); 2158 - return ret; 2159 - } 2160 - } 2161 - 2162 - if (se_cmd->t_prot_sg) { 2163 - ret = isert_map_data_buf(isert_conn, isert_cmd, 2164 - se_cmd->t_prot_sg, 2165 - 
se_cmd->t_prot_nents, 2166 - se_cmd->prot_length, 2167 - 0, 2168 - isert_cmd->iser_ib_op, 2169 - &isert_cmd->prot); 2170 - if (ret) { 2171 - isert_err("conn %p failed to map protection buffer\n", 2172 - isert_conn); 2173 - return ret; 2174 - } 2175 - 2176 - memset(&isert_cmd->ib_sg[PROT], 0, sizeof(isert_cmd->ib_sg[PROT])); 2177 - ret = isert_fast_reg_mr(isert_conn, isert_cmd->fr_desc, 2178 - &isert_cmd->prot, 2179 - ISERT_PROT_KEY_VALID, 2180 - &isert_cmd->ib_sg[PROT]); 2181 - if (ret) { 2182 - isert_err("conn %p failed to fast reg mr\n", 2183 - isert_conn); 2184 - goto unmap_prot_cmd; 2185 - } 2186 - } 2187 - 2188 - ret = isert_reg_sig_mr(isert_conn, isert_cmd, isert_cmd->fr_desc); 2189 - if (ret) { 2190 - isert_err("conn %p failed to fast reg mr\n", 2191 - isert_conn); 2192 - goto unmap_prot_cmd; 2193 - } 2194 - isert_cmd->fr_desc->ind |= ISERT_PROTECTED; 2195 - 2196 - return 0; 2197 - 2198 - unmap_prot_cmd: 2199 - if (se_cmd->t_prot_sg) 2200 - isert_unmap_data_buf(isert_conn, &isert_cmd->prot); 2201 - 2202 - return ret; 2203 - } 2204 - 2205 - static int 2206 - isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn) 2207 - { 2208 - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; 2209 - struct se_cmd *se_cmd = &cmd->se_cmd; 2210 - struct isert_conn *isert_conn = conn->context; 2211 - struct fast_reg_descriptor *fr_desc = NULL; 2212 - struct ib_rdma_wr *rdma_wr; 2213 - struct ib_sge *ib_sg; 2214 - u32 offset; 2215 - int ret = 0; 2216 - unsigned long flags; 2217 - 2218 - offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ? 2219 - cmd->write_data_done : 0; 2220 - ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg, 2221 - se_cmd->t_data_nents, se_cmd->data_length, 2222 - offset, isert_cmd->iser_ib_op, 2223 - &isert_cmd->data); 2224 - if (ret) 2225 - return ret; 2226 - 2227 - if (isert_cmd->data.dma_nents != 1 || 2228 - isert_prot_cmd(isert_conn, se_cmd)) { 2229 - spin_lock_irqsave(&isert_conn->pool_lock, flags); 2230 - fr_desc = list_first_entry(&isert_conn->fr_pool, 2231 - struct fast_reg_descriptor, list); 2232 - list_del(&fr_desc->list); 2233 - spin_unlock_irqrestore(&isert_conn->pool_lock, flags); 2234 - isert_cmd->fr_desc = fr_desc; 2235 - } 2236 - 2237 - ret = isert_fast_reg_mr(isert_conn, fr_desc, &isert_cmd->data, 2238 - ISERT_DATA_KEY_VALID, &isert_cmd->ib_sg[DATA]); 2239 - if (ret) 2240 - goto unmap_cmd; 2241 - 2242 - if (isert_prot_cmd(isert_conn, se_cmd)) { 2243 - ret = isert_handle_prot_cmd(isert_conn, isert_cmd); 2581 + ret = isert_set_sig_attrs(se_cmd, &sig_attrs); 2244 2582 if (ret) 2245 - goto unmap_cmd; 2583 + return ret; 2246 2584 2247 - ib_sg = &isert_cmd->ib_sg[SIG]; 2585 + WARN_ON_ONCE(offset); 2586 + ret = rdma_rw_ctx_signature_init(&cmd->rw, conn->qp, port_num, 2587 + se_cmd->t_data_sg, se_cmd->t_data_nents, 2588 + se_cmd->t_prot_sg, se_cmd->t_prot_nents, 2589 + &sig_attrs, addr, rkey, dir); 2248 2590 } else { 2249 - ib_sg = &isert_cmd->ib_sg[DATA]; 2591 + ret = rdma_rw_ctx_init(&cmd->rw, conn->qp, port_num, 2592 + se_cmd->t_data_sg, se_cmd->t_data_nents, 2593 + offset, addr, rkey, dir); 2594 + } 2595 + if (ret < 0) { 2596 + isert_err("Cmd: %p failed to prepare RDMA res\n", cmd); 2597 + return ret; 2250 2598 } 2251 2599 2252 - memcpy(&isert_cmd->s_ib_sge, ib_sg, sizeof(*ib_sg)); 2253 - isert_cmd->ib_sge = &isert_cmd->s_ib_sge; 2254 - isert_cmd->rdma_wr_num = 1; 2255 - memset(&isert_cmd->s_rdma_wr, 0, sizeof(isert_cmd->s_rdma_wr)); 2256 - isert_cmd->rdma_wr = &isert_cmd->s_rdma_wr; 2257 - 2258 - rdma_wr = &isert_cmd->s_rdma_wr; 2259 - 
rdma_wr->wr.sg_list = &isert_cmd->s_ib_sge; 2260 - rdma_wr->wr.num_sge = 1; 2261 - rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe; 2262 - if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) { 2263 - isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done; 2264 - 2265 - rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; 2266 - rdma_wr->remote_addr = isert_cmd->read_va; 2267 - rdma_wr->rkey = isert_cmd->read_stag; 2268 - rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? 2269 - 0 : IB_SEND_SIGNALED; 2270 - } else { 2271 - isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done; 2272 - 2273 - rdma_wr->wr.opcode = IB_WR_RDMA_READ; 2274 - rdma_wr->remote_addr = isert_cmd->write_va; 2275 - rdma_wr->rkey = isert_cmd->write_stag; 2276 - rdma_wr->wr.send_flags = IB_SEND_SIGNALED; 2277 - } 2278 - 2279 - return 0; 2280 - 2281 - unmap_cmd: 2282 - if (fr_desc) { 2283 - spin_lock_irqsave(&isert_conn->pool_lock, flags); 2284 - list_add_tail(&fr_desc->list, &isert_conn->fr_pool); 2285 - spin_unlock_irqrestore(&isert_conn->pool_lock, flags); 2286 - } 2287 - isert_unmap_data_buf(isert_conn, &isert_cmd->data); 2288 - 2600 + ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr); 2601 + if (ret < 0) 2602 + isert_err("Cmd: %p failed to post RDMA res\n", cmd); 2289 2603 return ret; 2290 2604 } 2291 2605 ··· 2126 2778 struct se_cmd *se_cmd = &cmd->se_cmd; 2127 2779 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 2128 2780 struct isert_conn *isert_conn = conn->context; 2129 - struct isert_device *device = isert_conn->device; 2130 - struct ib_send_wr *wr_failed; 2781 + struct ib_cqe *cqe = NULL; 2782 + struct ib_send_wr *chain_wr = NULL; 2131 2783 int rc; 2132 2784 2133 2785 isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n", 2134 2786 isert_cmd, se_cmd->data_length); 2135 2787 2136 - isert_cmd->iser_ib_op = ISER_IB_RDMA_WRITE; 2137 - rc = device->reg_rdma_mem(isert_cmd, conn); 2138 - if (rc) { 2139 - isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); 2140 - return rc; 2141 - } 2142 - 2143 - if (!isert_prot_cmd(isert_conn, se_cmd)) { 2788 + if (isert_prot_cmd(isert_conn, se_cmd)) { 2789 + isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done; 2790 + cqe = &isert_cmd->tx_desc.tx_cqe; 2791 + } else { 2144 2792 /* 2145 2793 * Build isert_conn->tx_desc for iSCSI response PDU and attach 2146 2794 */ ··· 2147 2803 isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); 2148 2804 isert_init_send_wr(isert_conn, isert_cmd, 2149 2805 &isert_cmd->tx_desc.send_wr); 2150 - isert_cmd->s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr; 2151 - isert_cmd->rdma_wr_num += 1; 2152 2806 2153 2807 rc = isert_post_recv(isert_conn, isert_cmd->rx_desc); 2154 2808 if (rc) { 2155 2809 isert_err("ib_post_recv failed with %d\n", rc); 2156 2810 return rc; 2157 2811 } 2812 + 2813 + chain_wr = &isert_cmd->tx_desc.send_wr; 2158 2814 } 2159 2815 2160 - rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed); 2161 - if (rc) 2162 - isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); 2163 - 2164 - if (!isert_prot_cmd(isert_conn, se_cmd)) 2165 - isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data " 2166 - "READ\n", isert_cmd); 2167 - else 2168 - isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", 2169 - isert_cmd); 2170 - 2816 + isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr); 2817 + isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd); 2171 2818 return 1; 2172 2819 } 2173 2820 2174 2821 static int 2175 2822 isert_get_dataout(struct iscsi_conn *conn, struct 
iscsi_cmd *cmd, bool recovery) 2176 2823 { 2177 - struct se_cmd *se_cmd = &cmd->se_cmd; 2178 2824 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 2179 - struct isert_conn *isert_conn = conn->context; 2180 - struct isert_device *device = isert_conn->device; 2181 - struct ib_send_wr *wr_failed; 2182 - int rc; 2183 2825 2184 2826 isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", 2185 - isert_cmd, se_cmd->data_length, cmd->write_data_done); 2186 - isert_cmd->iser_ib_op = ISER_IB_RDMA_READ; 2187 - rc = device->reg_rdma_mem(isert_cmd, conn); 2188 - if (rc) { 2189 - isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); 2190 - return rc; 2191 - } 2827 + isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done); 2192 2828 2193 - rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed); 2194 - if (rc) 2195 - isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); 2829 + isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done; 2830 + isert_rdma_rw_ctx_post(isert_cmd, conn->context, 2831 + &isert_cmd->tx_desc.tx_cqe, NULL); 2196 2832 2197 2833 isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", 2198 2834 isert_cmd); 2199 - 2200 2835 return 0; 2201 2836 } 2202 2837
+3 -66
drivers/infiniband/ulp/isert/ib_isert.h
··· 3 3 #include <linux/in6.h> 4 4 #include <rdma/ib_verbs.h> 5 5 #include <rdma/rdma_cm.h> 6 + #include <rdma/rw.h> 6 7 #include <scsi/iser.h> 7 8 8 9 ··· 54 53 55 54 #define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2) 56 55 57 - #define ISERT_INFLIGHT_DATAOUTS 8 58 - 59 - #define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \ 60 - (1 + ISERT_INFLIGHT_DATAOUTS) + \ 56 + #define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ 61 57 ISERT_MAX_TX_MISC_PDUS + \ 62 58 ISERT_MAX_RX_MISC_PDUS) 63 59 ··· 67 69 enum isert_desc_type { 68 70 ISCSI_TX_CONTROL, 69 71 ISCSI_TX_DATAIN 70 - }; 71 - 72 - enum iser_ib_op_code { 73 - ISER_IB_RECV, 74 - ISER_IB_SEND, 75 - ISER_IB_RDMA_WRITE, 76 - ISER_IB_RDMA_READ, 77 72 }; 78 73 79 74 enum iser_conn_state { ··· 109 118 return container_of(cqe, struct iser_tx_desc, tx_cqe); 110 119 } 111 120 112 - 113 - enum isert_indicator { 114 - ISERT_PROTECTED = 1 << 0, 115 - ISERT_DATA_KEY_VALID = 1 << 1, 116 - ISERT_PROT_KEY_VALID = 1 << 2, 117 - ISERT_SIG_KEY_VALID = 1 << 3, 118 - }; 119 - 120 - struct pi_context { 121 - struct ib_mr *prot_mr; 122 - struct ib_mr *sig_mr; 123 - }; 124 - 125 - struct fast_reg_descriptor { 126 - struct list_head list; 127 - struct ib_mr *data_mr; 128 - u8 ind; 129 - struct pi_context *pi_ctx; 130 - }; 131 - 132 - struct isert_data_buf { 133 - struct scatterlist *sg; 134 - int nents; 135 - u32 sg_off; 136 - u32 len; /* cur_rdma_length */ 137 - u32 offset; 138 - unsigned int dma_nents; 139 - enum dma_data_direction dma_dir; 140 - }; 141 - 142 - enum { 143 - DATA = 0, 144 - PROT = 1, 145 - SIG = 2, 146 - }; 147 - 148 121 struct isert_cmd { 149 122 uint32_t read_stag; 150 123 uint32_t write_stag; ··· 121 166 struct iscsi_cmd *iscsi_cmd; 122 167 struct iser_tx_desc tx_desc; 123 168 struct iser_rx_desc *rx_desc; 124 - enum iser_ib_op_code iser_ib_op; 125 - struct ib_sge *ib_sge; 126 - struct ib_sge s_ib_sge; 127 - int rdma_wr_num; 128 - struct ib_rdma_wr *rdma_wr; 129 - struct ib_rdma_wr s_rdma_wr; 130 - struct ib_sge ib_sg[3]; 131 - struct isert_data_buf data; 132 - struct isert_data_buf prot; 133 - struct fast_reg_descriptor *fr_desc; 169 + struct rdma_rw_ctx rw; 134 170 struct work_struct comp_work; 135 171 struct scatterlist sg; 136 172 }; ··· 156 210 struct isert_device *device; 157 211 struct mutex mutex; 158 212 struct kref kref; 159 - struct list_head fr_pool; 160 - int fr_pool_size; 161 - /* lock to protect fastreg pool */ 162 - spinlock_t pool_lock; 163 213 struct work_struct release_work; 164 214 bool logout_posted; 165 215 bool snd_w_inv; ··· 178 236 }; 179 237 180 238 struct isert_device { 181 - int use_fastreg; 182 239 bool pi_capable; 183 240 int refcount; 184 241 struct ib_device *ib_device; ··· 185 244 struct isert_comp *comps; 186 245 int comps_used; 187 246 struct list_head dev_node; 188 - int (*reg_rdma_mem)(struct isert_cmd *isert_cmd, 189 - struct iscsi_conn *conn); 190 - void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd, 191 - struct isert_conn *isert_conn); 192 247 }; 193 248 194 249 struct isert_np {
+176 -53
drivers/infiniband/ulp/srp/ib_srp.c
··· 70 70 static bool allow_ext_sg; 71 71 static bool prefer_fr = true; 72 72 static bool register_always = true; 73 + static bool never_register; 73 74 static int topspin_workarounds = 1; 74 75 75 76 module_param(srp_sg_tablesize, uint, 0444); ··· 99 98 module_param(register_always, bool, 0444); 100 99 MODULE_PARM_DESC(register_always, 101 100 "Use memory registration even for contiguous memory regions"); 101 + 102 + module_param(never_register, bool, 0444); 103 + MODULE_PARM_DESC(never_register, "Never register memory"); 102 104 103 105 static const struct kernel_param_ops srp_tmo_ops; 104 106 ··· 320 316 struct ib_fmr_pool_param fmr_param; 321 317 322 318 memset(&fmr_param, 0, sizeof(fmr_param)); 323 - fmr_param.pool_size = target->scsi_host->can_queue; 319 + fmr_param.pool_size = target->mr_pool_size; 324 320 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 325 321 fmr_param.cache = 1; 326 322 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; ··· 445 441 { 446 442 struct srp_device *dev = target->srp_host->srp_dev; 447 443 448 - return srp_create_fr_pool(dev->dev, dev->pd, 449 - target->scsi_host->can_queue, 444 + return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, 450 445 dev->max_pages_per_mr); 451 446 } 452 447 453 448 /** 454 449 * srp_destroy_qp() - destroy an RDMA queue pair 455 - * @ch: SRP RDMA channel. 450 + * @qp: RDMA queue pair. 456 451 * 457 452 * Drain the qp before destroying it. This avoids that the receive 458 453 * completion handler can access the queue pair while it is 459 454 * being destroyed. 460 455 */ 461 - static void srp_destroy_qp(struct srp_rdma_ch *ch) 456 + static void srp_destroy_qp(struct ib_qp *qp) 462 457 { 463 - ib_drain_rq(ch->qp); 464 - ib_destroy_qp(ch->qp); 458 + ib_drain_rq(qp); 459 + ib_destroy_qp(qp); 465 460 } 466 461 467 462 static int srp_create_ch_ib(struct srp_rdma_ch *ch) ··· 472 469 struct ib_qp *qp; 473 470 struct ib_fmr_pool *fmr_pool = NULL; 474 471 struct srp_fr_pool *fr_pool = NULL; 475 - const int m = dev->use_fast_reg ? 3 : 1; 472 + const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; 476 473 int ret; 477 474 478 475 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); ··· 533 530 } 534 531 535 532 if (ch->qp) 536 - srp_destroy_qp(ch); 533 + srp_destroy_qp(ch->qp); 537 534 if (ch->recv_cq) 538 535 ib_free_cq(ch->recv_cq); 539 536 if (ch->send_cq) ··· 557 554 return 0; 558 555 559 556 err_qp: 560 - srp_destroy_qp(ch); 557 + srp_destroy_qp(qp); 561 558 562 559 err_send_cq: 563 560 ib_free_cq(send_cq); ··· 600 597 ib_destroy_fmr_pool(ch->fmr_pool); 601 598 } 602 599 603 - srp_destroy_qp(ch); 600 + srp_destroy_qp(ch->qp); 604 601 ib_free_cq(ch->send_cq); 605 602 ib_free_cq(ch->recv_cq); 606 603 ··· 853 850 854 851 for (i = 0; i < target->req_ring_size; ++i) { 855 852 req = &ch->req_ring[i]; 856 - mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), 853 + mr_list = kmalloc(target->mr_per_cmd * sizeof(void *), 857 854 GFP_KERNEL); 858 855 if (!mr_list) 859 856 goto out; ··· 1115 1112 } 1116 1113 1117 1114 /** 1118 - * srp_free_req() - Unmap data and add request to the free request list. 1115 + * srp_free_req() - Unmap data and adjust ch->req_lim. 1119 1116 * @ch: SRP RDMA channel. 1120 1117 * @req: Request to be freed. 1121 1118 * @scmnd: SCSI command associated with @req. ··· 1302 1299 srp_handle_qp_err(cq, wc, "FAST REG"); 1303 1300 } 1304 1301 1302 + /* 1303 + * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset 1304 + * where to start in the first element. 
If sg_offset_p != NULL then 1305 + * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1306 + * byte that has not yet been mapped. 1307 + */ 1305 1308 static int srp_map_finish_fr(struct srp_map_state *state, 1306 1309 struct srp_request *req, 1307 - struct srp_rdma_ch *ch, int sg_nents) 1310 + struct srp_rdma_ch *ch, int sg_nents, 1311 + unsigned int *sg_offset_p) 1308 1312 { 1309 1313 struct srp_target_port *target = ch->target; 1310 1314 struct srp_device *dev = target->srp_host->srp_dev; ··· 1326 1316 1327 1317 WARN_ON_ONCE(!dev->use_fast_reg); 1328 1318 1329 - if (sg_nents == 0) 1330 - return 0; 1331 - 1332 1319 if (sg_nents == 1 && target->global_mr) { 1333 - srp_map_desc(state, sg_dma_address(state->sg), 1334 - sg_dma_len(state->sg), 1320 + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1321 + 1322 + srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1323 + sg_dma_len(state->sg) - sg_offset, 1335 1324 target->global_mr->rkey); 1325 + if (sg_offset_p) 1326 + *sg_offset_p = 0; 1336 1327 return 1; 1337 1328 } 1338 1329 ··· 1344 1333 rkey = ib_inc_rkey(desc->mr->rkey); 1345 1334 ib_update_fast_reg_key(desc->mr, rkey); 1346 1335 1347 - n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size); 1348 - if (unlikely(n < 0)) 1336 + n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1337 + dev->mr_page_size); 1338 + if (unlikely(n < 0)) { 1339 + srp_fr_pool_put(ch->fr_pool, &desc, 1); 1340 + pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1341 + dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1342 + sg_offset_p ? *sg_offset_p : -1, n); 1349 1343 return n; 1344 + } 1345 + 1346 + WARN_ON_ONCE(desc->mr->length == 0); 1350 1347 1351 1348 req->reg_cqe.done = srp_reg_mr_err_done; 1352 1349 ··· 1376 1357 desc->mr->length, desc->mr->rkey); 1377 1358 1378 1359 err = ib_post_send(ch->qp, &wr.wr, &bad_wr); 1379 - if (unlikely(err)) 1360 + if (unlikely(err)) { 1361 + WARN_ON_ONCE(err == -ENOMEM); 1380 1362 return err; 1363 + } 1381 1364 1382 1365 return n; 1383 1366 } ··· 1419 1398 /* 1420 1399 * If the last entry of the MR wasn't a full page, then we need to 1421 1400 * close it out and start a new one -- we can only merge at page 1422 - * boundries. 1401 + * boundaries. 
1423 1402 */ 1424 1403 ret = 0; 1425 1404 if (len != dev->mr_page_size) ··· 1434 1413 struct scatterlist *sg; 1435 1414 int i, ret; 1436 1415 1437 - state->desc = req->indirect_desc; 1438 1416 state->pages = req->map_page; 1439 1417 state->fmr.next = req->fmr_list; 1440 - state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; 1418 + state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1441 1419 1442 1420 for_each_sg(scat, sg, count, i) { 1443 1421 ret = srp_map_sg_entry(state, ch, sg, i); ··· 1448 1428 if (ret) 1449 1429 return ret; 1450 1430 1451 - req->nmdesc = state->nmdesc; 1452 - 1453 1431 return 0; 1454 1432 } 1455 1433 ··· 1455 1437 struct srp_request *req, struct scatterlist *scat, 1456 1438 int count) 1457 1439 { 1440 + unsigned int sg_offset = 0; 1441 + 1458 1442 state->desc = req->indirect_desc; 1459 1443 state->fr.next = req->fr_list; 1460 - state->fr.end = req->fr_list + ch->target->cmd_sg_cnt; 1444 + state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1461 1445 state->sg = scat; 1446 + 1447 + if (count == 0) 1448 + return 0; 1462 1449 1463 1450 while (count) { 1464 1451 int i, n; 1465 1452 1466 - n = srp_map_finish_fr(state, req, ch, count); 1453 + n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1467 1454 if (unlikely(n < 0)) 1468 1455 return n; 1469 1456 ··· 1476 1453 for (i = 0; i < n; i++) 1477 1454 state->sg = sg_next(state->sg); 1478 1455 } 1479 - 1480 - req->nmdesc = state->nmdesc; 1481 1456 1482 1457 return 0; 1483 1458 } ··· 1495 1474 ib_sg_dma_len(dev->dev, sg), 1496 1475 target->global_mr->rkey); 1497 1476 } 1498 - 1499 - req->nmdesc = state->nmdesc; 1500 1477 1501 1478 return 0; 1502 1479 } ··· 1528 1509 1529 1510 if (dev->use_fast_reg) { 1530 1511 state.sg = idb_sg; 1531 - sg_set_buf(idb_sg, req->indirect_desc, idb_len); 1512 + sg_init_one(idb_sg, req->indirect_desc, idb_len); 1532 1513 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
*/ 1533 1514 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1534 1515 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1535 1516 #endif 1536 - ret = srp_map_finish_fr(&state, req, ch, 1); 1517 + ret = srp_map_finish_fr(&state, req, ch, 1, NULL); 1537 1518 if (ret < 0) 1538 1519 return ret; 1520 + WARN_ON_ONCE(ret < 1); 1539 1521 } else if (dev->use_fmr) { 1540 1522 state.pages = idb_pages; 1541 1523 state.pages[0] = (req->indirect_dma_addr & ··· 1554 1534 return 0; 1555 1535 } 1556 1536 1537 + #if defined(DYNAMIC_DATA_DEBUG) 1538 + static void srp_check_mapping(struct srp_map_state *state, 1539 + struct srp_rdma_ch *ch, struct srp_request *req, 1540 + struct scatterlist *scat, int count) 1541 + { 1542 + struct srp_device *dev = ch->target->srp_host->srp_dev; 1543 + struct srp_fr_desc **pfr; 1544 + u64 desc_len = 0, mr_len = 0; 1545 + int i; 1546 + 1547 + for (i = 0; i < state->ndesc; i++) 1548 + desc_len += be32_to_cpu(req->indirect_desc[i].len); 1549 + if (dev->use_fast_reg) 1550 + for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) 1551 + mr_len += (*pfr)->mr->length; 1552 + else if (dev->use_fmr) 1553 + for (i = 0; i < state->nmdesc; i++) 1554 + mr_len += be32_to_cpu(req->indirect_desc[i].len); 1555 + if (desc_len != scsi_bufflen(req->scmnd) || 1556 + mr_len > scsi_bufflen(req->scmnd)) 1557 + pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", 1558 + scsi_bufflen(req->scmnd), desc_len, mr_len, 1559 + state->ndesc, state->nmdesc); 1560 + } 1561 + #endif 1562 + 1563 + /** 1564 + * srp_map_data() - map SCSI data buffer onto an SRP request 1565 + * @scmnd: SCSI command to map 1566 + * @ch: SRP RDMA channel 1567 + * @req: SRP request 1568 + * 1569 + * Returns the length in bytes of the SRP_CMD IU or a negative value if 1570 + * mapping failed. 1571 + */ 1557 1572 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1558 1573 struct srp_request *req) 1559 1574 { ··· 1656 1601 1657 1602 memset(&state, 0, sizeof(state)); 1658 1603 if (dev->use_fast_reg) 1659 - srp_map_sg_fr(&state, ch, req, scat, count); 1604 + ret = srp_map_sg_fr(&state, ch, req, scat, count); 1660 1605 else if (dev->use_fmr) 1661 - srp_map_sg_fmr(&state, ch, req, scat, count); 1606 + ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1662 1607 else 1663 - srp_map_sg_dma(&state, ch, req, scat, count); 1608 + ret = srp_map_sg_dma(&state, ch, req, scat, count); 1609 + req->nmdesc = state.nmdesc; 1610 + if (ret < 0) 1611 + goto unmap; 1612 + 1613 + #if defined(DYNAMIC_DEBUG) 1614 + { 1615 + DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1616 + "Memory mapping consistency check"); 1617 + if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT)) 1618 + srp_check_mapping(&state, ch, req, scat, count); 1619 + } 1620 + #endif 1664 1621 1665 1622 /* We've mapped the request, now pull as much of the indirect 1666 1623 * descriptor table as we can into the command buffer. 
If this ··· 1695 1628 !target->allow_ext_sg)) { 1696 1629 shost_printk(KERN_ERR, target->scsi_host, 1697 1630 "Could not fit S/G list into SRP_CMD\n"); 1698 - return -EIO; 1631 + ret = -EIO; 1632 + goto unmap; 1699 1633 } 1700 1634 1701 1635 count = min(state.ndesc, target->cmd_sg_cnt); ··· 1714 1646 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1715 1647 idb_len, &idb_rkey); 1716 1648 if (ret < 0) 1717 - return ret; 1649 + goto unmap; 1718 1650 req->nmdesc++; 1719 1651 } else { 1720 1652 idb_rkey = cpu_to_be32(target->global_mr->rkey); ··· 1740 1672 cmd->buf_fmt = fmt; 1741 1673 1742 1674 return len; 1675 + 1676 + unmap: 1677 + srp_unmap_data(scmnd, ch, req); 1678 + if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1679 + ret = -E2BIG; 1680 + return ret; 1743 1681 } 1744 1682 1745 1683 /* ··· 2638 2564 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; 2639 2565 } 2640 2566 2567 + static int srp_slave_alloc(struct scsi_device *sdev) 2568 + { 2569 + struct Scsi_Host *shost = sdev->host; 2570 + struct srp_target_port *target = host_to_target(shost); 2571 + struct srp_device *srp_dev = target->srp_host->srp_dev; 2572 + struct ib_device *ibdev = srp_dev->dev; 2573 + 2574 + if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 2575 + blk_queue_virt_boundary(sdev->request_queue, 2576 + ~srp_dev->mr_page_mask); 2577 + 2578 + return 0; 2579 + } 2580 + 2641 2581 static int srp_slave_configure(struct scsi_device *sdev) 2642 2582 { 2643 2583 struct Scsi_Host *shost = sdev->host; ··· 2843 2755 .module = THIS_MODULE, 2844 2756 .name = "InfiniBand SRP initiator", 2845 2757 .proc_name = DRV_NAME, 2758 + .slave_alloc = srp_slave_alloc, 2846 2759 .slave_configure = srp_slave_configure, 2847 2760 .info = srp_target_info, 2848 2761 .queuecommand = srp_queuecommand, ··· 2918 2829 goto out; 2919 2830 } 2920 2831 2921 - pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n", 2832 + pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 2922 2833 dev_name(&target->scsi_host->shost_gendev), 2923 2834 srp_sdev_count(target->scsi_host)); 2924 2835 ··· 3250 3161 struct srp_device *srp_dev = host->srp_dev; 3251 3162 struct ib_device *ibdev = srp_dev->dev; 3252 3163 int ret, node_idx, node, cpu, i; 3164 + unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3253 3165 bool multich = false; 3254 3166 3255 3167 target_host = scsi_host_alloc(&srp_template, ··· 3307 3217 target->sg_tablesize = target->cmd_sg_cnt; 3308 3218 } 3309 3219 3220 + if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3221 + /* 3222 + * FR and FMR can only map one HCA page per entry. If the 3223 + * start address is not aligned on a HCA page boundary two 3224 + * entries will be used for the head and the tail although 3225 + * these two entries combined contain at most one HCA page of 3226 + * data. Hence the "+ 1" in the calculation below. 3227 + * 3228 + * The indirect data buffer descriptor is contiguous so the 3229 + * memory for that buffer will only be registered if 3230 + * register_always is true. Hence add one to mr_per_cmd if 3231 + * register_always has been set. 
3232 + */ 3233 + max_sectors_per_mr = srp_dev->max_pages_per_mr << 3234 + (ilog2(srp_dev->mr_page_size) - 9); 3235 + mr_per_cmd = register_always + 3236 + (target->scsi_host->max_sectors + 1 + 3237 + max_sectors_per_mr - 1) / max_sectors_per_mr; 3238 + pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3239 + target->scsi_host->max_sectors, 3240 + srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3241 + max_sectors_per_mr, mr_per_cmd); 3242 + } 3243 + 3310 3244 target_host->sg_tablesize = target->sg_tablesize; 3245 + target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3246 + target->mr_per_cmd = mr_per_cmd; 3311 3247 target->indirect_size = target->sg_tablesize * 3312 3248 sizeof (struct srp_direct_buf); 3313 3249 target->max_iu_len = sizeof (struct srp_cmd) + ··· 3530 3414 if (!srp_dev) 3531 3415 return; 3532 3416 3533 - srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 3534 - device->map_phys_fmr && device->unmap_fmr); 3535 - srp_dev->has_fr = (device->attrs.device_cap_flags & 3536 - IB_DEVICE_MEM_MGT_EXTENSIONS); 3537 - if (!srp_dev->has_fmr && !srp_dev->has_fr) 3538 - dev_warn(&device->dev, "neither FMR nor FR is supported\n"); 3539 - 3540 - srp_dev->use_fast_reg = (srp_dev->has_fr && 3541 - (!srp_dev->has_fmr || prefer_fr)); 3542 - srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; 3543 - 3544 3417 /* 3545 3418 * Use the smallest page size supported by the HCA, down to a 3546 3419 * minimum of 4096 bytes. We're unlikely to build large sglists ··· 3540 3435 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); 3541 3436 max_pages_per_mr = device->attrs.max_mr_size; 3542 3437 do_div(max_pages_per_mr, srp_dev->mr_page_size); 3438 + pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, 3439 + device->attrs.max_mr_size, srp_dev->mr_page_size, 3440 + max_pages_per_mr, SRP_MAX_PAGES_PER_MR); 3543 3441 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, 3544 3442 max_pages_per_mr); 3443 + 3444 + srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 3445 + device->map_phys_fmr && device->unmap_fmr); 3446 + srp_dev->has_fr = (device->attrs.device_cap_flags & 3447 + IB_DEVICE_MEM_MGT_EXTENSIONS); 3448 + if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { 3449 + dev_warn(&device->dev, "neither FMR nor FR is supported\n"); 3450 + } else if (!never_register && 3451 + device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) { 3452 + srp_dev->use_fast_reg = (srp_dev->has_fr && 3453 + (!srp_dev->has_fmr || prefer_fr)); 3454 + srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; 3455 + } 3456 + 3545 3457 if (srp_dev->use_fast_reg) { 3546 3458 srp_dev->max_pages_per_mr = 3547 3459 min_t(u32, srp_dev->max_pages_per_mr, ··· 3578 3456 if (IS_ERR(srp_dev->pd)) 3579 3457 goto free_dev; 3580 3458 3581 - if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) { 3459 + if (never_register || !register_always || 3460 + (!srp_dev->has_fmr && !srp_dev->has_fr)) { 3582 3461 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd, 3583 3462 IB_ACCESS_LOCAL_WRITE | 3584 3463 IB_ACCESS_REMOTE_READ |
+2
drivers/infiniband/ulp/srp/ib_srp.h
··· 202 202 char target_name[32]; 203 203 unsigned int scsi_id; 204 204 unsigned int sg_tablesize; 205 + int mr_pool_size; 206 + int mr_per_cmd; 205 207 int queue_size; 206 208 int req_ring_size; 207 209 int comp_vector;
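The new mr_pool_size and mr_per_cmd fields are filled in srp_create_target() above from the worst-case transfer size rather than from cmd_sg_cnt. A minimal user-space sketch of that arithmetic follows; the device numbers are made up for illustration and __builtin_ctz() stands in for the kernel's ilog2():

#include <stdio.h>

int main(void)
{
    /* Illustrative values only; the real ones come from the HCA and the SCSI host. */
    unsigned int register_always = 1;    /* module parameter */
    unsigned int can_queue = 64;         /* scsi_host->can_queue */
    unsigned int max_sectors = 1024;     /* scsi_host->max_sectors */
    unsigned int max_pages_per_mr = 256; /* srp_dev->max_pages_per_mr */
    unsigned int mr_page_size = 4096;    /* srp_dev->mr_page_size */

    /* ilog2(mr_page_size) - 9 converts HCA pages into 512-byte sectors. */
    unsigned int max_sectors_per_mr =
        max_pages_per_mr << (__builtin_ctz(mr_page_size) - 9);

    /*
     * "+ 1" covers a start address that is not HCA-page aligned, and
     * register_always adds one MR for the indirect descriptor table.
     */
    unsigned int mr_per_cmd = register_always +
        (max_sectors + 1 + max_sectors_per_mr - 1) / max_sectors_per_mr;

    printf("max_sectors_per_mr=%u mr_per_cmd=%u mr_pool_size=%u\n",
           max_sectors_per_mr, mr_per_cmd, can_queue * mr_per_cmd);
    return 0;
}

With these example numbers a single command needs at most two registrations, so the per-target pool is sized to 128 MRs instead of the old can_queue-sized FMR/FR pools.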
+253 -476
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 765 765 } 766 766 767 767 /** 768 - * srpt_post_send() - Post an IB send request. 769 - * 770 - * Returns zero upon success and a non-zero value upon failure. 771 - */ 772 - static int srpt_post_send(struct srpt_rdma_ch *ch, 773 - struct srpt_send_ioctx *ioctx, int len) 774 - { 775 - struct ib_sge list; 776 - struct ib_send_wr wr, *bad_wr; 777 - struct srpt_device *sdev = ch->sport->sdev; 778 - int ret; 779 - 780 - atomic_inc(&ch->req_lim); 781 - 782 - ret = -ENOMEM; 783 - if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) { 784 - pr_warn("IB send queue full (needed 1)\n"); 785 - goto out; 786 - } 787 - 788 - ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len, 789 - DMA_TO_DEVICE); 790 - 791 - list.addr = ioctx->ioctx.dma; 792 - list.length = len; 793 - list.lkey = sdev->pd->local_dma_lkey; 794 - 795 - ioctx->ioctx.cqe.done = srpt_send_done; 796 - wr.next = NULL; 797 - wr.wr_cqe = &ioctx->ioctx.cqe; 798 - wr.sg_list = &list; 799 - wr.num_sge = 1; 800 - wr.opcode = IB_WR_SEND; 801 - wr.send_flags = IB_SEND_SIGNALED; 802 - 803 - ret = ib_post_send(ch->qp, &wr, &bad_wr); 804 - 805 - out: 806 - if (ret < 0) { 807 - atomic_inc(&ch->sq_wr_avail); 808 - atomic_dec(&ch->req_lim); 809 - } 810 - return ret; 811 - } 812 - 813 - /** 814 768 * srpt_zerolength_write() - Perform a zero-length RDMA write. 815 769 * 816 770 * A quote from the InfiniBand specification: C9-88: For an HCA responder ··· 797 843 } 798 844 } 799 845 846 + static int srpt_alloc_rw_ctxs(struct srpt_send_ioctx *ioctx, 847 + struct srp_direct_buf *db, int nbufs, struct scatterlist **sg, 848 + unsigned *sg_cnt) 849 + { 850 + enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd); 851 + struct srpt_rdma_ch *ch = ioctx->ch; 852 + struct scatterlist *prev = NULL; 853 + unsigned prev_nents; 854 + int ret, i; 855 + 856 + if (nbufs == 1) { 857 + ioctx->rw_ctxs = &ioctx->s_rw_ctx; 858 + } else { 859 + ioctx->rw_ctxs = kmalloc_array(nbufs, sizeof(*ioctx->rw_ctxs), 860 + GFP_KERNEL); 861 + if (!ioctx->rw_ctxs) 862 + return -ENOMEM; 863 + } 864 + 865 + for (i = ioctx->n_rw_ctx; i < nbufs; i++, db++) { 866 + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; 867 + u64 remote_addr = be64_to_cpu(db->va); 868 + u32 size = be32_to_cpu(db->len); 869 + u32 rkey = be32_to_cpu(db->key); 870 + 871 + ret = target_alloc_sgl(&ctx->sg, &ctx->nents, size, false, 872 + i < nbufs - 1); 873 + if (ret) 874 + goto unwind; 875 + 876 + ret = rdma_rw_ctx_init(&ctx->rw, ch->qp, ch->sport->port, 877 + ctx->sg, ctx->nents, 0, remote_addr, rkey, dir); 878 + if (ret < 0) { 879 + target_free_sgl(ctx->sg, ctx->nents); 880 + goto unwind; 881 + } 882 + 883 + ioctx->n_rdma += ret; 884 + ioctx->n_rw_ctx++; 885 + 886 + if (prev) { 887 + sg_unmark_end(&prev[prev_nents - 1]); 888 + sg_chain(prev, prev_nents + 1, ctx->sg); 889 + } else { 890 + *sg = ctx->sg; 891 + } 892 + 893 + prev = ctx->sg; 894 + prev_nents = ctx->nents; 895 + 896 + *sg_cnt += ctx->nents; 897 + } 898 + 899 + return 0; 900 + 901 + unwind: 902 + while (--i >= 0) { 903 + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; 904 + 905 + rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port, 906 + ctx->sg, ctx->nents, dir); 907 + target_free_sgl(ctx->sg, ctx->nents); 908 + } 909 + if (ioctx->rw_ctxs != &ioctx->s_rw_ctx) 910 + kfree(ioctx->rw_ctxs); 911 + return ret; 912 + } 913 + 914 + static void srpt_free_rw_ctxs(struct srpt_rdma_ch *ch, 915 + struct srpt_send_ioctx *ioctx) 916 + { 917 + enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd); 918 + int i; 919 + 920 + for 
(i = 0; i < ioctx->n_rw_ctx; i++) { 921 + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; 922 + 923 + rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port, 924 + ctx->sg, ctx->nents, dir); 925 + target_free_sgl(ctx->sg, ctx->nents); 926 + } 927 + 928 + if (ioctx->rw_ctxs != &ioctx->s_rw_ctx) 929 + kfree(ioctx->rw_ctxs); 930 + } 931 + 932 + static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd) 933 + { 934 + /* 935 + * The pointer computations below will only be compiled correctly 936 + * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check 937 + * whether srp_cmd::add_data has been declared as a byte pointer. 938 + */ 939 + BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) && 940 + !__same_type(srp_cmd->add_data[0], (u8)0)); 941 + 942 + /* 943 + * According to the SRP spec, the lower two bits of the 'ADDITIONAL 944 + * CDB LENGTH' field are reserved and the size in bytes of this field 945 + * is four times the value specified in bits 3..7. Hence the "& ~3". 946 + */ 947 + return srp_cmd->add_data + (srp_cmd->add_cdb_len & ~3); 948 + } 949 + 800 950 /** 801 951 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. 802 952 * @ioctx: Pointer to the I/O context associated with the request. ··· 916 858 * -ENOMEM when memory allocation fails and zero upon success. 917 859 */ 918 860 static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, 919 - struct srp_cmd *srp_cmd, 920 - enum dma_data_direction *dir, u64 *data_len) 861 + struct srp_cmd *srp_cmd, enum dma_data_direction *dir, 862 + struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len) 921 863 { 922 - struct srp_indirect_buf *idb; 923 - struct srp_direct_buf *db; 924 - unsigned add_cdb_offset; 925 - int ret; 926 - 927 - /* 928 - * The pointer computations below will only be compiled correctly 929 - * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check 930 - * whether srp_cmd::add_data has been declared as a byte pointer. 931 - */ 932 - BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) 933 - && !__same_type(srp_cmd->add_data[0], (u8)0)); 934 - 935 864 BUG_ON(!dir); 936 865 BUG_ON(!data_len); 937 - 938 - ret = 0; 939 - *data_len = 0; 940 866 941 867 /* 942 868 * The lower four bits of the buffer format field contain the DATA-IN 943 869 * buffer descriptor format, and the highest four bits contain the 944 870 * DATA-OUT buffer descriptor format. 945 871 */ 946 - *dir = DMA_NONE; 947 872 if (srp_cmd->buf_fmt & 0xf) 948 873 /* DATA-IN: transfer data from target to initiator (read). */ 949 874 *dir = DMA_FROM_DEVICE; 950 875 else if (srp_cmd->buf_fmt >> 4) 951 876 /* DATA-OUT: transfer data from initiator to target (write). */ 952 877 *dir = DMA_TO_DEVICE; 878 + else 879 + *dir = DMA_NONE; 953 880 954 - /* 955 - * According to the SRP spec, the lower two bits of the 'ADDITIONAL 956 - * CDB LENGTH' field are reserved and the size in bytes of this field 957 - * is four times the value specified in bits 3..7. Hence the "& ~3". 
958 - */ 959 - add_cdb_offset = srp_cmd->add_cdb_len & ~3; 881 + /* initialize data_direction early as srpt_alloc_rw_ctxs needs it */ 882 + ioctx->cmd.data_direction = *dir; 883 + 960 884 if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || 961 885 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { 962 - ioctx->n_rbuf = 1; 963 - ioctx->rbufs = &ioctx->single_rbuf; 886 + struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd); 964 887 965 - db = (struct srp_direct_buf *)(srp_cmd->add_data 966 - + add_cdb_offset); 967 - memcpy(ioctx->rbufs, db, sizeof(*db)); 968 888 *data_len = be32_to_cpu(db->len); 889 + return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt); 969 890 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || 970 891 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { 971 - idb = (struct srp_indirect_buf *)(srp_cmd->add_data 972 - + add_cdb_offset); 892 + struct srp_indirect_buf *idb = srpt_get_desc_buf(srp_cmd); 893 + int nbufs = be32_to_cpu(idb->table_desc.len) / 894 + sizeof(struct srp_direct_buf); 973 895 974 - ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db); 975 - 976 - if (ioctx->n_rbuf > 896 + if (nbufs > 977 897 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { 978 898 pr_err("received unsupported SRP_CMD request" 979 899 " type (%u out + %u in != %u / %zu)\n", 980 900 srp_cmd->data_out_desc_cnt, 981 901 srp_cmd->data_in_desc_cnt, 982 902 be32_to_cpu(idb->table_desc.len), 983 - sizeof(*db)); 984 - ioctx->n_rbuf = 0; 985 - ret = -EINVAL; 986 - goto out; 903 + sizeof(struct srp_direct_buf)); 904 + return -EINVAL; 987 905 } 988 906 989 - if (ioctx->n_rbuf == 1) 990 - ioctx->rbufs = &ioctx->single_rbuf; 991 - else { 992 - ioctx->rbufs = 993 - kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC); 994 - if (!ioctx->rbufs) { 995 - ioctx->n_rbuf = 0; 996 - ret = -ENOMEM; 997 - goto out; 998 - } 999 - } 1000 - 1001 - db = idb->desc_list; 1002 - memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db)); 1003 907 *data_len = be32_to_cpu(idb->len); 908 + return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs, 909 + sg, sg_cnt); 910 + } else { 911 + *data_len = 0; 912 + return 0; 1004 913 } 1005 - out: 1006 - return ret; 1007 914 } 1008 915 1009 916 /** ··· 1072 1049 } 1073 1050 1074 1051 /** 1075 - * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list. 1076 - */ 1077 - static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, 1078 - struct srpt_send_ioctx *ioctx) 1079 - { 1080 - struct scatterlist *sg; 1081 - enum dma_data_direction dir; 1082 - 1083 - BUG_ON(!ch); 1084 - BUG_ON(!ioctx); 1085 - BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs); 1086 - 1087 - while (ioctx->n_rdma) 1088 - kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list); 1089 - 1090 - kfree(ioctx->rdma_wrs); 1091 - ioctx->rdma_wrs = NULL; 1092 - 1093 - if (ioctx->mapped_sg_count) { 1094 - sg = ioctx->sg; 1095 - WARN_ON(!sg); 1096 - dir = ioctx->cmd.data_direction; 1097 - BUG_ON(dir == DMA_NONE); 1098 - ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, 1099 - target_reverse_dma_direction(&ioctx->cmd)); 1100 - ioctx->mapped_sg_count = 0; 1101 - } 1102 - } 1103 - 1104 - /** 1105 - * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list. 
1106 - */ 1107 - static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, 1108 - struct srpt_send_ioctx *ioctx) 1109 - { 1110 - struct ib_device *dev = ch->sport->sdev->device; 1111 - struct se_cmd *cmd; 1112 - struct scatterlist *sg, *sg_orig; 1113 - int sg_cnt; 1114 - enum dma_data_direction dir; 1115 - struct ib_rdma_wr *riu; 1116 - struct srp_direct_buf *db; 1117 - dma_addr_t dma_addr; 1118 - struct ib_sge *sge; 1119 - u64 raddr; 1120 - u32 rsize; 1121 - u32 tsize; 1122 - u32 dma_len; 1123 - int count, nrdma; 1124 - int i, j, k; 1125 - 1126 - BUG_ON(!ch); 1127 - BUG_ON(!ioctx); 1128 - cmd = &ioctx->cmd; 1129 - dir = cmd->data_direction; 1130 - BUG_ON(dir == DMA_NONE); 1131 - 1132 - ioctx->sg = sg = sg_orig = cmd->t_data_sg; 1133 - ioctx->sg_cnt = sg_cnt = cmd->t_data_nents; 1134 - 1135 - count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, 1136 - target_reverse_dma_direction(cmd)); 1137 - if (unlikely(!count)) 1138 - return -EAGAIN; 1139 - 1140 - ioctx->mapped_sg_count = count; 1141 - 1142 - if (ioctx->rdma_wrs && ioctx->n_rdma_wrs) 1143 - nrdma = ioctx->n_rdma_wrs; 1144 - else { 1145 - nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE 1146 - + ioctx->n_rbuf; 1147 - 1148 - ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs), 1149 - GFP_KERNEL); 1150 - if (!ioctx->rdma_wrs) 1151 - goto free_mem; 1152 - 1153 - ioctx->n_rdma_wrs = nrdma; 1154 - } 1155 - 1156 - db = ioctx->rbufs; 1157 - tsize = cmd->data_length; 1158 - dma_len = ib_sg_dma_len(dev, &sg[0]); 1159 - riu = ioctx->rdma_wrs; 1160 - 1161 - /* 1162 - * For each remote desc - calculate the #ib_sge. 1163 - * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then 1164 - * each remote desc rdma_iu is required a rdma wr; 1165 - * else 1166 - * we need to allocate extra rdma_iu to carry extra #ib_sge in 1167 - * another rdma wr 1168 - */ 1169 - for (i = 0, j = 0; 1170 - j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { 1171 - rsize = be32_to_cpu(db->len); 1172 - raddr = be64_to_cpu(db->va); 1173 - riu->remote_addr = raddr; 1174 - riu->rkey = be32_to_cpu(db->key); 1175 - riu->wr.num_sge = 0; 1176 - 1177 - /* calculate how many sge required for this remote_buf */ 1178 - while (rsize > 0 && tsize > 0) { 1179 - 1180 - if (rsize >= dma_len) { 1181 - tsize -= dma_len; 1182 - rsize -= dma_len; 1183 - raddr += dma_len; 1184 - 1185 - if (tsize > 0) { 1186 - ++j; 1187 - if (j < count) { 1188 - sg = sg_next(sg); 1189 - dma_len = ib_sg_dma_len( 1190 - dev, sg); 1191 - } 1192 - } 1193 - } else { 1194 - tsize -= rsize; 1195 - dma_len -= rsize; 1196 - rsize = 0; 1197 - } 1198 - 1199 - ++riu->wr.num_sge; 1200 - 1201 - if (rsize > 0 && 1202 - riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) { 1203 - ++ioctx->n_rdma; 1204 - riu->wr.sg_list = kmalloc_array(riu->wr.num_sge, 1205 - sizeof(*riu->wr.sg_list), 1206 - GFP_KERNEL); 1207 - if (!riu->wr.sg_list) 1208 - goto free_mem; 1209 - 1210 - ++riu; 1211 - riu->wr.num_sge = 0; 1212 - riu->remote_addr = raddr; 1213 - riu->rkey = be32_to_cpu(db->key); 1214 - } 1215 - } 1216 - 1217 - ++ioctx->n_rdma; 1218 - riu->wr.sg_list = kmalloc_array(riu->wr.num_sge, 1219 - sizeof(*riu->wr.sg_list), 1220 - GFP_KERNEL); 1221 - if (!riu->wr.sg_list) 1222 - goto free_mem; 1223 - } 1224 - 1225 - db = ioctx->rbufs; 1226 - tsize = cmd->data_length; 1227 - riu = ioctx->rdma_wrs; 1228 - sg = sg_orig; 1229 - dma_len = ib_sg_dma_len(dev, &sg[0]); 1230 - dma_addr = ib_sg_dma_address(dev, &sg[0]); 1231 - 1232 - /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */ 1233 - for (i = 0, j = 
0; 1234 - j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { 1235 - rsize = be32_to_cpu(db->len); 1236 - sge = riu->wr.sg_list; 1237 - k = 0; 1238 - 1239 - while (rsize > 0 && tsize > 0) { 1240 - sge->addr = dma_addr; 1241 - sge->lkey = ch->sport->sdev->pd->local_dma_lkey; 1242 - 1243 - if (rsize >= dma_len) { 1244 - sge->length = 1245 - (tsize < dma_len) ? tsize : dma_len; 1246 - tsize -= dma_len; 1247 - rsize -= dma_len; 1248 - 1249 - if (tsize > 0) { 1250 - ++j; 1251 - if (j < count) { 1252 - sg = sg_next(sg); 1253 - dma_len = ib_sg_dma_len( 1254 - dev, sg); 1255 - dma_addr = ib_sg_dma_address( 1256 - dev, sg); 1257 - } 1258 - } 1259 - } else { 1260 - sge->length = (tsize < rsize) ? tsize : rsize; 1261 - tsize -= rsize; 1262 - dma_len -= rsize; 1263 - dma_addr += rsize; 1264 - rsize = 0; 1265 - } 1266 - 1267 - ++k; 1268 - if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) { 1269 - ++riu; 1270 - sge = riu->wr.sg_list; 1271 - k = 0; 1272 - } else if (rsize > 0 && tsize > 0) 1273 - ++sge; 1274 - } 1275 - } 1276 - 1277 - return 0; 1278 - 1279 - free_mem: 1280 - srpt_unmap_sg_to_ib_sge(ch, ioctx); 1281 - 1282 - return -ENOMEM; 1283 - } 1284 - 1285 - /** 1286 1052 * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator. 1287 1053 */ 1288 1054 static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) ··· 1096 1284 BUG_ON(ioctx->ch != ch); 1097 1285 spin_lock_init(&ioctx->spinlock); 1098 1286 ioctx->state = SRPT_STATE_NEW; 1099 - ioctx->n_rbuf = 0; 1100 - ioctx->rbufs = NULL; 1101 1287 ioctx->n_rdma = 0; 1102 - ioctx->n_rdma_wrs = 0; 1103 - ioctx->rdma_wrs = NULL; 1104 - ioctx->mapped_sg_count = 0; 1288 + ioctx->n_rw_ctx = 0; 1105 1289 init_completion(&ioctx->tx_done); 1106 1290 ioctx->queue_status_only = false; 1107 1291 /* ··· 1167 1359 * SRP_RSP sending failed or the SRP_RSP send completion has 1168 1360 * not been received in time. 1169 1361 */ 1170 - srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); 1171 1362 transport_generic_free_cmd(&ioctx->cmd, 0); 1172 1363 break; 1173 1364 case SRPT_STATE_MGMT_RSP_SENT: ··· 1194 1387 1195 1388 WARN_ON(ioctx->n_rdma <= 0); 1196 1389 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); 1390 + ioctx->n_rdma = 0; 1197 1391 1198 1392 if (unlikely(wc->status != IB_WC_SUCCESS)) { 1199 1393 pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n", ··· 1209 1401 else 1210 1402 pr_err("%s[%d]: wrong state = %d\n", __func__, 1211 1403 __LINE__, srpt_get_cmd_state(ioctx)); 1212 - } 1213 - 1214 - static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) 1215 - { 1216 - struct srpt_send_ioctx *ioctx = 1217 - container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); 1218 - 1219 - if (unlikely(wc->status != IB_WC_SUCCESS)) { 1220 - /* 1221 - * Note: if an RDMA write error completion is received that 1222 - * means that a SEND also has been posted. Defer further 1223 - * processing of the associated command until the send error 1224 - * completion has been received. 
1225 - */ 1226 - pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", 1227 - ioctx, wc->status); 1228 - } 1229 1404 } 1230 1405 1231 1406 /** ··· 1328 1537 { 1329 1538 struct se_cmd *cmd; 1330 1539 struct srp_cmd *srp_cmd; 1540 + struct scatterlist *sg = NULL; 1541 + unsigned sg_cnt = 0; 1331 1542 u64 data_len; 1332 1543 enum dma_data_direction dir; 1333 1544 int rc; ··· 1356 1563 break; 1357 1564 } 1358 1565 1359 - if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { 1360 - pr_err("0x%llx: parsing SRP descriptor table failed.\n", 1361 - srp_cmd->tag); 1566 + rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt, 1567 + &data_len); 1568 + if (rc) { 1569 + if (rc != -EAGAIN) { 1570 + pr_err("0x%llx: parsing SRP descriptor table failed.\n", 1571 + srp_cmd->tag); 1572 + } 1362 1573 goto release_ioctx; 1363 1574 } 1364 1575 1365 - rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, 1576 + rc = target_submit_cmd_map_sgls(cmd, ch->sess, srp_cmd->cdb, 1366 1577 &send_ioctx->sense_data[0], 1367 1578 scsilun_to_int(&srp_cmd->lun), data_len, 1368 - TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); 1579 + TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF, 1580 + sg, sg_cnt, NULL, 0, NULL, 0); 1369 1581 if (rc != 0) { 1370 1582 pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc, 1371 1583 srp_cmd->tag); ··· 1462 1664 recv_ioctx->ioctx.dma, srp_max_req_size, 1463 1665 DMA_FROM_DEVICE); 1464 1666 1465 - if (unlikely(ch->state == CH_CONNECTING)) { 1466 - list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); 1467 - goto out; 1468 - } 1667 + if (unlikely(ch->state == CH_CONNECTING)) 1668 + goto out_wait; 1469 1669 1470 1670 if (unlikely(ch->state != CH_LIVE)) 1471 - goto out; 1671 + return; 1472 1672 1473 1673 srp_cmd = recv_ioctx->ioctx.buf; 1474 1674 if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { 1475 - if (!send_ioctx) 1675 + if (!send_ioctx) { 1676 + if (!list_empty(&ch->cmd_wait_list)) 1677 + goto out_wait; 1476 1678 send_ioctx = srpt_get_send_ioctx(ch); 1477 - if (unlikely(!send_ioctx)) { 1478 - list_add_tail(&recv_ioctx->wait_list, 1479 - &ch->cmd_wait_list); 1480 - goto out; 1481 1679 } 1680 + if (unlikely(!send_ioctx)) 1681 + goto out_wait; 1482 1682 } 1483 1683 1484 1684 switch (srp_cmd->opcode) { ··· 1505 1709 } 1506 1710 1507 1711 srpt_post_recv(ch->sport->sdev, recv_ioctx); 1508 - out: 1509 1712 return; 1713 + 1714 + out_wait: 1715 + list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); 1510 1716 } 1511 1717 1512 1718 static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) ··· 1577 1779 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && 1578 1780 state != SRPT_STATE_MGMT_RSP_SENT); 1579 1781 1580 - atomic_inc(&ch->sq_wr_avail); 1782 + atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail); 1581 1783 1582 1784 if (wc->status != IB_WC_SUCCESS) 1583 1785 pr_info("sending response for ioctx 0x%p failed" 1584 1786 " with status %d\n", ioctx, wc->status); 1585 1787 1586 1788 if (state != SRPT_STATE_DONE) { 1587 - srpt_unmap_sg_to_ib_sge(ch, ioctx); 1588 1789 transport_generic_free_cmd(&ioctx->cmd, 0); 1589 1790 } else { 1590 1791 pr_err("IB completion has been received too late for" ··· 1629 1832 qp_init->srq = sdev->srq; 1630 1833 qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; 1631 1834 qp_init->qp_type = IB_QPT_RC; 1632 - qp_init->cap.max_send_wr = srp_sq_size; 1633 - qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; 1835 + /* 1836 + * We divide up our send queue size into half SEND WRs to send the 1837 + * completions, and half R/W contexts to actually do 
the RDMA 1838 + * READ/WRITE transfers. Note that we need to allocate CQ slots for 1839 + * both both, as RDMA contexts will also post completions for the 1840 + * RDMA READ case. 1841 + */ 1842 + qp_init->cap.max_send_wr = srp_sq_size / 2; 1843 + qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; 1844 + qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd, 1845 + sdev->device->attrs.max_sge); 1846 + qp_init->port_num = ch->sport->port; 1634 1847 1635 1848 ch->qp = ib_create_qp(sdev->pd, qp_init); 1636 1849 if (IS_ERR(ch->qp)) { ··· 2193 2386 return ret; 2194 2387 } 2195 2388 2196 - /** 2197 - * srpt_perform_rdmas() - Perform IB RDMA. 2198 - * 2199 - * Returns zero upon success or a negative number upon failure. 2200 - */ 2201 - static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, 2202 - struct srpt_send_ioctx *ioctx) 2203 - { 2204 - struct ib_send_wr *bad_wr; 2205 - int sq_wr_avail, ret, i; 2206 - enum dma_data_direction dir; 2207 - const int n_rdma = ioctx->n_rdma; 2208 - 2209 - dir = ioctx->cmd.data_direction; 2210 - if (dir == DMA_TO_DEVICE) { 2211 - /* write */ 2212 - ret = -ENOMEM; 2213 - sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail); 2214 - if (sq_wr_avail < 0) { 2215 - pr_warn("IB send queue full (needed %d)\n", 2216 - n_rdma); 2217 - goto out; 2218 - } 2219 - } 2220 - 2221 - for (i = 0; i < n_rdma; i++) { 2222 - struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr; 2223 - 2224 - wr->opcode = (dir == DMA_FROM_DEVICE) ? 2225 - IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 2226 - 2227 - if (i == n_rdma - 1) { 2228 - /* only get completion event for the last rdma read */ 2229 - if (dir == DMA_TO_DEVICE) { 2230 - wr->send_flags = IB_SEND_SIGNALED; 2231 - ioctx->rdma_cqe.done = srpt_rdma_read_done; 2232 - } else { 2233 - ioctx->rdma_cqe.done = srpt_rdma_write_done; 2234 - } 2235 - wr->wr_cqe = &ioctx->rdma_cqe; 2236 - wr->next = NULL; 2237 - } else { 2238 - wr->wr_cqe = NULL; 2239 - wr->next = &ioctx->rdma_wrs[i + 1].wr; 2240 - } 2241 - } 2242 - 2243 - ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr); 2244 - if (ret) 2245 - pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", 2246 - __func__, __LINE__, ret, i, n_rdma); 2247 - out: 2248 - if (unlikely(dir == DMA_TO_DEVICE && ret < 0)) 2249 - atomic_add(n_rdma, &ch->sq_wr_avail); 2250 - return ret; 2251 - } 2252 - 2253 - /** 2254 - * srpt_xfer_data() - Start data transfer from initiator to target. 
2255 - */ 2256 - static int srpt_xfer_data(struct srpt_rdma_ch *ch, 2257 - struct srpt_send_ioctx *ioctx) 2258 - { 2259 - int ret; 2260 - 2261 - ret = srpt_map_sg_to_ib_sge(ch, ioctx); 2262 - if (ret) { 2263 - pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret); 2264 - goto out; 2265 - } 2266 - 2267 - ret = srpt_perform_rdmas(ch, ioctx); 2268 - if (ret) { 2269 - if (ret == -EAGAIN || ret == -ENOMEM) 2270 - pr_info("%s[%d] queue full -- ret=%d\n", 2271 - __func__, __LINE__, ret); 2272 - else 2273 - pr_err("%s[%d] fatal error -- ret=%d\n", 2274 - __func__, __LINE__, ret); 2275 - goto out_unmap; 2276 - } 2277 - 2278 - out: 2279 - return ret; 2280 - out_unmap: 2281 - srpt_unmap_sg_to_ib_sge(ch, ioctx); 2282 - goto out; 2283 - } 2284 - 2285 2389 static int srpt_write_pending_status(struct se_cmd *se_cmd) 2286 2390 { 2287 2391 struct srpt_send_ioctx *ioctx; ··· 2209 2491 struct srpt_send_ioctx *ioctx = 2210 2492 container_of(se_cmd, struct srpt_send_ioctx, cmd); 2211 2493 struct srpt_rdma_ch *ch = ioctx->ch; 2494 + struct ib_send_wr *first_wr = NULL, *bad_wr; 2495 + struct ib_cqe *cqe = &ioctx->rdma_cqe; 2212 2496 enum srpt_command_state new_state; 2497 + int ret, i; 2213 2498 2214 2499 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); 2215 2500 WARN_ON(new_state == SRPT_STATE_DONE); 2216 - return srpt_xfer_data(ch, ioctx); 2501 + 2502 + if (atomic_sub_return(ioctx->n_rdma, &ch->sq_wr_avail) < 0) { 2503 + pr_warn("%s: IB send queue full (needed %d)\n", 2504 + __func__, ioctx->n_rdma); 2505 + ret = -ENOMEM; 2506 + goto out_undo; 2507 + } 2508 + 2509 + cqe->done = srpt_rdma_read_done; 2510 + for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) { 2511 + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; 2512 + 2513 + first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, ch->sport->port, 2514 + cqe, first_wr); 2515 + cqe = NULL; 2516 + } 2517 + 2518 + ret = ib_post_send(ch->qp, first_wr, &bad_wr); 2519 + if (ret) { 2520 + pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", 2521 + __func__, ret, ioctx->n_rdma, 2522 + atomic_read(&ch->sq_wr_avail)); 2523 + goto out_undo; 2524 + } 2525 + 2526 + return 0; 2527 + out_undo: 2528 + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); 2529 + return ret; 2217 2530 } 2218 2531 2219 2532 static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) ··· 2266 2517 */ 2267 2518 static void srpt_queue_response(struct se_cmd *cmd) 2268 2519 { 2269 - struct srpt_rdma_ch *ch; 2270 - struct srpt_send_ioctx *ioctx; 2520 + struct srpt_send_ioctx *ioctx = 2521 + container_of(cmd, struct srpt_send_ioctx, cmd); 2522 + struct srpt_rdma_ch *ch = ioctx->ch; 2523 + struct srpt_device *sdev = ch->sport->sdev; 2524 + struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr; 2525 + struct ib_sge sge; 2271 2526 enum srpt_command_state state; 2272 2527 unsigned long flags; 2273 - int ret; 2274 - enum dma_data_direction dir; 2275 - int resp_len; 2528 + int resp_len, ret, i; 2276 2529 u8 srp_tm_status; 2277 2530 2278 - ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); 2279 - ch = ioctx->ch; 2280 2531 BUG_ON(!ch); 2281 2532 2282 2533 spin_lock_irqsave(&ioctx->spinlock, flags); ··· 2303 2554 return; 2304 2555 } 2305 2556 2306 - dir = ioctx->cmd.data_direction; 2307 - 2308 2557 /* For read commands, transfer the data to the initiator. 
*/ 2309 - if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length && 2558 + if (ioctx->cmd.data_direction == DMA_FROM_DEVICE && 2559 + ioctx->cmd.data_length && 2310 2560 !ioctx->queue_status_only) { 2311 - ret = srpt_xfer_data(ch, ioctx); 2312 - if (ret) { 2313 - pr_err("xfer_data failed for tag %llu\n", 2314 - ioctx->cmd.tag); 2315 - return; 2561 + for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) { 2562 + struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; 2563 + 2564 + first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, 2565 + ch->sport->port, NULL, 2566 + first_wr ? first_wr : &send_wr); 2316 2567 } 2568 + } else { 2569 + first_wr = &send_wr; 2317 2570 } 2318 2571 2319 2572 if (state != SRPT_STATE_MGMT) ··· 2327 2576 resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, 2328 2577 ioctx->cmd.tag); 2329 2578 } 2330 - ret = srpt_post_send(ch, ioctx, resp_len); 2331 - if (ret) { 2332 - pr_err("sending cmd response failed for tag %llu\n", 2333 - ioctx->cmd.tag); 2334 - srpt_unmap_sg_to_ib_sge(ch, ioctx); 2335 - srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); 2336 - target_put_sess_cmd(&ioctx->cmd); 2579 + 2580 + atomic_inc(&ch->req_lim); 2581 + 2582 + if (unlikely(atomic_sub_return(1 + ioctx->n_rdma, 2583 + &ch->sq_wr_avail) < 0)) { 2584 + pr_warn("%s: IB send queue full (needed %d)\n", 2585 + __func__, ioctx->n_rdma); 2586 + ret = -ENOMEM; 2587 + goto out; 2337 2588 } 2589 + 2590 + ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, resp_len, 2591 + DMA_TO_DEVICE); 2592 + 2593 + sge.addr = ioctx->ioctx.dma; 2594 + sge.length = resp_len; 2595 + sge.lkey = sdev->pd->local_dma_lkey; 2596 + 2597 + ioctx->ioctx.cqe.done = srpt_send_done; 2598 + send_wr.next = NULL; 2599 + send_wr.wr_cqe = &ioctx->ioctx.cqe; 2600 + send_wr.sg_list = &sge; 2601 + send_wr.num_sge = 1; 2602 + send_wr.opcode = IB_WR_SEND; 2603 + send_wr.send_flags = IB_SEND_SIGNALED; 2604 + 2605 + ret = ib_post_send(ch->qp, first_wr, &bad_wr); 2606 + if (ret < 0) { 2607 + pr_err("%s: sending cmd response failed for tag %llu (%d)\n", 2608 + __func__, ioctx->cmd.tag, ret); 2609 + goto out; 2610 + } 2611 + 2612 + return; 2613 + 2614 + out: 2615 + atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail); 2616 + atomic_dec(&ch->req_lim); 2617 + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); 2618 + target_put_sess_cmd(&ioctx->cmd); 2338 2619 } 2339 2620 2340 2621 static int srpt_queue_data_in(struct se_cmd *cmd) ··· 2382 2599 2383 2600 static void srpt_aborted_task(struct se_cmd *cmd) 2384 2601 { 2385 - struct srpt_send_ioctx *ioctx = container_of(cmd, 2386 - struct srpt_send_ioctx, cmd); 2387 - 2388 - srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); 2389 2602 } 2390 2603 2391 2604 static int srpt_queue_status(struct se_cmd *cmd) ··· 2682 2903 unsigned long flags; 2683 2904 2684 2905 WARN_ON(ioctx->state != SRPT_STATE_DONE); 2685 - WARN_ON(ioctx->mapped_sg_count != 0); 2686 2906 2687 - if (ioctx->n_rbuf > 1) { 2688 - kfree(ioctx->rbufs); 2689 - ioctx->rbufs = NULL; 2690 - ioctx->n_rbuf = 0; 2907 + if (ioctx->n_rw_ctx) { 2908 + srpt_free_rw_ctxs(ch, ioctx); 2909 + ioctx->n_rw_ctx = 0; 2691 2910 } 2692 2911 2693 2912 spin_lock_irqsave(&ch->spinlock, flags);
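One structural effect of the conversion above: srpt_write_pending() and srpt_queue_response() no longer hand-build SGE lists, they chain the work requests produced by rdma_rw_ctx_wrs() back to front and post everything with a single ib_post_send(). The sketch below shows only that chaining order in user space; struct wr and ctx_wrs() are stand-ins, not the IB core's ib_send_wr or the rdma_rw API.

#include <stdio.h>

struct wr { const char *what; struct wr *next; };

/* Stand-in for rdma_rw_ctx_wrs(): put this context's WR(s) in front of 'chain'. */
static struct wr *ctx_wrs(struct wr *w, struct wr *chain)
{
    w->next = chain;
    return w;
}

int main(void)
{
    struct wr send = { "SEND (SRP response)", NULL };
    struct wr rdma[3] = {
        { "RDMA WRITE, ctx 0", NULL },
        { "RDMA WRITE, ctx 1", NULL },
        { "RDMA WRITE, ctx 2", NULL },
    };
    struct wr *first_wr = &send;
    struct wr *w;
    int i;

    /* Walk the contexts last-to-first, as srpt_queue_response() does above. */
    for (i = 2; i >= 0; i--)
        first_wr = ctx_wrs(&rdma[i], first_wr);

    /* A single ib_post_send() would submit this whole chain. */
    for (w = first_wr; w; w = w->next)
        printf("%s\n", w->what);
    return 0;
}

Because the loop runs from the last context to the first, ctx 0 ends up at the head of the chain and the SEND carrying the SRP response goes out last. In the write-pending path the completion cqe is passed only on the first loop iteration, so only the final RDMA READ in the chain signals srpt_rdma_read_done().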
+12 -19
drivers/infiniband/ulp/srpt/ib_srpt.h
··· 42 42 #include <rdma/ib_verbs.h> 43 43 #include <rdma/ib_sa.h> 44 44 #include <rdma/ib_cm.h> 45 + #include <rdma/rw.h> 45 46 46 47 #include <scsi/srp.h> 47 48 ··· 106 105 SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, 107 106 108 107 SRPT_DEF_SG_TABLESIZE = 128, 109 - SRPT_DEF_SG_PER_WQE = 16, 110 108 111 109 MIN_SRPT_SQ_SIZE = 16, 112 110 DEF_SRPT_SQ_SIZE = 4096, ··· 174 174 struct srpt_ioctx ioctx; 175 175 struct list_head wait_list; 176 176 }; 177 + 178 + struct srpt_rw_ctx { 179 + struct rdma_rw_ctx rw; 180 + struct scatterlist *sg; 181 + unsigned int nents; 182 + }; 177 183 178 184 /** 179 185 * struct srpt_send_ioctx - SRPT send I/O context. 180 186 * @ioctx: See above. 181 187 * @ch: Channel pointer. 182 - * @free_list: Node in srpt_rdma_ch.free_list. 183 - * @n_rbuf: Number of data buffers in the received SRP command. 184 - * @rbufs: Pointer to SRP data buffer array. 185 - * @single_rbuf: SRP data buffer if the command has only a single buffer. 186 - * @sg: Pointer to sg-list associated with this I/O context. 187 - * @sg_cnt: SG-list size. 188 - * @mapped_sg_count: ib_dma_map_sg() return value. 189 - * @n_rdma_wrs: Number of elements in the rdma_wrs array. 190 - * @rdma_wrs: Array with information about the RDMA mapping. 191 - * @tag: Tag of the received SRP information unit. 192 188 * @spinlock: Protects 'state'. 193 189 * @state: I/O context state. 194 190 * @cmd: Target core command data structure. ··· 193 197 struct srpt_send_ioctx { 194 198 struct srpt_ioctx ioctx; 195 199 struct srpt_rdma_ch *ch; 196 - struct ib_rdma_wr *rdma_wrs; 200 + 201 + struct srpt_rw_ctx s_rw_ctx; 202 + struct srpt_rw_ctx *rw_ctxs; 203 + 197 204 struct ib_cqe rdma_cqe; 198 - struct srp_direct_buf *rbufs; 199 - struct srp_direct_buf single_rbuf; 200 - struct scatterlist *sg; 201 205 struct list_head free_list; 202 206 spinlock_t spinlock; 203 207 enum srpt_command_state state; 204 208 struct se_cmd cmd; 205 209 struct completion tx_done; 206 - int sg_cnt; 207 - int mapped_sg_count; 208 - u16 n_rdma_wrs; 209 210 u8 n_rdma; 210 - u8 n_rbuf; 211 + u8 n_rw_ctx; 211 212 bool queue_status_only; 212 213 u8 sense_data[TRANSPORT_SENSE_BUFFER]; 213 214 };
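The srpt_rw_ctx bookkeeping replaces rbufs/single_rbuf, and srpt_alloc_rw_ctxs() keeps the common single-descriptor command allocation-free by pointing rw_ctxs at the embedded s_rw_ctx. A stripped-down user-space sketch of that small-object pattern; the types and helpers here are placeholders, not the driver's:

#include <stdio.h>
#include <stdlib.h>

struct rw_ctx { int dummy; };

struct ioctx {
    struct rw_ctx s_rw_ctx;   /* embedded context for the common nbufs == 1 case */
    struct rw_ctx *rw_ctxs;   /* points at s_rw_ctx or at a heap-allocated array */
};

static int alloc_rw_ctxs(struct ioctx *io, int nbufs)
{
    if (nbufs == 1) {
        io->rw_ctxs = &io->s_rw_ctx;
    } else {
        io->rw_ctxs = calloc(nbufs, sizeof(*io->rw_ctxs));
        if (!io->rw_ctxs)
            return -1;
    }
    return 0;
}

static void free_rw_ctxs(struct ioctx *io)
{
    if (io->rw_ctxs != &io->s_rw_ctx)
        free(io->rw_ctxs);
}

int main(void)
{
    struct ioctx io;
    int nbufs;

    for (nbufs = 1; nbufs <= 4; nbufs *= 4) {
        if (alloc_rw_ctxs(&io, nbufs))
            return 1;
        printf("%d buffer(s): %s allocation\n", nbufs,
               io.rw_ctxs == &io.s_rw_ctx ? "no heap" : "heap");
        free_rw_ctxs(&io);
    }
    return 0;
}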
+4
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
··· 1392 1392 #define T5_ULP_MEMIO_ORDER_V(x) ((x) << T5_ULP_MEMIO_ORDER_S) 1393 1393 #define T5_ULP_MEMIO_ORDER_F T5_ULP_MEMIO_ORDER_V(1U) 1394 1394 1395 + #define T5_ULP_MEMIO_FID_S 4 1396 + #define T5_ULP_MEMIO_FID_M 0x7ff 1397 + #define T5_ULP_MEMIO_FID_V(x) ((x) << T5_ULP_MEMIO_FID_S) 1398 + 1395 1399 /* ulp_mem_io.lock_addr fields */ 1396 1400 #define ULP_MEMIO_ADDR_S 0 1397 1401 #define ULP_MEMIO_ADDR_V(x) ((x) << ULP_MEMIO_ADDR_S)
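The added T5_ULP_MEMIO_FID_* defines follow the file's usual _S/_M/_V convention: bit shift, field mask, and value builder for an 11-bit FID field starting at bit 4. A tiny stand-alone example of packing the field and reading it back; the FID value below is made up:

#include <stdio.h>
#include <stdint.h>

#define T5_ULP_MEMIO_FID_S 4
#define T5_ULP_MEMIO_FID_M 0x7ff
#define T5_ULP_MEMIO_FID_V(x) ((x) << T5_ULP_MEMIO_FID_S)

int main(void)
{
    uint32_t fid = 0x123;  /* example 11-bit FID, not a real device value */
    uint32_t word = T5_ULP_MEMIO_FID_V(fid & T5_ULP_MEMIO_FID_M);

    printf("packed word = 0x%08x, FID read back = 0x%03x\n",
           word, (word >> T5_ULP_MEMIO_FID_S) & T5_ULP_MEMIO_FID_M);
    return 0;
}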
+59
drivers/net/ethernet/mellanox/mlx5/core/cq.c
··· 39 39 #include <linux/mlx5/cq.h> 40 40 #include "mlx5_core.h" 41 41 42 + #define TASKLET_MAX_TIME 2 43 + #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) 44 + 45 + void mlx5_cq_tasklet_cb(unsigned long data) 46 + { 47 + unsigned long flags; 48 + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; 49 + struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data; 50 + struct mlx5_core_cq *mcq; 51 + struct mlx5_core_cq *temp; 52 + 53 + spin_lock_irqsave(&ctx->lock, flags); 54 + list_splice_tail_init(&ctx->list, &ctx->process_list); 55 + spin_unlock_irqrestore(&ctx->lock, flags); 56 + 57 + list_for_each_entry_safe(mcq, temp, &ctx->process_list, 58 + tasklet_ctx.list) { 59 + list_del_init(&mcq->tasklet_ctx.list); 60 + mcq->tasklet_ctx.comp(mcq); 61 + if (atomic_dec_and_test(&mcq->refcount)) 62 + complete(&mcq->free); 63 + if (time_after(jiffies, end)) 64 + break; 65 + } 66 + 67 + if (!list_empty(&ctx->process_list)) 68 + tasklet_schedule(&ctx->task); 69 + } 70 + 71 + static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) 72 + { 73 + unsigned long flags; 74 + struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; 75 + 76 + spin_lock_irqsave(&tasklet_ctx->lock, flags); 77 + /* When migrating CQs between EQs will be implemented, please note 78 + * that you need to sync this point. It is possible that 79 + * while migrating a CQ, completions on the old EQs could 80 + * still arrive. 81 + */ 82 + if (list_empty_careful(&cq->tasklet_ctx.list)) { 83 + atomic_inc(&cq->refcount); 84 + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); 85 + } 86 + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); 87 + } 88 + 42 89 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) 43 90 { 44 91 struct mlx5_core_cq *cq; ··· 143 96 struct mlx5_create_cq_mbox_out out; 144 97 struct mlx5_destroy_cq_mbox_in din; 145 98 struct mlx5_destroy_cq_mbox_out dout; 99 + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), 100 + c_eqn); 101 + struct mlx5_eq *eq; 102 + 103 + eq = mlx5_eqn2eq(dev, eqn); 104 + if (IS_ERR(eq)) 105 + return PTR_ERR(eq); 146 106 147 107 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); 148 108 memset(&out, 0, sizeof(out)); ··· 165 111 cq->arm_sn = 0; 166 112 atomic_set(&cq->refcount, 1); 167 113 init_completion(&cq->free); 114 + if (!cq->comp) 115 + cq->comp = mlx5_add_cq_to_tasklet; 116 + /* assuming CQ will be deleted before the EQ */ 117 + cq->tasklet_ctx.priv = &eq->tasklet_ctx; 118 + INIT_LIST_HEAD(&cq->tasklet_ctx.list); 168 119 169 120 spin_lock_irq(&table->lock); 170 121 err = radix_tree_insert(&table->tree, cq->cqn, cq);
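mlx5_cq_tasklet_cb() above splices the shared list into a private process_list under the lock, runs completion handlers for at most TASKLET_MAX_TIME (2 ms) worth of jiffies, and reschedules itself if CQs are still pending, while mlx5_add_cq_to_tasklet() queues each CQ at most once and takes a reference on it. A user-space sketch of the same drain-with-a-budget idea, using a simple count budget instead of a jiffies deadline so it stays self-contained; struct cq and drain() are stand-ins, not the mlx5 types:

#include <stdio.h>

struct cq { int cqn; struct cq *next; };

/* Drain at most 'budget' queued completions; return the still-pending tail. */
static struct cq *drain(struct cq *pending, int budget)
{
    while (pending && budget--) {
        struct cq *cq = pending;

        pending = cq->next;
        printf("completion handler for CQ %d\n", cq->cqn);
    }
    return pending;  /* caller reschedules the "tasklet" if non-NULL */
}

int main(void)
{
    struct cq c2 = { 2, NULL }, c1 = { 1, &c2 }, c0 = { 0, &c1 };
    struct cq *pending = &c0;

    while (pending) {
        pending = drain(pending, 2);
        if (pending)
            printf("budget exhausted, rescheduling\n");
    }
    return 0;
}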
+11 -1
drivers/net/ethernet/mellanox/mlx5/core/eq.c
··· 202 202 struct mlx5_eqe *eqe; 203 203 int eqes_found = 0; 204 204 int set_ci = 0; 205 - u32 cqn; 205 + u32 cqn = -1; 206 206 u32 rsn; 207 207 u8 port; 208 208 ··· 320 320 321 321 eq_update_ci(eq, 1); 322 322 323 + if (cqn != -1) 324 + tasklet_schedule(&eq->tasklet_ctx.task); 325 + 323 326 return eqes_found; 324 327 } 325 328 ··· 406 403 if (err) 407 404 goto err_irq; 408 405 406 + INIT_LIST_HEAD(&eq->tasklet_ctx.list); 407 + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); 408 + spin_lock_init(&eq->tasklet_ctx.lock); 409 + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, 410 + (unsigned long)&eq->tasklet_ctx); 411 + 409 412 /* EQs are created in ARMED state 410 413 */ 411 414 eq_update_ci(eq, 1); ··· 445 436 mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", 446 437 eq->eqn); 447 438 synchronize_irq(eq->irqn); 439 + tasklet_disable(&eq->tasklet_ctx.task); 448 440 mlx5_buf_free(dev, &eq->buf); 449 441 450 442 return err;
+17
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 663 663 } 664 664 EXPORT_SYMBOL(mlx5_vector2eqn); 665 665 666 + struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) 667 + { 668 + struct mlx5_eq_table *table = &dev->priv.eq_table; 669 + struct mlx5_eq *eq; 670 + 671 + spin_lock(&table->lock); 672 + list_for_each_entry(eq, &table->comp_eqs_list, list) 673 + if (eq->eqn == eqn) { 674 + spin_unlock(&table->lock); 675 + return eq; 676 + } 677 + 678 + spin_unlock(&table->lock); 679 + 680 + return ERR_PTR(-ENOENT); 681 + } 682 + 666 683 static void free_comp_eqs(struct mlx5_core_dev *dev) 667 684 { 668 685 struct mlx5_eq_table *table = &dev->priv.eq_table;
+2
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
··· 102 102 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); 103 103 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); 104 104 u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); 105 + struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); 106 + void mlx5_cq_tasklet_cb(unsigned long data); 105 107 106 108 void mlx5e_init(void); 107 109 void mlx5e_cleanup(void);
+47 -46
drivers/staging/rdma/hfi1/affinity.c
··· 53 53 #include "sdma.h" 54 54 #include "trace.h" 55 55 56 - struct cpu_mask_set { 57 - struct cpumask mask; 58 - struct cpumask used; 59 - uint gen; 60 - }; 61 - 62 - struct hfi1_affinity { 63 - struct cpu_mask_set def_intr; 64 - struct cpu_mask_set rcv_intr; 65 - struct cpu_mask_set proc; 66 - /* spin lock to protect affinity struct */ 67 - spinlock_t lock; 68 - }; 69 - 70 56 /* Name of IRQ types, indexed by enum irq_type */ 71 57 static const char * const irq_type_names[] = { 72 58 "SDMA", ··· 68 82 set->gen = 0; 69 83 } 70 84 85 + /* Initialize non-HT cpu cores mask */ 86 + int init_real_cpu_mask(struct hfi1_devdata *dd) 87 + { 88 + struct hfi1_affinity *info; 89 + int possible, curr_cpu, i, ht; 90 + 91 + info = kzalloc(sizeof(*info), GFP_KERNEL); 92 + if (!info) 93 + return -ENOMEM; 94 + 95 + cpumask_clear(&info->real_cpu_mask); 96 + 97 + /* Start with cpu online mask as the real cpu mask */ 98 + cpumask_copy(&info->real_cpu_mask, cpu_online_mask); 99 + 100 + /* 101 + * Remove HT cores from the real cpu mask. Do this in two steps below. 102 + */ 103 + possible = cpumask_weight(&info->real_cpu_mask); 104 + ht = cpumask_weight(topology_sibling_cpumask( 105 + cpumask_first(&info->real_cpu_mask))); 106 + /* 107 + * Step 1. Skip over the first N HT siblings and use them as the 108 + * "real" cores. Assumes that HT cores are not enumerated in 109 + * succession (except in the single core case). 110 + */ 111 + curr_cpu = cpumask_first(&info->real_cpu_mask); 112 + for (i = 0; i < possible / ht; i++) 113 + curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); 114 + /* 115 + * Step 2. Remove the remaining HT siblings. Use cpumask_next() to 116 + * skip any gaps. 117 + */ 118 + for (; i < possible; i++) { 119 + cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask); 120 + curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); 121 + } 122 + 123 + dd->affinity = info; 124 + return 0; 125 + } 126 + 71 127 /* 72 128 * Interrupt affinity. 73 129 * ··· 121 93 * to the node relative 1 as necessary. 122 94 * 123 95 */ 124 - int hfi1_dev_affinity_init(struct hfi1_devdata *dd) 96 + void hfi1_dev_affinity_init(struct hfi1_devdata *dd) 125 97 { 126 98 int node = pcibus_to_node(dd->pcidev->bus); 127 - struct hfi1_affinity *info; 99 + struct hfi1_affinity *info = dd->affinity; 128 100 const struct cpumask *local_mask; 129 - int curr_cpu, possible, i, ht; 101 + int curr_cpu, possible, i; 130 102 131 103 if (node < 0) 132 104 node = numa_node_id(); 133 105 dd->node = node; 134 106 135 - info = kzalloc(sizeof(*info), GFP_KERNEL); 136 - if (!info) 137 - return -ENOMEM; 138 107 spin_lock_init(&info->lock); 139 108 140 109 init_cpu_mask_set(&info->def_intr); ··· 141 116 local_mask = cpumask_of_node(dd->node); 142 117 if (cpumask_first(local_mask) >= nr_cpu_ids) 143 118 local_mask = topology_core_cpumask(0); 144 - /* use local mask as default */ 145 - cpumask_copy(&info->def_intr.mask, local_mask); 146 - /* 147 - * Remove HT cores from the default mask. Do this in two steps below. 148 - */ 149 - possible = cpumask_weight(&info->def_intr.mask); 150 - ht = cpumask_weight(topology_sibling_cpumask( 151 - cpumask_first(&info->def_intr.mask))); 152 - /* 153 - * Step 1. Skip over the first N HT siblings and use them as the 154 - * "real" cores. Assumes that HT cores are not enumerated in 155 - * succession (except in the single core case). 
156 - */ 157 - curr_cpu = cpumask_first(&info->def_intr.mask); 158 - for (i = 0; i < possible / ht; i++) 159 - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); 160 - /* 161 - * Step 2. Remove the remaining HT siblings. Use cpumask_next() to 162 - * skip any gaps. 163 - */ 164 - for (; i < possible; i++) { 165 - cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); 166 - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); 167 - } 119 + /* Use the "real" cpu mask of this node as the default */ 120 + cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask); 168 121 169 122 /* fill in the receive list */ 170 123 possible = cpumask_weight(&info->def_intr.mask); ··· 170 167 } 171 168 172 169 cpumask_copy(&info->proc.mask, cpu_online_mask); 173 - dd->affinity = info; 174 - return 0; 175 170 } 176 171 177 172 void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
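init_real_cpu_mask() above prunes hyper-threaded siblings from the online mask in two passes: walk past the first possible/ht CPUs, which are kept as the "real" cores, then clear every CPU after that point. A user-space sketch of the same two-step walk over a plain bitmask, assuming an example topology of 4 cores with 2 hardware threads each, where CPUs 0-3 are the first threads and CPUs 4-7 their siblings; the mask_* helpers stand in for the kernel's cpumask API:

#include <stdio.h>
#include <stdint.h>

static int mask_weight(uint64_t m) { return __builtin_popcountll(m); }
static int mask_first(uint64_t m)  { return m ? __builtin_ctzll(m) : -1; }
static int mask_next(int cpu, uint64_t m)
{
    uint64_t higher = m & ~((2ULL << cpu) - 1);  /* bits above 'cpu' */

    return higher ? __builtin_ctzll(higher) : -1;
}

int main(void)
{
    uint64_t real = 0xff;             /* online CPUs 0..7 (example) */
    int ht = 2;                       /* threads per core (example) */
    int possible = mask_weight(real);
    int curr = mask_first(real);
    int i;

    /* Step 1: skip the first possible/ht CPUs and keep them as real cores. */
    for (i = 0; i < possible / ht; i++)
        curr = mask_next(curr, real);

    /* Step 2: clear the remaining HT siblings. */
    for (; i < possible; i++) {
        int next = mask_next(curr, real);

        real &= ~(1ULL << curr);
        curr = next;
    }

    printf("non-HT cpu mask: 0x%llx\n", (unsigned long long)real);
    return 0;
}

The printed mask 0xf shows CPUs 4-7 dropped and only the four physical cores kept, matching the driver comment's assumption that HT siblings are not enumerated in succession.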
+18 -1
drivers/staging/rdma/hfi1/affinity.h
··· 64 64 AFF_IRQ_LOCAL 65 65 }; 66 66 67 + struct cpu_mask_set { 68 + struct cpumask mask; 69 + struct cpumask used; 70 + uint gen; 71 + }; 72 + 73 + struct hfi1_affinity { 74 + struct cpu_mask_set def_intr; 75 + struct cpu_mask_set rcv_intr; 76 + struct cpu_mask_set proc; 77 + struct cpumask real_cpu_mask; 78 + /* spin lock to protect affinity struct */ 79 + spinlock_t lock; 80 + }; 81 + 67 82 struct hfi1_msix_entry; 68 83 84 + /* Initialize non-HT cpu cores mask */ 85 + int init_real_cpu_mask(struct hfi1_devdata *); 69 86 /* Initialize driver affinity data */ 70 - int hfi1_dev_affinity_init(struct hfi1_devdata *); 87 + void hfi1_dev_affinity_init(struct hfi1_devdata *); 71 88 /* Free driver affinity data */ 72 89 void hfi1_dev_affinity_free(struct hfi1_devdata *); 73 90 /*
+464 -189
drivers/staging/rdma/hfi1/chip.c
··· 123 123 124 124 #define MIN_KERNEL_KCTXTS 2 125 125 #define FIRST_KERNEL_KCTXT 1 126 + /* sizes for both the QP and RSM map tables */ 127 + #define NUM_MAP_ENTRIES 256 126 128 #define NUM_MAP_REGS 32 127 129 128 130 /* Bit offset into the GUID which carries HFI id information */ ··· 1031 1029 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, 1032 1030 int msecs); 1033 1031 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); 1032 + static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); 1034 1033 static void handle_temp_err(struct hfi1_devdata *); 1035 1034 static void dc_shutdown(struct hfi1_devdata *); 1036 1035 static void dc_start(struct hfi1_devdata *); 1036 + static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, 1037 + unsigned int *np); 1037 1038 1038 1039 /* 1039 1040 * Error interrupt table entry. This is used as input to the interrupt ··· 5666 5661 sci = &dd->send_contexts[sw_index]; 5667 5662 5668 5663 /* there is no information for user (PSM) and ack contexts */ 5669 - if (sci->type != SC_KERNEL) 5664 + if ((sci->type != SC_KERNEL) && (sci->type != SC_VL15)) 5670 5665 return -1; 5671 5666 5672 5667 sc = sci->sc; ··· 6204 6199 6205 6200 /* 6206 6201 * Handle host requests from the 8051. 6207 - * 6208 - * This is a work-queue function outside of the interrupt. 6209 6202 */ 6210 - void handle_8051_request(struct work_struct *work) 6203 + static void handle_8051_request(struct hfi1_pportdata *ppd) 6211 6204 { 6212 - struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, 6213 - dc_host_req_work); 6214 6205 struct hfi1_devdata *dd = ppd->dd; 6215 6206 u64 reg; 6216 6207 u16 data = 0; 6217 - u8 type, i, lanes, *cache = ppd->qsfp_info.cache; 6218 - u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS]; 6208 + u8 type; 6219 6209 6220 6210 reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1); 6221 6211 if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0) ··· 6231 6231 case HREQ_READ_CONFIG: 6232 6232 case HREQ_SET_TX_EQ_ABS: 6233 6233 case HREQ_SET_TX_EQ_REL: 6234 + case HREQ_ENABLE: 6234 6235 dd_dev_info(dd, "8051 request: request 0x%x not supported\n", 6235 6236 type); 6236 6237 hreq_response(dd, HREQ_NOT_SUPPORTED, 0); 6237 6238 break; 6238 - 6239 - case HREQ_ENABLE: 6240 - lanes = data & 0xF; 6241 - for (i = 0; lanes; lanes >>= 1, i++) { 6242 - if (!(lanes & 1)) 6243 - continue; 6244 - if (data & 0x200) { 6245 - /* enable TX CDR */ 6246 - if (cache[QSFP_MOD_PWR_OFFS] & 0x8 && 6247 - cache[QSFP_CDR_INFO_OFFS] & 0x80) 6248 - cdr_ctrl_byte |= (1 << (i + 4)); 6249 - } else { 6250 - /* disable TX CDR */ 6251 - if (cache[QSFP_MOD_PWR_OFFS] & 0x8 && 6252 - cache[QSFP_CDR_INFO_OFFS] & 0x80) 6253 - cdr_ctrl_byte &= ~(1 << (i + 4)); 6254 - } 6255 - 6256 - if (data & 0x800) { 6257 - /* enable RX CDR */ 6258 - if (cache[QSFP_MOD_PWR_OFFS] & 0x4 && 6259 - cache[QSFP_CDR_INFO_OFFS] & 0x40) 6260 - cdr_ctrl_byte |= (1 << i); 6261 - } else { 6262 - /* disable RX CDR */ 6263 - if (cache[QSFP_MOD_PWR_OFFS] & 0x4 && 6264 - cache[QSFP_CDR_INFO_OFFS] & 0x40) 6265 - cdr_ctrl_byte &= ~(1 << i); 6266 - } 6267 - } 6268 - one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS, 6269 - &cdr_ctrl_byte, 1); 6270 - hreq_response(dd, HREQ_SUCCESS, data); 6271 - refresh_qsfp_cache(ppd, &ppd->qsfp_info); 6272 - break; 6273 - 6274 6239 case HREQ_CONFIG_DONE: 6275 6240 hreq_response(dd, HREQ_SUCCESS, 0); 6276 6241 break; ··· 6243 6278 case HREQ_INTERFACE_TEST: 6244 6279 hreq_response(dd, HREQ_SUCCESS, data); 6245 6280 break; 6246 - 
6247 6281 default: 6248 6282 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type); 6249 6283 hreq_response(dd, HREQ_NOT_SUPPORTED, 0); ··· 6813 6849 ppd->neighbor_fm_security = 0; 6814 6850 } 6815 6851 6852 + static const char * const link_down_reason_strs[] = { 6853 + [OPA_LINKDOWN_REASON_NONE] = "None", 6854 + [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0", 6855 + [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length", 6856 + [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long", 6857 + [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short", 6858 + [OPA_LINKDOWN_REASON_BAD_SLID] = "Bad SLID", 6859 + [OPA_LINKDOWN_REASON_BAD_DLID] = "Bad DLID", 6860 + [OPA_LINKDOWN_REASON_BAD_L2] = "Bad L2", 6861 + [OPA_LINKDOWN_REASON_BAD_SC] = "Bad SC", 6862 + [OPA_LINKDOWN_REASON_RCV_ERROR_8] = "Receive error 8", 6863 + [OPA_LINKDOWN_REASON_BAD_MID_TAIL] = "Bad mid tail", 6864 + [OPA_LINKDOWN_REASON_RCV_ERROR_10] = "Receive error 10", 6865 + [OPA_LINKDOWN_REASON_PREEMPT_ERROR] = "Preempt error", 6866 + [OPA_LINKDOWN_REASON_PREEMPT_VL15] = "Preempt vl15", 6867 + [OPA_LINKDOWN_REASON_BAD_VL_MARKER] = "Bad VL marker", 6868 + [OPA_LINKDOWN_REASON_RCV_ERROR_14] = "Receive error 14", 6869 + [OPA_LINKDOWN_REASON_RCV_ERROR_15] = "Receive error 15", 6870 + [OPA_LINKDOWN_REASON_BAD_HEAD_DIST] = "Bad head distance", 6871 + [OPA_LINKDOWN_REASON_BAD_TAIL_DIST] = "Bad tail distance", 6872 + [OPA_LINKDOWN_REASON_BAD_CTRL_DIST] = "Bad control distance", 6873 + [OPA_LINKDOWN_REASON_BAD_CREDIT_ACK] = "Bad credit ack", 6874 + [OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER] = "Unsupported VL marker", 6875 + [OPA_LINKDOWN_REASON_BAD_PREEMPT] = "Bad preempt", 6876 + [OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT] = "Bad control flit", 6877 + [OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT] = "Exceed multicast limit", 6878 + [OPA_LINKDOWN_REASON_RCV_ERROR_24] = "Receive error 24", 6879 + [OPA_LINKDOWN_REASON_RCV_ERROR_25] = "Receive error 25", 6880 + [OPA_LINKDOWN_REASON_RCV_ERROR_26] = "Receive error 26", 6881 + [OPA_LINKDOWN_REASON_RCV_ERROR_27] = "Receive error 27", 6882 + [OPA_LINKDOWN_REASON_RCV_ERROR_28] = "Receive error 28", 6883 + [OPA_LINKDOWN_REASON_RCV_ERROR_29] = "Receive error 29", 6884 + [OPA_LINKDOWN_REASON_RCV_ERROR_30] = "Receive error 30", 6885 + [OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN] = 6886 + "Excessive buffer overrun", 6887 + [OPA_LINKDOWN_REASON_UNKNOWN] = "Unknown", 6888 + [OPA_LINKDOWN_REASON_REBOOT] = "Reboot", 6889 + [OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN] = "Neighbor unknown", 6890 + [OPA_LINKDOWN_REASON_FM_BOUNCE] = "FM bounce", 6891 + [OPA_LINKDOWN_REASON_SPEED_POLICY] = "Speed policy", 6892 + [OPA_LINKDOWN_REASON_WIDTH_POLICY] = "Width policy", 6893 + [OPA_LINKDOWN_REASON_DISCONNECTED] = "Disconnected", 6894 + [OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED] = 6895 + "Local media not installed", 6896 + [OPA_LINKDOWN_REASON_NOT_INSTALLED] = "Not installed", 6897 + [OPA_LINKDOWN_REASON_CHASSIS_CONFIG] = "Chassis config", 6898 + [OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED] = 6899 + "End to end not installed", 6900 + [OPA_LINKDOWN_REASON_POWER_POLICY] = "Power policy", 6901 + [OPA_LINKDOWN_REASON_LINKSPEED_POLICY] = "Link speed policy", 6902 + [OPA_LINKDOWN_REASON_LINKWIDTH_POLICY] = "Link width policy", 6903 + [OPA_LINKDOWN_REASON_SWITCH_MGMT] = "Switch management", 6904 + [OPA_LINKDOWN_REASON_SMA_DISABLED] = "SMA disabled", 6905 + [OPA_LINKDOWN_REASON_TRANSIENT] = "Transient" 6906 + }; 6907 + 6908 + /* return the neighbor link down reason string */ 6909 + static const char 
*link_down_reason_str(u8 reason) 6910 + { 6911 + const char *str = NULL; 6912 + 6913 + if (reason < ARRAY_SIZE(link_down_reason_strs)) 6914 + str = link_down_reason_strs[reason]; 6915 + if (!str) 6916 + str = "(invalid)"; 6917 + 6918 + return str; 6919 + } 6920 + 6816 6921 /* 6817 6922 * Handle a link down interrupt from the 8051. 6818 6923 * ··· 6890 6857 void handle_link_down(struct work_struct *work) 6891 6858 { 6892 6859 u8 lcl_reason, neigh_reason = 0; 6860 + u8 link_down_reason; 6893 6861 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, 6894 - link_down_work); 6862 + link_down_work); 6863 + int was_up; 6864 + static const char ldr_str[] = "Link down reason: "; 6895 6865 6896 6866 if ((ppd->host_link_state & 6897 6867 (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) && ··· 6903 6867 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED); 6904 6868 6905 6869 /* Go offline first, then deal with reading/writing through 8051 */ 6870 + was_up = !!(ppd->host_link_state & HLS_UP); 6906 6871 set_link_state(ppd, HLS_DN_OFFLINE); 6907 6872 6908 - lcl_reason = 0; 6909 - read_planned_down_reason_code(ppd->dd, &neigh_reason); 6873 + if (was_up) { 6874 + lcl_reason = 0; 6875 + /* link down reason is only valid if the link was up */ 6876 + read_link_down_reason(ppd->dd, &link_down_reason); 6877 + switch (link_down_reason) { 6878 + case LDR_LINK_TRANSFER_ACTIVE_LOW: 6879 + /* the link went down, no idle message reason */ 6880 + dd_dev_info(ppd->dd, "%sUnexpected link down\n", 6881 + ldr_str); 6882 + break; 6883 + case LDR_RECEIVED_LINKDOWN_IDLE_MSG: 6884 + /* 6885 + * The neighbor reason is only valid if an idle message 6886 + * was received for it. 6887 + */ 6888 + read_planned_down_reason_code(ppd->dd, &neigh_reason); 6889 + dd_dev_info(ppd->dd, 6890 + "%sNeighbor link down message %d, %s\n", 6891 + ldr_str, neigh_reason, 6892 + link_down_reason_str(neigh_reason)); 6893 + break; 6894 + case LDR_RECEIVED_HOST_OFFLINE_REQ: 6895 + dd_dev_info(ppd->dd, 6896 + "%sHost requested link to go offline\n", 6897 + ldr_str); 6898 + break; 6899 + default: 6900 + dd_dev_info(ppd->dd, "%sUnknown reason 0x%x\n", 6901 + ldr_str, link_down_reason); 6902 + break; 6903 + } 6910 6904 6911 - /* 6912 - * If no reason, assume peer-initiated but missed 6913 - * LinkGoingDown idle flits. 6914 - */ 6915 - if (neigh_reason == 0) 6916 - lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN; 6905 + /* 6906 + * If no reason, assume peer-initiated but missed 6907 + * LinkGoingDown idle flits. 6908 + */ 6909 + if (neigh_reason == 0) 6910 + lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN; 6911 + } else { 6912 + /* went down while polling or going up */ 6913 + lcl_reason = OPA_LINKDOWN_REASON_TRANSIENT; 6914 + } 6917 6915 6918 6916 set_link_down_reason(ppd, lcl_reason, neigh_reason, 0); 6917 + 6918 + /* inform the SMA when the link transitions from up to down */ 6919 + if (was_up && ppd->local_link_down_reason.sma == 0 && 6920 + ppd->neigh_link_down_reason.sma == 0) { 6921 + ppd->local_link_down_reason.sma = 6922 + ppd->local_link_down_reason.latest; 6923 + ppd->neigh_link_down_reason.sma = 6924 + ppd->neigh_link_down_reason.latest; 6925 + } 6919 6926 6920 6927 reset_neighbor_info(ppd); 6921 6928 ··· 6969 6890 * If there is no cable attached, turn the DC off. Otherwise, 6970 6891 * start the link bring up. 
6971 6892 */ 6972 - if (!qsfp_mod_present(ppd)) { 6893 + if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) { 6973 6894 dc_shutdown(ppd->dd); 6974 6895 } else { 6975 6896 tune_serdes(ppd); ··· 7452 7373 ppd->link_width_downgrade_rx_active = rx; 7453 7374 } 7454 7375 7455 - if (lwde == 0) { 7376 + if (ppd->link_width_downgrade_tx_active == 0 || 7377 + ppd->link_width_downgrade_rx_active == 0) { 7378 + /* the 8051 reported a dead link as a downgrade */ 7379 + dd_dev_err(ppd->dd, "Link downgrade is really a link down, ignoring\n"); 7380 + } else if (lwde == 0) { 7456 7381 /* downgrade is disabled */ 7457 7382 7458 7383 /* bounce if not at starting active width */ ··· 7617 7534 host_msg &= ~(u64)LINKUP_ACHIEVED; 7618 7535 } 7619 7536 if (host_msg & EXT_DEVICE_CFG_REQ) { 7620 - queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work); 7537 + handle_8051_request(ppd); 7621 7538 host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; 7622 7539 } 7623 7540 if (host_msg & VERIFY_CAP_FRAME) { ··· 8743 8660 *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK; 8744 8661 } 8745 8662 8663 + static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr) 8664 + { 8665 + u32 frame; 8666 + 8667 + read_8051_config(dd, LINK_DOWN_REASON, GENERAL_CONFIG, &frame); 8668 + *ldr = (frame & 0xff); 8669 + } 8670 + 8746 8671 static int read_tx_settings(struct hfi1_devdata *dd, 8747 8672 u8 *enable_lane_tx, 8748 8673 u8 *tx_polarity_inversion, ··· 9140 9049 } 9141 9050 9142 9051 /* 9143 - * Call this to start the link. Schedule a retry if the cable is not 9144 - * present or if unable to start polling. Do not do anything if the 9145 - * link is disabled. Returns 0 if link is disabled or moved to polling 9052 + * Call this to start the link. 9053 + * Do not do anything if the link is disabled. 9054 + * Returns 0 if link is disabled, moved to polling, or the driver is not ready. 
9146 9055 */ 9147 9056 int start_link(struct hfi1_pportdata *ppd) 9148 9057 { ··· 9159 9068 return 0; 9160 9069 } 9161 9070 9162 - if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES || 9163 - loopback == LOOPBACK_LCB || 9164 - ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) 9165 - return set_link_state(ppd, HLS_DN_POLL); 9166 - 9167 - dd_dev_info(ppd->dd, 9168 - "%s: stopping link start because no cable is present\n", 9169 - __func__); 9170 - return -EAGAIN; 9071 + return set_link_state(ppd, HLS_DN_POLL); 9171 9072 } 9172 9073 9173 9074 static void wait_for_qsfp_init(struct hfi1_pportdata *ppd) ··· 9330 9247 return 0; 9331 9248 } 9332 9249 9333 - /* This routine will only be scheduled if the QSFP module is present */ 9250 + /* This routine will only be scheduled if the QSFP module present is asserted */ 9334 9251 void qsfp_event(struct work_struct *work) 9335 9252 { 9336 9253 struct qsfp_data *qd; ··· 9759 9676 & SEND_LEN_CHECK1_LEN_VL15_MASK) << 9760 9677 SEND_LEN_CHECK1_LEN_VL15_SHIFT; 9761 9678 int i; 9679 + u32 thres; 9762 9680 9763 9681 for (i = 0; i < ppd->vls_supported; i++) { 9764 9682 if (dd->vld[i].mtu > maxvlmtu) ··· 9778 9694 /* adjust kernel credit return thresholds based on new MTUs */ 9779 9695 /* all kernel receive contexts have the same hdrqentsize */ 9780 9696 for (i = 0; i < ppd->vls_supported; i++) { 9781 - sc_set_cr_threshold(dd->vld[i].sc, 9782 - sc_mtu_to_threshold(dd->vld[i].sc, 9783 - dd->vld[i].mtu, 9784 - dd->rcd[0]-> 9785 - rcvhdrqentsize)); 9786 - } 9787 - sc_set_cr_threshold(dd->vld[15].sc, 9788 - sc_mtu_to_threshold(dd->vld[15].sc, 9789 - dd->vld[15].mtu, 9697 + thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50), 9698 + sc_mtu_to_threshold(dd->vld[i].sc, 9699 + dd->vld[i].mtu, 9790 9700 dd->rcd[0]->rcvhdrqentsize)); 9701 + sc_set_cr_threshold(dd->vld[i].sc, thres); 9702 + } 9703 + thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50), 9704 + sc_mtu_to_threshold(dd->vld[15].sc, 9705 + dd->vld[15].mtu, 9706 + dd->rcd[0]->rcvhdrqentsize)); 9707 + sc_set_cr_threshold(dd->vld[15].sc, thres); 9791 9708 9792 9709 /* Adjust maximum MTU for the port in DC */ 9793 9710 dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 : ··· 10115 10030 struct hfi1_devdata *dd = ppd->dd; 10116 10031 struct ib_event event = {.device = NULL}; 10117 10032 int ret1, ret = 0; 10118 - int was_up, is_down; 10119 10033 int orig_new_state, poll_bounce; 10120 10034 10121 10035 mutex_lock(&ppd->hls_lock); ··· 10132 10048 link_state_name(orig_new_state), 10133 10049 poll_bounce ? 
"(bounce) " : "", 10134 10050 link_state_reason_name(ppd, state)); 10135 - 10136 - was_up = !!(ppd->host_link_state & HLS_UP); 10137 10051 10138 10052 /* 10139 10053 * If we're going to a (HLS_*) link state that implies the logical ··· 10341 10259 __func__, state); 10342 10260 ret = -EINVAL; 10343 10261 break; 10344 - } 10345 - 10346 - is_down = !!(ppd->host_link_state & (HLS_DN_POLL | 10347 - HLS_DN_DISABLE | HLS_DN_OFFLINE)); 10348 - 10349 - if (was_up && is_down && ppd->local_link_down_reason.sma == 0 && 10350 - ppd->neigh_link_down_reason.sma == 0) { 10351 - ppd->local_link_down_reason.sma = 10352 - ppd->local_link_down_reason.latest; 10353 - ppd->neigh_link_down_reason.sma = 10354 - ppd->neigh_link_down_reason.latest; 10355 10262 } 10356 10263 10357 10264 goto done; ··· 12744 12673 int total_contexts; 12745 12674 int ret; 12746 12675 unsigned ngroups; 12676 + int qos_rmt_count; 12677 + int user_rmt_reduced; 12747 12678 12748 12679 /* 12749 - * Kernel contexts: (to be fixed later): 12750 - * - min or 2 or 1 context/numa 12680 + * Kernel receive contexts: 12681 + * - min of 2 or 1 context/numa (excluding control context) 12751 12682 * - Context 0 - control context (VL15/multicast/error) 12752 - * - Context 1 - default context 12683 + * - Context 1 - first kernel context 12684 + * - Context 2 - second kernel context 12685 + * ... 12753 12686 */ 12754 12687 if (n_krcvqs) 12755 12688 /* 12756 - * Don't count context 0 in n_krcvqs since 12757 - * is isn't used for normal verbs traffic. 12758 - * 12759 - * krcvqs will reflect number of kernel 12760 - * receive contexts above 0. 12689 + * n_krcvqs is the sum of module parameter kernel receive 12690 + * contexts, krcvqs[]. It does not include the control 12691 + * context, so add that. 12761 12692 */ 12762 - num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1; 12693 + num_kernel_contexts = n_krcvqs + 1; 12763 12694 else 12764 12695 num_kernel_contexts = num_online_nodes() + 1; 12765 12696 num_kernel_contexts = ··· 12778 12705 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; 12779 12706 } 12780 12707 /* 12781 - * User contexts: (to be fixed later) 12782 - * - default to 1 user context per CPU if num_user_contexts is 12783 - * negative 12708 + * User contexts: 12709 + * - default to 1 user context per real (non-HT) CPU core if 12710 + * num_user_contexts is negative 12784 12711 */ 12785 12712 if (num_user_contexts < 0) 12786 - num_user_contexts = num_online_cpus(); 12713 + num_user_contexts = 12714 + cpumask_weight(&dd->affinity->real_cpu_mask); 12787 12715 12788 12716 total_contexts = num_kernel_contexts + num_user_contexts; 12789 12717 ··· 12798 12724 (int)num_user_contexts); 12799 12725 num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts; 12800 12726 /* recalculate */ 12727 + total_contexts = num_kernel_contexts + num_user_contexts; 12728 + } 12729 + 12730 + /* each user context requires an entry in the RMT */ 12731 + qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); 12732 + if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) { 12733 + user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; 12734 + dd_dev_err(dd, 12735 + "RMT size is reducing the number of user receive contexts from %d to %d\n", 12736 + (int)num_user_contexts, 12737 + user_rmt_reduced); 12738 + /* recalculate */ 12739 + num_user_contexts = user_rmt_reduced; 12801 12740 total_contexts = num_kernel_contexts + num_user_contexts; 12802 12741 } 12803 12742 ··· 12863 12776 dd->num_send_contexts = ret; 12864 12777 dd_dev_info( 12865 12778 dd, 12866 - "send 
contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n", 12779 + "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n", 12867 12780 dd->chip_send_contexts, 12868 12781 dd->num_send_contexts, 12869 12782 dd->sc_sizes[SC_KERNEL].count, 12870 12783 dd->sc_sizes[SC_ACK].count, 12871 - dd->sc_sizes[SC_USER].count); 12784 + dd->sc_sizes[SC_USER].count, 12785 + dd->sc_sizes[SC_VL15].count); 12872 12786 ret = 0; /* success */ 12873 12787 } 12874 12788 ··· 13539 13451 int i; 13540 13452 u64 ctxt = first_ctxt; 13541 13453 13542 - for (i = 0; i < 256;) { 13454 + for (i = 0; i < 256; i++) { 13543 13455 reg |= ctxt << (8 * (i % 8)); 13544 - i++; 13545 13456 ctxt++; 13546 13457 if (ctxt > last_ctxt) 13547 13458 ctxt = first_ctxt; 13548 - if (i % 8 == 0) { 13459 + if (i % 8 == 7) { 13549 13460 write_csr(dd, regno, reg); 13550 13461 reg = 0; 13551 13462 regno += 8; 13552 13463 } 13553 13464 } 13554 - if (i % 8) 13555 - write_csr(dd, regno, reg); 13556 13465 13557 13466 add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK 13558 13467 | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK); 13559 13468 } 13560 13469 13561 - /** 13562 - * init_qos - init RX qos 13563 - * @dd - device data 13564 - * @first_context 13565 - * 13566 - * This routine initializes Rule 0 and the 13567 - * RSM map table to implement qos. 13568 - * 13569 - * If all of the limit tests succeed, 13570 - * qos is applied based on the array 13571 - * interpretation of krcvqs where 13572 - * entry 0 is VL0. 13573 - * 13574 - * The number of vl bits (n) and the number of qpn 13575 - * bits (m) are computed to feed both the RSM map table 13576 - * and the single rule. 13577 - * 13578 - */ 13579 - static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt) 13580 - { 13581 - u8 max_by_vl = 0; 13582 - unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m; 13583 - u64 *rsmmap; 13584 - u64 reg; 13585 - u8 rxcontext = is_ax(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */ 13470 + struct rsm_map_table { 13471 + u64 map[NUM_MAP_REGS]; 13472 + unsigned int used; 13473 + }; 13586 13474 13587 - /* validate */ 13475 + struct rsm_rule_data { 13476 + u8 offset; 13477 + u8 pkt_type; 13478 + u32 field1_off; 13479 + u32 field2_off; 13480 + u32 index1_off; 13481 + u32 index1_width; 13482 + u32 index2_off; 13483 + u32 index2_width; 13484 + u32 mask1; 13485 + u32 value1; 13486 + u32 mask2; 13487 + u32 value2; 13488 + }; 13489 + 13490 + /* 13491 + * Return an initialized RMT map table for users to fill in. OK if it 13492 + * returns NULL, indicating no table. 13493 + */ 13494 + static struct rsm_map_table *alloc_rsm_map_table(struct hfi1_devdata *dd) 13495 + { 13496 + struct rsm_map_table *rmt; 13497 + u8 rxcontext = is_ax(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */ 13498 + 13499 + rmt = kmalloc(sizeof(*rmt), GFP_KERNEL); 13500 + if (rmt) { 13501 + memset(rmt->map, rxcontext, sizeof(rmt->map)); 13502 + rmt->used = 0; 13503 + } 13504 + 13505 + return rmt; 13506 + } 13507 + 13508 + /* 13509 + * Write the final RMT map table to the chip and free the table. OK if 13510 + * table is NULL. 13511 + */ 13512 + static void complete_rsm_map_table(struct hfi1_devdata *dd, 13513 + struct rsm_map_table *rmt) 13514 + { 13515 + int i; 13516 + 13517 + if (rmt) { 13518 + /* write table to chip */ 13519 + for (i = 0; i < NUM_MAP_REGS; i++) 13520 + write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rmt->map[i]); 13521 + 13522 + /* enable RSM */ 13523 + add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); 13524 + } 13525 + } 13526 + 13527 + /* 13528 + * Add a receive side mapping rule. 
13529 + */ 13530 + static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index, 13531 + struct rsm_rule_data *rrd) 13532 + { 13533 + write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 13534 + (u64)rrd->offset << RCV_RSM_CFG_OFFSET_SHIFT | 13535 + 1ull << rule_index | /* enable bit */ 13536 + (u64)rrd->pkt_type << RCV_RSM_CFG_PACKET_TYPE_SHIFT); 13537 + write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 13538 + (u64)rrd->field1_off << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT | 13539 + (u64)rrd->field2_off << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT | 13540 + (u64)rrd->index1_off << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT | 13541 + (u64)rrd->index1_width << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT | 13542 + (u64)rrd->index2_off << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT | 13543 + (u64)rrd->index2_width << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT); 13544 + write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 13545 + (u64)rrd->mask1 << RCV_RSM_MATCH_MASK1_SHIFT | 13546 + (u64)rrd->value1 << RCV_RSM_MATCH_VALUE1_SHIFT | 13547 + (u64)rrd->mask2 << RCV_RSM_MATCH_MASK2_SHIFT | 13548 + (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT); 13549 + } 13550 + 13551 + /* return the number of RSM map table entries that will be used for QOS */ 13552 + static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, 13553 + unsigned int *np) 13554 + { 13555 + int i; 13556 + unsigned int m, n; 13557 + u8 max_by_vl = 0; 13558 + 13559 + /* is QOS active at all? */ 13588 13560 if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS || 13589 13561 num_vls == 1 || 13590 13562 krcvqsset <= 1) 13591 - goto bail; 13592 - for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++) 13563 + goto no_qos; 13564 + 13565 + /* determine bits for qpn */ 13566 + for (i = 0; i < min_t(unsigned int, num_vls, krcvqsset); i++) 13593 13567 if (krcvqs[i] > max_by_vl) 13594 13568 max_by_vl = krcvqs[i]; 13595 13569 if (max_by_vl > 32) 13596 - goto bail; 13597 - qpns_per_vl = __roundup_pow_of_two(max_by_vl); 13598 - /* determine bits vl */ 13599 - n = ilog2(num_vls); 13600 - /* determine bits for qpn */ 13601 - m = ilog2(qpns_per_vl); 13570 + goto no_qos; 13571 + m = ilog2(__roundup_pow_of_two(max_by_vl)); 13572 + 13573 + /* determine bits for vl */ 13574 + n = ilog2(__roundup_pow_of_two(num_vls)); 13575 + 13576 + /* reject if too much is used */ 13602 13577 if ((m + n) > 7) 13578 + goto no_qos; 13579 + 13580 + if (mp) 13581 + *mp = m; 13582 + if (np) 13583 + *np = n; 13584 + 13585 + return 1 << (m + n); 13586 + 13587 + no_qos: 13588 + if (mp) 13589 + *mp = 0; 13590 + if (np) 13591 + *np = 0; 13592 + return 0; 13593 + } 13594 + 13595 + /** 13596 + * init_qos - init RX qos 13597 + * @dd - device data 13598 + * @rmt - RSM map table 13599 + * 13600 + * This routine initializes Rule 0 and the RSM map table to implement 13601 + * quality of service (qos). 13602 + * 13603 + * If all of the limit tests succeed, qos is applied based on the array 13604 + * interpretation of krcvqs where entry 0 is VL0. 13605 + * 13606 + * The number of vl bits (n) and the number of qpn bits (m) are computed to 13607 + * feed both the RSM map table and the single rule. 
13608 + */ 13609 + static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt) 13610 + { 13611 + struct rsm_rule_data rrd; 13612 + unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m; 13613 + unsigned int rmt_entries; 13614 + u64 reg; 13615 + 13616 + if (!rmt) 13603 13617 goto bail; 13604 - if (num_vls * qpns_per_vl > dd->chip_rcv_contexts) 13618 + rmt_entries = qos_rmt_entries(dd, &m, &n); 13619 + if (rmt_entries == 0) 13605 13620 goto bail; 13606 - rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL); 13607 - if (!rsmmap) 13621 + qpns_per_vl = 1 << m; 13622 + 13623 + /* enough room in the map table? */ 13624 + rmt_entries = 1 << (m + n); 13625 + if (rmt->used + rmt_entries >= NUM_MAP_ENTRIES) 13608 13626 goto bail; 13609 - memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64)); 13610 - /* init the local copy of the table */ 13611 - for (i = 0, ctxt = first_ctxt; i < num_vls; i++) { 13627 + 13628 + /* add qos entries to the the RSM map table */ 13629 + for (i = 0, ctxt = FIRST_KERNEL_KCTXT; i < num_vls; i++) { 13612 13630 unsigned tctxt; 13613 13631 13614 13632 for (qpn = 0, tctxt = ctxt; 13615 13633 krcvqs[i] && qpn < qpns_per_vl; qpn++) { 13616 13634 unsigned idx, regoff, regidx; 13617 13635 13618 - /* generate index <= 128 */ 13619 - idx = (qpn << n) ^ i; 13636 + /* generate the index the hardware will produce */ 13637 + idx = rmt->used + ((qpn << n) ^ i); 13620 13638 regoff = (idx % 8) * 8; 13621 13639 regidx = idx / 8; 13622 - reg = rsmmap[regidx]; 13623 - /* replace 0xff with context number */ 13640 + /* replace default with context number */ 13641 + reg = rmt->map[regidx]; 13624 13642 reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 13625 13643 << regoff); 13626 13644 reg |= (u64)(tctxt++) << regoff; 13627 - rsmmap[regidx] = reg; 13645 + rmt->map[regidx] = reg; 13628 13646 if (tctxt == ctxt + krcvqs[i]) 13629 13647 tctxt = ctxt; 13630 13648 } 13631 13649 ctxt += krcvqs[i]; 13632 13650 } 13633 - /* flush cached copies to chip */ 13634 - for (i = 0; i < NUM_MAP_REGS; i++) 13635 - write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]); 13636 - /* add rule0 */ 13637 - write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */, 13638 - RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK << 13639 - RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT | 13640 - 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT); 13641 - write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */, 13642 - LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT | 13643 - LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT | 13644 - LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT | 13645 - ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT | 13646 - QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT | 13647 - ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT); 13648 - write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */, 13649 - LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT | 13650 - LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT | 13651 - LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT | 13652 - LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT); 13653 - /* Enable RSM */ 13654 - add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); 13655 - kfree(rsmmap); 13656 - /* map everything else to first context */ 13657 - init_qpmap_table(dd, FIRST_KERNEL_KCTXT, MIN_KERNEL_KCTXTS - 1); 13651 + 13652 + rrd.offset = rmt->used; 13653 + rrd.pkt_type = 2; 13654 + rrd.field1_off = LRH_BTH_MATCH_OFFSET; 13655 + rrd.field2_off = LRH_SC_MATCH_OFFSET; 13656 + rrd.index1_off = LRH_SC_SELECT_OFFSET; 13657 + rrd.index1_width = n; 13658 + rrd.index2_off = QPN_SELECT_OFFSET; 13659 + rrd.index2_width = m 
+ n; 13660 + rrd.mask1 = LRH_BTH_MASK; 13661 + rrd.value1 = LRH_BTH_VALUE; 13662 + rrd.mask2 = LRH_SC_MASK; 13663 + rrd.value2 = LRH_SC_VALUE; 13664 + 13665 + /* add rule 0 */ 13666 + add_rsm_rule(dd, 0, &rrd); 13667 + 13668 + /* mark RSM map entries as used */ 13669 + rmt->used += rmt_entries; 13670 + /* map everything else to the mcast/err/vl15 context */ 13671 + init_qpmap_table(dd, HFI1_CTRL_CTXT, HFI1_CTRL_CTXT); 13658 13672 dd->qos_shift = n + 1; 13659 13673 return; 13660 13674 bail: ··· 13764 13574 init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1); 13765 13575 } 13766 13576 13577 + static void init_user_fecn_handling(struct hfi1_devdata *dd, 13578 + struct rsm_map_table *rmt) 13579 + { 13580 + struct rsm_rule_data rrd; 13581 + u64 reg; 13582 + int i, idx, regoff, regidx; 13583 + u8 offset; 13584 + 13585 + /* there needs to be enough room in the map table */ 13586 + if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) { 13587 + dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); 13588 + return; 13589 + } 13590 + 13591 + /* 13592 + * RSM will extract the destination context as an index into the 13593 + * map table. The destination contexts are a sequential block 13594 + * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive). 13595 + * Map entries are accessed as offset + extracted value. Adjust 13596 + * the added offset so this sequence can be placed anywhere in 13597 + * the table - as long as the entries themselves do not wrap. 13598 + * There are only enough bits in offset for the table size, so 13599 + * start with that to allow for a "negative" offset. 13600 + */ 13601 + offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - 13602 + (int)dd->first_user_ctxt); 13603 + 13604 + for (i = dd->first_user_ctxt, idx = rmt->used; 13605 + i < dd->num_rcv_contexts; i++, idx++) { 13606 + /* replace with identity mapping */ 13607 + regoff = (idx % 8) * 8; 13608 + regidx = idx / 8; 13609 + reg = rmt->map[regidx]; 13610 + reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK << regoff); 13611 + reg |= (u64)i << regoff; 13612 + rmt->map[regidx] = reg; 13613 + } 13614 + 13615 + /* 13616 + * For RSM intercept of Expected FECN packets: 13617 + * o packet type 0 - expected 13618 + * o match on F (bit 95), using select/match 1, and 13619 + * o match on SH (bit 133), using select/match 2. 13620 + * 13621 + * Use index 1 to extract the 8-bit receive context from DestQP 13622 + * (start at bit 64). Use that as the RSM map table index. 13623 + */ 13624 + rrd.offset = offset; 13625 + rrd.pkt_type = 0; 13626 + rrd.field1_off = 95; 13627 + rrd.field2_off = 133; 13628 + rrd.index1_off = 64; 13629 + rrd.index1_width = 8; 13630 + rrd.index2_off = 0; 13631 + rrd.index2_width = 0; 13632 + rrd.mask1 = 1; 13633 + rrd.value1 = 1; 13634 + rrd.mask2 = 1; 13635 + rrd.value2 = 1; 13636 + 13637 + /* add rule 1 */ 13638 + add_rsm_rule(dd, 1, &rrd); 13639 + 13640 + rmt->used += dd->num_user_contexts; 13641 + } 13642 + 13767 13643 static void init_rxe(struct hfi1_devdata *dd) 13768 13644 { 13645 + struct rsm_map_table *rmt; 13646 + 13769 13647 /* enable all receive errors */ 13770 13648 write_csr(dd, RCV_ERR_MASK, ~0ull); 13771 - /* setup QPN map table - start where VL15 context leaves off */ 13772 - init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? 
13773 - MIN_KERNEL_KCTXTS : 0); 13649 + 13650 + rmt = alloc_rsm_map_table(dd); 13651 + /* set up QOS, including the QPN map table */ 13652 + init_qos(dd, rmt); 13653 + init_user_fecn_handling(dd, rmt); 13654 + complete_rsm_map_table(dd, rmt); 13655 + kfree(rmt); 13656 + 13774 13657 /* 13775 13658 * make sure RcvCtrl.RcvWcb <= PCIe Device Control 13776 13659 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config ··· 14025 13762 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); 14026 13763 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); 14027 13764 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; 13765 + reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK; 14028 13766 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); 14029 13767 done: 14030 13768 return ret; ··· 14412 14148 (dd->revision >> CCE_REVISION_SW_SHIFT) 14413 14149 & CCE_REVISION_SW_MASK); 14414 14150 14151 + /* 14152 + * The real cpu mask is part of the affinity struct but has to be 14153 + * initialized earlier than the rest of the affinity struct because it 14154 + * is needed to calculate the number of user contexts in 14155 + * set_up_context_variables(). However, hfi1_dev_affinity_init(), 14156 + * which initializes the rest of the affinity struct members, 14157 + * depends on set_up_context_variables() for the number of kernel 14158 + * contexts, so it cannot be called before set_up_context_variables(). 14159 + */ 14160 + ret = init_real_cpu_mask(dd); 14161 + if (ret) 14162 + goto bail_cleanup; 14163 + 14415 14164 ret = set_up_context_variables(dd); 14416 14165 if (ret) 14417 14166 goto bail_cleanup; ··· 14438 14161 /* set up KDETH QP prefix in both RX and TX CSRs */ 14439 14162 init_kdeth_qp(dd); 14440 14163 14441 - ret = hfi1_dev_affinity_init(dd); 14442 - if (ret) 14443 - goto bail_cleanup; 14164 + hfi1_dev_affinity_init(dd); 14444 14165 14445 14166 /* send contexts must be set up before receive contexts */ 14446 14167 ret = init_send_contexts(dd);
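Note on the chip.c QoS rework above: init_qos() now fills a shared RSM map table and computes each entry's index as rmt->used + ((qpn << n) ^ i). The following user-space sketch mirrors only that index arithmetic with a plain byte array; the krcvqs values, bit widths and context numbers are made-up assumptions for illustration, not a real configuration.

#include <stdio.h>
#include <stdint.h>

#define NUM_MAP_ENTRIES 256            /* RSM map table size, from the patch */

int main(void)
{
    /* assumed example configuration */
    unsigned krcvqs[] = { 2, 2 };      /* kernel receive queues per VL */
    unsigned num_vls = 2;
    unsigned n = 1;                    /* VL index bits                */
    unsigned m = 1;                    /* QPN bits (2 queues -> 1 bit) */
    unsigned qpns_per_vl = 1u << m;
    unsigned first_kctxt = 1;          /* FIRST_KERNEL_KCTXT           */
    unsigned used = 0;                 /* rmt->used before this rule   */
    uint8_t map[NUM_MAP_ENTRIES];

    for (unsigned i = 0; i < NUM_MAP_ENTRIES; i++)
        map[i] = 0xff;                 /* default/unused marker        */

    /* mirror of the init_qos() fill loop */
    unsigned ctxt = first_kctxt;
    for (unsigned i = 0; i < num_vls; i++) {
        unsigned tctxt = ctxt;
        for (unsigned qpn = 0; krcvqs[i] && qpn < qpns_per_vl; qpn++) {
            unsigned idx = used + ((qpn << n) ^ i);
            map[idx] = (uint8_t)tctxt++;
            if (tctxt == ctxt + krcvqs[i])
                tctxt = ctxt;
        }
        ctxt += krcvqs[i];
    }

    for (unsigned i = 0; i < (1u << (m + n)); i++)
        printf("map[%u] -> rcv context %u\n", i, (unsigned)map[i]);
    return 0;
}

Running this shows how consecutive QPNs interleave VLs by the low index bit, which is the property the single RSM rule relies on.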
+6 -1
drivers/staging/rdma/hfi1/chip.h
··· 389 389 #define LAST_REMOTE_STATE_COMPLETE 0x13 390 390 #define LINK_QUALITY_INFO 0x14 391 391 #define REMOTE_DEVICE_ID 0x15 392 + #define LINK_DOWN_REASON 0x16 392 393 393 394 /* 8051 lane specific register field IDs */ 394 395 #define TX_EQ_SETTINGS 0x00 ··· 497 496 /* verify capability PHY power management bits */ 498 497 #define PWRM_BER_CONTROL 0x1 499 498 #define PWRM_BANDWIDTH_CONTROL 0x2 499 + 500 + /* 8051 link down reasons */ 501 + #define LDR_LINK_TRANSFER_ACTIVE_LOW 0xa 502 + #define LDR_RECEIVED_LINKDOWN_IDLE_MSG 0xb 503 + #define LDR_RECEIVED_HOST_OFFLINE_REQ 0xc 500 504 501 505 /* verify capability fabric CRC size bits */ 502 506 enum { ··· 697 691 void handle_freeze(struct work_struct *work); 698 692 void handle_link_up(struct work_struct *work); 699 693 void handle_link_down(struct work_struct *work); 700 - void handle_8051_request(struct work_struct *work); 701 694 void handle_link_downgrade(struct work_struct *work); 702 695 void handle_link_bounce(struct work_struct *work); 703 696 void handle_sma_message(struct work_struct *work);
+1
drivers/staging/rdma/hfi1/chip_registers.h
··· 771 771 #define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK 0x1ull 772 772 #define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT 0 773 773 #define RCV_RSM_CFG_PACKET_TYPE_SHIFT 60 774 + #define RCV_RSM_CFG_OFFSET_SHIFT 32 774 775 #define RCV_RSM_MAP_TABLE (RXE + 0x000000000900) 775 776 #define RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 0xFFull 776 777 #define RCV_RSM_MATCH (RXE + 0x000000000800)
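The new RCV_RSM_CFG_OFFSET_SHIFT pairs with add_rsm_rule() in chip.c, which packs a rule into 64-bit CSR values by shifting each field into place. A minimal host-side sketch of the RCV_RSM_CFG packing; the shift values are the ones in this diff, and the example rule (index 0, offset 0, packet type 2) is the one init_qos() programs. The printed value is illustrative only.

#include <stdio.h>
#include <stdint.h>

#define RCV_RSM_CFG_OFFSET_SHIFT       32   /* added by this patch */
#define RCV_RSM_CFG_PACKET_TYPE_SHIFT  60

/* mirrors the RCV_RSM_CFG write in add_rsm_rule() */
static uint64_t rsm_cfg_value(uint8_t rule_index, uint8_t offset,
                              uint8_t pkt_type)
{
    return ((uint64_t)offset << RCV_RSM_CFG_OFFSET_SHIFT) |
           (1ull << rule_index) |            /* per-rule enable bit */
           ((uint64_t)pkt_type << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
}

int main(void)
{
    printf("RCV_RSM_CFG[0] = 0x%016llx\n",
           (unsigned long long)rsm_cfg_value(0, 0, 2));
    return 0;
}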
+2 -1
drivers/staging/rdma/hfi1/diag.c
··· 413 413 goto bail; 414 414 } 415 415 /* can only use kernel contexts */ 416 - if (dd->send_contexts[dp->sw_index].type != SC_KERNEL) { 416 + if (dd->send_contexts[dp->sw_index].type != SC_KERNEL && 417 + dd->send_contexts[dp->sw_index].type != SC_VL15) { 417 418 ret = -EINVAL; 418 419 goto bail; 419 420 }
+2 -1
drivers/staging/rdma/hfi1/driver.c
··· 75 75 76 76 unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; 77 77 module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO); 78 - MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is 8192"); 78 + MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify( 79 + HFI1_DEFAULT_MAX_MTU)); 79 80 80 81 unsigned int hfi1_cu = 1; 81 82 module_param_named(cu, hfi1_cu, uint, S_IRUGO);
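The max_mtu description now embeds HFI1_DEFAULT_MAX_MTU via __stringify(), so the help text tracks the header value (10240 after this series) instead of a stale "8192". The two-level stringification it relies on, shown standalone below; the macro pattern matches include/linux/stringify.h, and the demo constant is just the value from hfi.h in this diff.

#include <stdio.h>

/* same pattern as include/linux/stringify.h */
#define __stringify_1(x...) #x
#define __stringify(x...)   __stringify_1(x)

#define HFI1_DEFAULT_MAX_MTU 10240   /* value from hfi.h in this series */

int main(void)
{
    /* the extra level expands the macro before '#' stringifies it */
    puts("direct:   " __stringify_1(HFI1_DEFAULT_MAX_MTU)); /* literal name */
    puts("expanded: " __stringify(HFI1_DEFAULT_MAX_MTU));   /* "10240"      */
    return 0;
}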
+8 -1
drivers/staging/rdma/hfi1/firmware.c
··· 1413 1413 1414 1414 if (resource & CR_DYN_MASK) { 1415 1415 /* a dynamic resource is in use if either HFI has set the bit */ 1416 - all_bits = resource_mask(0, resource) | 1416 + if (dd->pcidev->device == PCI_DEVICE_ID_INTEL0 && 1417 + (resource & (CR_I2C1 | CR_I2C2))) { 1418 + /* discrete devices must serialize across both chains */ 1419 + all_bits = resource_mask(0, CR_I2C1 | CR_I2C2) | 1420 + resource_mask(1, CR_I2C1 | CR_I2C2); 1421 + } else { 1422 + all_bits = resource_mask(0, resource) | 1417 1423 resource_mask(1, resource); 1424 + } 1418 1425 my_bit = resource_mask(dd->hfi1_id, resource); 1419 1426 } else { 1420 1427 /* non-dynamic resources are not split between HFIs */
+7 -4
drivers/staging/rdma/hfi1/hfi.h
··· 455 455 #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) 456 456 457 457 /* use this MTU size if none other is given */ 458 - #define HFI1_DEFAULT_ACTIVE_MTU 8192 458 + #define HFI1_DEFAULT_ACTIVE_MTU 10240 459 459 /* use this MTU size as the default maximum */ 460 - #define HFI1_DEFAULT_MAX_MTU 8192 460 + #define HFI1_DEFAULT_MAX_MTU 10240 461 461 /* default partition key */ 462 462 #define DEFAULT_PKEY 0xffff 463 463 ··· 606 606 struct work_struct link_vc_work; 607 607 struct work_struct link_up_work; 608 608 struct work_struct link_down_work; 609 - struct work_struct dc_host_req_work; 610 609 struct work_struct sma_message_work; 611 610 struct work_struct freeze_work; 612 611 struct work_struct link_downgrade_work; ··· 1257 1258 static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) 1258 1259 { 1259 1260 return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | 1260 - ((!!(rhf & RHF_DC_INFO_MASK)) << 4); 1261 + ((!!(rhf & RHF_DC_INFO_SMASK)) << 4); 1261 1262 } 1262 1263 1263 1264 static inline u16 generate_jkey(kuid_t uid) ··· 1332 1333 void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, 1333 1334 u32 pkey, u32 slid, u32 dlid, u8 sc5, 1334 1335 const struct ib_grh *old_grh); 1336 + #define PKEY_CHECK_INVALID -1 1337 + int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, 1338 + u8 sc5, int8_t s_pkey_index); 1335 1339 1336 1340 #define PACKET_EGRESS_TIMEOUT 350 1337 1341 static inline void pause_for_credit_return(struct hfi1_devdata *dd) ··· 1778 1776 1779 1777 #define HFI1_PKT_USER_SC_INTEGRITY \ 1780 1778 (SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK \ 1779 + | SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK \ 1781 1780 | SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK \ 1782 1781 | SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK) 1783 1782
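The hdr2sc() fix above tests RHF_DC_INFO_SMASK (the shifted mask) instead of RHF_DC_INFO_MASK, so the fifth SC bit is actually taken from the receive header flags. A standalone sketch of the 5-bit SC assembly; the LRH nibble position comes from the code in this hunk, while the DC_INFO bit position below is an assumption for the demo, not the real register layout.

#include <stdio.h>
#include <stdint.h>

/* assumed bit position for the demo; the driver uses RHF_DC_INFO_SMASK */
#define DEMO_RHF_DC_INFO_SMASK (1ull << 63)

/* SC[3:0] come from LRH word 0 bits 15:12; SC[4] comes from the RHF bit */
static unsigned hdr2sc_demo(uint16_t lrh0, uint64_t rhf)
{
    return ((lrh0 >> 12) & 0xf) |
           (((rhf & DEMO_RHF_DC_INFO_SMASK) ? 1u : 0u) << 4);
}

int main(void)
{
    /* low nibble 0x3 with DC_INFO set -> SC 0x13; with it clear -> SC 0x3 */
    printf("sc5 = 0x%x\n", hdr2sc_demo(0x3000, DEMO_RHF_DC_INFO_SMASK));
    printf("sc5 = 0x%x\n", hdr2sc_demo(0x3000, 0));
    return 0;
}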
+11 -14
drivers/staging/rdma/hfi1/init.c
··· 422 422 struct cca_timer *cca_timer; 423 423 struct hfi1_pportdata *ppd; 424 424 int sl; 425 - u16 ccti, ccti_timer, ccti_min; 425 + u16 ccti_timer, ccti_min; 426 426 struct cc_state *cc_state; 427 427 unsigned long flags; 428 + enum hrtimer_restart ret = HRTIMER_NORESTART; 428 429 429 430 cca_timer = container_of(t, struct cca_timer, hrtimer); 430 431 ppd = cca_timer->ppd; ··· 451 450 452 451 spin_lock_irqsave(&ppd->cca_timer_lock, flags); 453 452 454 - ccti = cca_timer->ccti; 455 - 456 - if (ccti > ccti_min) { 453 + if (cca_timer->ccti > ccti_min) { 457 454 cca_timer->ccti--; 458 455 set_link_ipg(ppd); 459 456 } 460 457 461 - spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); 462 - 463 - rcu_read_unlock(); 464 - 465 - if (ccti > ccti_min) { 458 + if (cca_timer->ccti > ccti_min) { 466 459 unsigned long nsec = 1024 * ccti_timer; 467 460 /* ccti_timer is in units of 1.024 usec */ 468 461 hrtimer_forward_now(t, ns_to_ktime(nsec)); 469 - return HRTIMER_RESTART; 462 + ret = HRTIMER_RESTART; 470 463 } 471 - return HRTIMER_NORESTART; 464 + 465 + spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); 466 + rcu_read_unlock(); 467 + return ret; 472 468 } 473 469 474 470 /* ··· 494 496 INIT_WORK(&ppd->link_vc_work, handle_verify_cap); 495 497 INIT_WORK(&ppd->link_up_work, handle_link_up); 496 498 INIT_WORK(&ppd->link_down_work, handle_link_down); 497 - INIT_WORK(&ppd->dc_host_req_work, handle_8051_request); 498 499 INIT_WORK(&ppd->freeze_work, handle_freeze); 499 500 INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); 500 501 INIT_WORK(&ppd->sma_message_work, handle_sma_message); ··· 1004 1007 free_percpu(dd->rcv_limit); 1005 1008 hfi1_dev_affinity_free(dd); 1006 1009 free_percpu(dd->send_schedule); 1007 - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1010 + rvt_dealloc_device(&dd->verbs_dev.rdi); 1008 1011 } 1009 1012 1010 1013 /* ··· 1107 1110 bail: 1108 1111 if (!list_empty(&dd->list)) 1109 1112 list_del_init(&dd->list); 1110 - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); 1113 + rvt_dealloc_device(&dd->verbs_dev.rdi); 1111 1114 return ERR_PTR(ret); 1112 1115 } 1113 1116
+15 -1
drivers/staging/rdma/hfi1/mad.c
··· 999 999 break; 1000 1000 } 1001 1001 1002 - set_link_state(ppd, link_state); 1002 + if ((link_state == HLS_DN_POLL || 1003 + link_state == HLS_DN_DOWNDEF)) { 1004 + /* 1005 + * Going to poll. No matter what the current state, 1006 + * always move offline first, then tune and start the 1007 + * link. This correctly handles a FM link bounce and 1008 + * a link enable. Going offline is a no-op if already 1009 + * offline. 1010 + */ 1011 + set_link_state(ppd, HLS_DN_OFFLINE); 1012 + tune_serdes(ppd); 1013 + start_link(ppd); 1014 + } else { 1015 + set_link_state(ppd, link_state); 1016 + } 1003 1017 if (link_state == HLS_DN_DISABLE && 1004 1018 (ppd->offline_disabled_reason > 1005 1019 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
+29 -6
drivers/staging/rdma/hfi1/mmu_rb.c
··· 91 91 92 92 static unsigned long mmu_node_last(struct mmu_rb_node *node) 93 93 { 94 - return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1; 94 + return PAGE_ALIGN(node->addr + node->len) - 1; 95 95 } 96 96 97 97 int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) ··· 126 126 if (!handler) 127 127 return; 128 128 129 + /* Unregister first so we don't get any more notifications. */ 130 + if (current->mm) 131 + mmu_notifier_unregister(&handler->mn, current->mm); 132 + 129 133 spin_lock_irqsave(&mmu_rb_lock, flags); 130 134 list_del(&handler->list); 131 135 spin_unlock_irqrestore(&mmu_rb_lock, flags); 132 136 137 + spin_lock_irqsave(&handler->lock, flags); 133 138 if (!RB_EMPTY_ROOT(root)) { 134 139 struct rb_node *node; 135 140 struct mmu_rb_node *rbnode; ··· 146 141 handler->ops->remove(root, rbnode, NULL); 147 142 } 148 143 } 144 + spin_unlock_irqrestore(&handler->lock, flags); 149 145 150 - if (current->mm) 151 - mmu_notifier_unregister(&handler->mn, current->mm); 152 146 kfree(handler); 153 147 } 154 148 ··· 239 235 return node; 240 236 } 241 237 238 + struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root, 239 + unsigned long addr, unsigned long len) 240 + { 241 + struct mmu_rb_handler *handler = find_mmu_handler(root); 242 + struct mmu_rb_node *node; 243 + unsigned long flags; 244 + 245 + if (!handler) 246 + return ERR_PTR(-EINVAL); 247 + 248 + spin_lock_irqsave(&handler->lock, flags); 249 + node = __mmu_rb_search(handler, addr, len); 250 + if (node) 251 + __mmu_int_rb_remove(node, handler->root); 252 + spin_unlock_irqrestore(&handler->lock, flags); 253 + 254 + return node; 255 + } 256 + 242 257 void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) 243 258 { 244 259 struct mmu_rb_handler *handler = find_mmu_handler(root); ··· 316 293 hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", 317 294 node->addr, node->len); 318 295 if (handler->ops->invalidate(root, node)) { 319 - spin_unlock_irqrestore(&handler->lock, flags); 320 - __mmu_rb_remove(handler, node, mm); 321 - spin_lock_irqsave(&handler->lock, flags); 296 + __mmu_int_rb_remove(node, root); 297 + if (handler->ops->remove) 298 + handler->ops->remove(root, node, mm); 322 299 } 323 300 } 324 301 spin_unlock_irqrestore(&handler->lock, flags);
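The new hfi1_mmu_rb_extract() does the lookup and the removal under a single acquisition of the handler lock, so a node cannot be invalidated between the two steps. A generic sketch of that "extract" pattern over a plain linked list with a pthread mutex; it is purely illustrative, the driver itself uses an interval RB tree and its own handler lock.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { unsigned long addr, len; struct node *next; };

static struct node *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* find a node covering [addr, addr+len) and unlink it in one critical section */
static struct node *extract(unsigned long addr, unsigned long len)
{
    struct node **pp, *n = NULL;

    pthread_mutex_lock(&lock);
    for (pp = &head; *pp; pp = &(*pp)->next) {
        if (addr >= (*pp)->addr &&
            addr + len <= (*pp)->addr + (*pp)->len) {
            n = *pp;
            *pp = n->next;          /* unlink while still holding the lock */
            break;
        }
    }
    pthread_mutex_unlock(&lock);
    return n;                        /* caller now owns the node */
}

int main(void)
{
    struct node *n = malloc(sizeof(*n));

    n->addr = 0x1000; n->len = 0x2000; n->next = head; head = n;
    printf("extracted:  %p\n", (void *)extract(0x1800, 0x100));
    printf("second try: %p\n", (void *)extract(0x1800, 0x100)); /* NULL */
    free(n);
    return 0;
}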
+2
drivers/staging/rdma/hfi1/mmu_rb.h
··· 70 70 void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); 71 71 struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long, 72 72 unsigned long); 73 + struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long, 74 + unsigned long); 73 75 74 76 #endif /* _HFI1_MMU_RB_H */
+41 -11
drivers/staging/rdma/hfi1/pio.c
··· 139 139 /* Send Context Size (SCS) wildcards */ 140 140 #define SCS_POOL_0 -1 141 141 #define SCS_POOL_1 -2 142 + 142 143 /* Send Context Count (SCC) wildcards */ 143 144 #define SCC_PER_VL -1 144 145 #define SCC_PER_CPU -2 145 - 146 146 #define SCC_PER_KRCVQ -3 147 - #define SCC_ACK_CREDITS 32 147 + 148 + /* Send Context Size (SCS) constants */ 149 + #define SCS_ACK_CREDITS 32 150 + #define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */ 151 + 152 + #define PIO_THRESHOLD_CEILING 4096 148 153 149 154 #define PIO_WAIT_BATCH_SIZE 5 150 155 151 156 /* default send context sizes */ 152 157 static struct sc_config_sizes sc_config_sizes[SC_MAX] = { 153 158 [SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 154 - .count = SCC_PER_VL },/* one per NUMA */ 155 - [SC_ACK] = { .size = SCC_ACK_CREDITS, 159 + .count = SCC_PER_VL }, /* one per NUMA */ 160 + [SC_ACK] = { .size = SCS_ACK_CREDITS, 156 161 .count = SCC_PER_KRCVQ }, 157 162 [SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */ 158 163 .count = SCC_PER_CPU }, /* one per CPU */ 164 + [SC_VL15] = { .size = SCS_VL15_CREDITS, 165 + .count = 1 }, 159 166 160 167 }; 161 168 ··· 209 202 static const char *sc_type_names[SC_MAX] = { 210 203 "kernel", 211 204 "ack", 212 - "user" 205 + "user", 206 + "vl15" 213 207 }; 214 208 215 209 static const char *sc_type_name(int index) ··· 237 229 int ab_total; /* absolute block total */ 238 230 int extra; 239 231 int i; 232 + 233 + /* 234 + * When SDMA is enabled, kernel context pio packet size is capped by 235 + * "piothreshold". Reduce pio buffer allocation for kernel context by 236 + * setting it to a fixed size. The allocation allows 3-deep buffering 237 + * of the largest pio packets plus up to 128 bytes header, sufficient 238 + * to maintain verbs performance. 239 + * 240 + * When SDMA is disabled, keep the default pooling allocation. 241 + */ 242 + if (HFI1_CAP_IS_KSET(SDMA)) { 243 + u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ? 244 + piothreshold : PIO_THRESHOLD_CEILING; 245 + sc_config_sizes[SC_KERNEL].size = 246 + 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE; 247 + } 240 248 241 249 /* 242 250 * Step 0: ··· 335 311 if (i == SC_ACK) { 336 312 count = dd->n_krcv_queues; 337 313 } else if (i == SC_KERNEL) { 338 - count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */; 314 + count = INIT_SC_PER_VL * num_vls; 339 315 } else if (count == SCC_PER_CPU) { 340 316 count = dd->num_rcv_contexts - dd->n_krcv_queues; 341 317 } else if (count < 0) { ··· 620 596 * Return value is what to write into the CSR: trigger return when 621 597 * unreturned credits pass this count. 622 598 */ 623 - static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 599 + u32 sc_percent_to_threshold(struct send_context *sc, u32 percent) 624 600 { 625 601 return (sc->credits * percent) / 100; 626 602 } ··· 814 790 * For Ack contexts, set a threshold for half the credits. 815 791 * For User contexts use the given percentage. This has been 816 792 * sanitized on driver start-up. 817 - * For Kernel contexts, use the default MTU plus a header. 793 + * For Kernel contexts, use the default MTU plus a header 794 + * or half the credits, whichever is smaller. This should 795 + * work for both the 3-deep buffering allocation and the 796 + * pooling allocation. 
818 797 */ 819 798 if (type == SC_ACK) { 820 799 thresh = sc_percent_to_threshold(sc, 50); ··· 825 798 thresh = sc_percent_to_threshold(sc, 826 799 user_credit_return_threshold); 827 800 } else { /* kernel */ 828 - thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize); 801 + thresh = min(sc_percent_to_threshold(sc, 50), 802 + sc_mtu_to_threshold(sc, hfi1_max_mtu, 803 + hdrqentsize)); 829 804 } 830 805 reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT); 831 806 /* add in early return */ ··· 1560 1531 unsigned long flags; 1561 1532 unsigned i, n = 0; 1562 1533 1563 - if (dd->send_contexts[sc->sw_index].type != SC_KERNEL) 1534 + if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && 1535 + dd->send_contexts[sc->sw_index].type != SC_VL15) 1564 1536 return; 1565 1537 list = &sc->piowait; 1566 1538 /* ··· 1930 1900 u32 ctxt; 1931 1901 struct hfi1_pportdata *ppd = dd->pport; 1932 1902 1933 - dd->vld[15].sc = sc_alloc(dd, SC_KERNEL, 1903 + dd->vld[15].sc = sc_alloc(dd, SC_VL15, 1934 1904 dd->rcd[0]->rcvhdrqentsize, dd->node); 1935 1905 if (!dd->vld[15].sc) 1936 1906 goto nomem;
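Two numbers in the pio.c changes are worth spelling out. SCS_VL15_CREDITS is 102 because, per the comment, it covers three 2048-byte packets plus a 128-byte header each; with 64-byte PIO blocks (an assumption here, not stated in this hunk) that is 3 * (2048 + 128) / 64 = 102. The kernel credit-return threshold likewise becomes the smaller of a 50% threshold and an MTU-based one. The sketch below repeats that arithmetic; percent_to_threshold matches the body shown in this diff, while the MTU-based formula is a simplified stand-in, not the driver's sc_mtu_to_threshold().

#include <stdio.h>

#define PIO_BLOCK_SIZE 64              /* assumed block size for the arithmetic */

static unsigned percent_to_threshold(unsigned credits, unsigned percent)
{
    return credits * percent / 100;    /* as in sc_percent_to_threshold() */
}

/* simplified stand-in: credits needed for one MTU-sized packet plus header */
static unsigned mtu_to_threshold(unsigned mtu, unsigned hdr_bytes)
{
    return (mtu + hdr_bytes + PIO_BLOCK_SIZE - 1) / PIO_BLOCK_SIZE;
}

int main(void)
{
    unsigned vl15_credits = 3 * (2048 + 128) / PIO_BLOCK_SIZE;
    unsigned credits = 102, mtu = 2048, hdr = 128;

    printf("SCS_VL15_CREDITS = %u\n", vl15_credits);   /* 102 */
    printf("threshold = min(%u, %u)\n",
           percent_to_threshold(credits, 50),          /* 51  */
           mtu_to_threshold(mtu, hdr));                /* 34  */
    return 0;
}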
+3 -1
drivers/staging/rdma/hfi1/pio.h
··· 51 51 #define SC_KERNEL 0 52 52 #define SC_ACK 1 53 53 #define SC_USER 2 54 - #define SC_MAX 3 54 + #define SC_VL15 3 55 + #define SC_MAX 4 55 56 56 57 /* invalid send context index */ 57 58 #define INVALID_SCI 0xff ··· 294 293 void sc_add_credit_return_intr(struct send_context *sc); 295 294 void sc_del_credit_return_intr(struct send_context *sc); 296 295 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold); 296 + u32 sc_percent_to_threshold(struct send_context *sc, u32 percent); 297 297 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize); 298 298 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint); 299 299 void sc_wait(struct hfi1_devdata *dd);
+54 -45
drivers/staging/rdma/hfi1/platform.c
··· 114 114 if (ret) 115 115 return ret; 116 116 117 - if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4) 118 - cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]); 119 - else 120 - cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]); 117 + cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); 121 118 122 - if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1)) 119 + if (cable_power_class > power_class_max) 123 120 ppd->offline_disabled_reason = 124 121 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY); 125 - else if (cable_power_class > 4 && cable_power_class > (power_class_max)) 126 - ppd->offline_disabled_reason = 127 - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY); 128 - /* 129 - * cable_power_class will never have value 4 as this simply 130 - * means the high power settings are unused 131 - */ 132 122 133 123 if (ppd->offline_disabled_reason == 134 124 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { ··· 163 173 u8 *cache = ppd->qsfp_info.cache; 164 174 int ret; 165 175 166 - if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4) 167 - cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]); 168 - else 169 - cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]); 176 + cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); 170 177 171 - if (cable_power_class) { 178 + if (cable_power_class > QSFP_POWER_CLASS_1) { 172 179 power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS]; 173 180 174 181 power_ctrl_byte |= 1; ··· 177 190 if (ret != 1) 178 191 return -EIO; 179 192 180 - if (cable_power_class > 3) { 181 - /* > power class 4*/ 193 + if (cable_power_class > QSFP_POWER_CLASS_4) { 182 194 power_ctrl_byte |= (1 << 2); 183 195 ret = qsfp_write(ppd, ppd->dd->hfi1_id, 184 196 QSFP_PWR_CTRL_BYTE_OFFS, ··· 198 212 { 199 213 u32 rx_preset; 200 214 u8 *cache = ppd->qsfp_info.cache; 215 + int cable_power_class; 201 216 202 217 if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) && 203 218 (cache[QSFP_CDR_INFO_OFFS] & 0x40))) 204 219 return; 205 220 206 - /* rx_preset preset to zero to catch error */ 221 + /* RX CDR present, bypass supported */ 222 + cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); 223 + 224 + if (cable_power_class <= QSFP_POWER_CLASS_3) { 225 + /* Power class <= 3, ignore config & turn RX CDR on */ 226 + *cdr_ctrl_byte |= 0xF; 227 + return; 228 + } 229 + 207 230 get_platform_config_field( 208 231 ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE, 209 232 rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY, ··· 245 250 246 251 static void apply_tx_cdr(struct hfi1_pportdata *ppd, 247 252 u32 tx_preset_index, 248 - u8 *ctr_ctrl_byte) 253 + u8 *cdr_ctrl_byte) 249 254 { 250 255 u32 tx_preset; 251 256 u8 *cache = ppd->qsfp_info.cache; 257 + int cable_power_class; 252 258 253 259 if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) && 254 260 (cache[QSFP_CDR_INFO_OFFS] & 0x80))) 255 261 return; 262 + 263 + /* TX CDR present, bypass supported */ 264 + cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); 265 + 266 + if (cable_power_class <= QSFP_POWER_CLASS_3) { 267 + /* Power class <= 3, ignore config & turn TX CDR on */ 268 + *cdr_ctrl_byte |= 0xF0; 269 + return; 270 + } 256 271 257 272 get_platform_config_field( 258 273 ppd->dd, ··· 287 282 (tx_preset << 2) | (tx_preset << 3)); 288 283 289 284 if (tx_preset) 290 - *ctr_ctrl_byte |= (tx_preset << 4); 285 + *cdr_ctrl_byte |= (tx_preset << 4); 291 286 else 292 287 /* Preserve current/determined RX CDR status */ 293 - *ctr_ctrl_byte &= ((tx_preset << 4) | 0xF); 288 + *cdr_ctrl_byte &= 
((tx_preset << 4) | 0xF); 294 289 } 295 290 296 291 static void apply_cdr_settings( ··· 603 598 "Applying TX settings"); 604 599 } 605 600 601 + /* Must be holding the QSFP i2c resource */ 606 602 static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset, 607 603 u32 *ptr_rx_preset, u32 *ptr_total_atten) 608 604 { ··· 611 605 u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled; 612 606 u8 *cache = ppd->qsfp_info.cache; 613 607 614 - ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT); 615 - if (ret) { 616 - dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n", 617 - __func__, (int)ppd->dd->hfi1_id); 618 - return ret; 619 - } 620 - 621 608 ppd->qsfp_info.limiting_active = 1; 622 609 623 610 ret = set_qsfp_tx(ppd, 0); 624 611 if (ret) 625 - goto bail_unlock; 612 + return ret; 626 613 627 614 ret = qual_power(ppd); 628 615 if (ret) 629 - goto bail_unlock; 616 + return ret; 630 617 631 618 ret = qual_bitrate(ppd); 632 619 if (ret) 633 - goto bail_unlock; 620 + return ret; 634 621 635 622 if (ppd->qsfp_info.reset_needed) { 636 623 reset_qsfp(ppd); ··· 635 636 636 637 ret = set_qsfp_high_power(ppd); 637 638 if (ret) 638 - goto bail_unlock; 639 + return ret; 639 640 640 641 if (cache[QSFP_EQ_INFO_OFFS] & 0x4) { 641 642 ret = get_platform_config_field( ··· 645 646 ptr_tx_preset, 4); 646 647 if (ret) { 647 648 *ptr_tx_preset = OPA_INVALID_INDEX; 648 - goto bail_unlock; 649 + return ret; 649 650 } 650 651 } else { 651 652 ret = get_platform_config_field( ··· 655 656 ptr_tx_preset, 4); 656 657 if (ret) { 657 658 *ptr_tx_preset = OPA_INVALID_INDEX; 658 - goto bail_unlock; 659 + return ret; 659 660 } 660 661 } 661 662 ··· 664 665 PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4); 665 666 if (ret) { 666 667 *ptr_rx_preset = OPA_INVALID_INDEX; 667 - goto bail_unlock; 668 + return ret; 668 669 } 669 670 670 671 if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G)) ··· 684 685 685 686 ret = set_qsfp_tx(ppd, 1); 686 687 687 - bail_unlock: 688 - release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); 689 688 return ret; 690 689 } 691 690 ··· 830 833 total_atten = platform_atten + remote_atten; 831 834 832 835 tuning_method = OPA_PASSIVE_TUNING; 833 - } else 836 + } else { 834 837 ppd->offline_disabled_reason = 835 838 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG); 839 + goto bail; 840 + } 836 841 break; 837 842 case PORT_TYPE_QSFP: 838 843 if (qsfp_mod_present(ppd)) { 844 + ret = acquire_chip_resource(ppd->dd, 845 + qsfp_resource(ppd->dd), 846 + QSFP_WAIT); 847 + if (ret) { 848 + dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n", 849 + __func__, (int)ppd->dd->hfi1_id); 850 + goto bail; 851 + } 839 852 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 840 853 841 854 if (ppd->qsfp_info.cache_valid) { ··· 860 853 * update the cache to reflect the changes 861 854 */ 862 855 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 863 - if (ret) 864 - goto bail; 865 - 866 856 limiting_active = 867 857 ppd->qsfp_info.limiting_active; 868 858 } else { 869 859 dd_dev_err(dd, 870 860 "%s: Reading QSFP memory failed\n", 871 861 __func__); 872 - goto bail; 862 + ret = -EINVAL; /* a fail indication */ 873 863 } 874 - } else 864 + release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); 865 + if (ret) 866 + goto bail; 867 + } else { 875 868 ppd->offline_disabled_reason = 876 869 HFI1_ODR_MASK( 877 870 OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED); 871 + goto bail; 872 + } 878 873 break; 879 874 default: 880 875 dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+5 -1
drivers/staging/rdma/hfi1/qp.c
··· 167 167 */ 168 168 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) 169 169 { 170 - int val = opa_mtu_enum_to_int((int)mtu); 170 + int val; 171 171 172 + /* Constraining 10KB packets to 8KB packets */ 173 + if (mtu == (enum ib_mtu)OPA_MTU_10240) 174 + mtu = OPA_MTU_8192; 175 + val = opa_mtu_enum_to_int((int)mtu); 172 176 if (val > 0) 173 177 return val; 174 178 return ib_mtu_enum_to_int(mtu);
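The qp.c hunk constrains the OPA 10KB MTU to 8KB before converting the enum to bytes. A tiny sketch of that clamp; the enum values here are assumptions for illustration only, not the OPA/IB definitions.

#include <stdio.h>

/* assumed enum values for illustration only */
enum demo_mtu { DEMO_MTU_4096 = 5, DEMO_MTU_8192 = 6, DEMO_MTU_10240 = 7 };

static int demo_mtu_enum_to_int(enum demo_mtu mtu)
{
    switch (mtu) {
    case DEMO_MTU_4096:  return 4096;
    case DEMO_MTU_8192:  return 8192;
    case DEMO_MTU_10240: return 10240;
    default:             return -1;
    }
}

/* mirrors the new clamp in verbs_mtu_enum_to_int() */
static int verbs_mtu_demo(enum demo_mtu mtu)
{
    if (mtu == DEMO_MTU_10240)         /* constrain 10KB packets to 8KB */
        mtu = DEMO_MTU_8192;
    return demo_mtu_enum_to_int(mtu);
}

int main(void)
{
    printf("10240 -> %d\n", verbs_mtu_demo(DEMO_MTU_10240));   /* 8192 */
    return 0;
}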
+42 -16
drivers/staging/rdma/hfi1/qsfp.c
··· 96 96 { 97 97 int ret; 98 98 99 - if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 99 + if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 100 100 return -EACCES; 101 101 102 102 /* make sure the TWSI bus is in a sane state */ ··· 162 162 { 163 163 int ret; 164 164 165 - if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 165 + if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 166 166 return -EACCES; 167 167 168 168 /* make sure the TWSI bus is in a sane state */ ··· 192 192 int ret; 193 193 u8 page; 194 194 195 - if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 195 + if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 196 196 return -EACCES; 197 197 198 198 /* make sure the TWSI bus is in a sane state */ ··· 276 276 int ret; 277 277 u8 page; 278 278 279 - if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) 279 + if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 280 280 return -EACCES; 281 281 282 282 /* make sure the TWSI bus is in a sane state */ ··· 355 355 * The calls to qsfp_{read,write} in this function correctly handle the 356 356 * address map difference between this mapping and the mapping implemented 357 357 * by those functions 358 + * 359 + * The caller must be holding the QSFP i2c chain resource. 358 360 */ 359 361 int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) 360 362 { ··· 373 371 374 372 if (!qsfp_mod_present(ppd)) { 375 373 ret = -ENODEV; 376 - goto bail_no_release; 374 + goto bail; 377 375 } 378 - 379 - ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT); 380 - if (ret) 381 - goto bail_no_release; 382 376 383 377 ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE); 384 378 if (ret != QSFP_PAGESIZE) { ··· 438 440 } 439 441 } 440 442 441 - release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); 442 - 443 443 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); 444 444 ppd->qsfp_info.cache_valid = 1; 445 445 ppd->qsfp_info.cache_refresh_required = 0; ··· 446 450 return 0; 447 451 448 452 bail: 449 - release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); 450 - bail_no_release: 451 453 memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128)); 452 454 return ret; 453 455 } ··· 460 466 #define QSFP_DUMP_CHUNK 16 /* Holds longest string */ 461 467 #define QSFP_DEFAULT_HDR_CNT 224 462 468 463 - static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; 469 + #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3) 470 + #define QSFP_HIGH_PWR(pbyte) ((pbyte) & 3) 471 + /* For use with QSFP_HIGH_PWR macro */ 472 + #define QSFP_HIGH_PWR_UNUSED 0 /* Bits [1:0] = 00 implies low power module */ 473 + 474 + /* 475 + * Takes power class byte [Page 00 Byte 129] in SFF 8636 476 + * Returns power class as integer (1 through 7, per SFF 8636 rev 2.4) 477 + */ 478 + int get_qsfp_power_class(u8 power_byte) 479 + { 480 + if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED) 481 + /* power classes count from 1, their bit encodings from 0 */ 482 + return (QSFP_PWR(power_byte) + 1); 483 + /* 484 + * 00 in the high power classes stands for unused, bringing 485 + * balance to the off-by-1 offset above, we add 4 here to 486 + * account for the difference between the low and high power 487 + * groups 488 + */ 489 + return (QSFP_HIGH_PWR(power_byte) + 4); 490 + } 464 491 465 492 int qsfp_mod_present(struct hfi1_pportdata *ppd) 466 493 { ··· 552 537 return ret; 553 538 } 554 539 540 + static const char *pwr_codes[8] = {"N/AW", 541 + "1.5W", 542 + "2.0W", 
543 + "2.5W", 544 + "3.5W", 545 + "4.0W", 546 + "4.5W", 547 + "5.0W" 548 + }; 549 + 555 550 int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) 556 551 { 557 552 u8 *cache = &ppd->qsfp_info.cache[0]; ··· 571 546 int bidx = 0; 572 547 u8 *atten = &cache[QSFP_ATTEN_OFFS]; 573 548 u8 *vendor_oui = &cache[QSFP_VOUI_OFFS]; 549 + u8 power_byte = 0; 574 550 575 551 sofar = 0; 576 552 lenstr[0] = ' '; ··· 581 555 if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) 582 556 sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); 583 557 558 + power_byte = cache[QSFP_MOD_PWR_OFFS]; 584 559 sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", 585 - pwr_codes + 586 - (QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]) * 4)); 560 + pwr_codes[get_qsfp_power_class(power_byte)]); 587 561 588 562 sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", 589 563 lenstr,
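get_qsfp_power_class() turns the SFF-8636 extended identifier byte into a power class 1 through 7: bits [7:6] encode classes 1-4 and bits [1:0] encode classes 5-7, with 00 in the low bits meaning "unused". A standalone check of that decoding, with the macros copied from this diff; the sample byte values are illustrative.

#include <stdio.h>
#include <stdint.h>

#define QSFP_PWR(pbyte)       (((pbyte) >> 6) & 3)
#define QSFP_HIGH_PWR(pbyte)  ((pbyte) & 3)
#define QSFP_HIGH_PWR_UNUSED  0

/* same logic as get_qsfp_power_class() in this patch */
static int power_class(uint8_t power_byte)
{
    if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED)
        return QSFP_PWR(power_byte) + 1;   /* classes 1..4 */
    return QSFP_HIGH_PWR(power_byte) + 4;  /* classes 5..7 */
}

int main(void)
{
    /* byte 129 examples: 0x00 -> class 1, 0xC0 -> class 4, 0x03 -> class 7 */
    printf("0x00 -> class %d\n", power_class(0x00));
    printf("0xC0 -> class %d\n", power_class(0xC0));
    printf("0x03 -> class %d\n", power_class(0x03));
    return 0;
}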
+7 -8
drivers/staging/rdma/hfi1/qsfp.h
··· 82 82 /* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */ 83 83 #define QSFP_MOD_ID_OFFS 128 84 84 /* 85 - * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class 86 - * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W 85 + * Byte 129 is "Extended Identifier". 86 + * For bits [7:6]: 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W 87 + * For bits [1:0]: 0:Unused, 1:4W, 2:4.5W, 3:5W 87 88 */ 88 89 #define QSFP_MOD_PWR_OFFS 129 89 90 /* Byte 130 is Connector type. Not Intel req'd */ ··· 191 190 #define QSFP_HIGH_BIAS_WARNING 0x22 192 191 #define QSFP_LOW_BIAS_WARNING 0x11 193 192 193 + #define QSFP_ATTEN_SDR(attenarray) (attenarray[0]) 194 + #define QSFP_ATTEN_DDR(attenarray) (attenarray[1]) 195 + 194 196 /* 195 197 * struct qsfp_data encapsulates state of QSFP device for one port. 196 198 * it will be part of port-specific data if a board supports QSFP. ··· 205 201 * and let the qsfp_lock arbitrate access to common resources. 206 202 * 207 203 */ 208 - 209 - #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3) 210 - #define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4) 211 - #define QSFP_ATTEN_SDR(attenarray) (attenarray[0]) 212 - #define QSFP_ATTEN_DDR(attenarray) (attenarray[1]) 213 - 214 204 struct qsfp_data { 215 205 /* Helps to find our way */ 216 206 struct hfi1_pportdata *ppd; ··· 221 223 222 224 int refresh_qsfp_cache(struct hfi1_pportdata *ppd, 223 225 struct qsfp_data *cp); 226 + int get_qsfp_power_class(u8 power_byte); 224 227 int qsfp_mod_present(struct hfi1_pportdata *ppd); 225 228 int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, 226 229 u32 len, u8 *data);
+4 -5
drivers/staging/rdma/hfi1/rc.c
··· 1497 1497 /* Ignore reserved NAK codes. */ 1498 1498 goto bail_stop; 1499 1499 } 1500 - return ret; 1500 + /* cannot be reached */ 1501 1501 bail_stop: 1502 1502 hfi1_stop_rc_timers(qp); 1503 1503 return ret; ··· 2021 2021 if (sl >= OPA_MAX_SLS) 2022 2022 return; 2023 2023 2024 - cca_timer = &ppd->cca_timer[sl]; 2025 - 2026 2024 cc_state = get_cc_state(ppd); 2027 2025 2028 2026 if (!cc_state) ··· 2039 2041 2040 2042 spin_lock_irqsave(&ppd->cca_timer_lock, flags); 2041 2043 2044 + cca_timer = &ppd->cca_timer[sl]; 2042 2045 if (cca_timer->ccti < ccti_limit) { 2043 2046 if (cca_timer->ccti + ccti_incr <= ccti_limit) 2044 2047 cca_timer->ccti += ccti_incr; ··· 2047 2048 cca_timer->ccti = ccti_limit; 2048 2049 set_link_ipg(ppd); 2049 2050 } 2050 - 2051 - spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); 2052 2051 2053 2052 ccti = cca_timer->ccti; 2054 2053 ··· 2057 2060 hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec), 2058 2061 HRTIMER_MODE_REL); 2059 2062 } 2063 + 2064 + spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); 2060 2065 2061 2066 if ((trigger_threshold != 0) && (ccti >= trigger_threshold)) 2062 2067 log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
+11 -9
drivers/staging/rdma/hfi1/ruc.c
··· 831 831 struct hfi1_pkt_state ps; 832 832 struct hfi1_qp_priv *priv = qp->priv; 833 833 int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); 834 - unsigned long flags; 835 834 unsigned long timeout; 836 835 unsigned long timeout_int; 837 836 int cpu; ··· 865 866 timeout_int = SEND_RESCHED_TIMEOUT; 866 867 } 867 868 868 - spin_lock_irqsave(&qp->s_lock, flags); 869 + spin_lock_irqsave(&qp->s_lock, ps.flags); 869 870 870 871 /* Return if we are already busy processing a work request. */ 871 872 if (!hfi1_send_ok(qp)) { 872 - spin_unlock_irqrestore(&qp->s_lock, flags); 873 + spin_unlock_irqrestore(&qp->s_lock, ps.flags); 873 874 return; 874 875 } 875 876 ··· 883 884 do { 884 885 /* Check for a constructed packet to be sent. */ 885 886 if (qp->s_hdrwords != 0) { 886 - spin_unlock_irqrestore(&qp->s_lock, flags); 887 + spin_unlock_irqrestore(&qp->s_lock, ps.flags); 887 888 /* 888 889 * If the packet cannot be sent now, return and 889 890 * the send tasklet will be woken up later. ··· 896 897 if (unlikely(time_after(jiffies, timeout))) { 897 898 if (workqueue_congested(cpu, 898 899 ps.ppd->hfi1_wq)) { 899 - spin_lock_irqsave(&qp->s_lock, flags); 900 + spin_lock_irqsave( 901 + &qp->s_lock, 902 + ps.flags); 900 903 qp->s_flags &= ~RVT_S_BUSY; 901 904 hfi1_schedule_send(qp); 902 - spin_unlock_irqrestore(&qp->s_lock, 903 - flags); 905 + spin_unlock_irqrestore( 906 + &qp->s_lock, 907 + ps.flags); 904 908 this_cpu_inc( 905 909 *ps.ppd->dd->send_schedule); 906 910 return; ··· 915 913 } 916 914 timeout = jiffies + (timeout_int) / 8; 917 915 } 918 - spin_lock_irqsave(&qp->s_lock, flags); 916 + spin_lock_irqsave(&qp->s_lock, ps.flags); 919 917 } 920 918 } while (make_req(qp, &ps)); 921 919 922 - spin_unlock_irqrestore(&qp->s_lock, flags); 920 + spin_unlock_irqrestore(&qp->s_lock, ps.flags); 923 921 } 924 922 925 923 /*
+2 -2
drivers/staging/rdma/hfi1/sysfs.c
··· 84 84 rcu_read_unlock(); 85 85 return -EINVAL; 86 86 } 87 - memcpy(buf, &cc_state->cct, count); 87 + memcpy(buf, (void *)&cc_state->cct + pos, count); 88 88 rcu_read_unlock(); 89 89 90 90 return count; ··· 131 131 rcu_read_unlock(); 132 132 return -EINVAL; 133 133 } 134 - memcpy(buf, &cc_state->cong_setting, count); 134 + memcpy(buf, (void *)&cc_state->cong_setting + pos, count); 135 135 rcu_read_unlock(); 136 136 137 137 return count;
+4 -4
drivers/staging/rdma/hfi1/ud.c
··· 322 322 (lid == ppd->lid || 323 323 (lid == be16_to_cpu(IB_LID_PERMISSIVE) && 324 324 qp->ibqp.qp_type == IB_QPT_GSI)))) { 325 - unsigned long flags; 325 + unsigned long tflags = ps->flags; 326 326 /* 327 327 * If DMAs are in progress, we can't generate 328 328 * a completion for the loopback packet since ··· 335 335 goto bail; 336 336 } 337 337 qp->s_cur = next_cur; 338 - local_irq_save(flags); 339 - spin_unlock_irqrestore(&qp->s_lock, flags); 338 + spin_unlock_irqrestore(&qp->s_lock, tflags); 340 339 ud_loopback(qp, wqe); 341 - spin_lock_irqsave(&qp->s_lock, flags); 340 + spin_lock_irqsave(&qp->s_lock, tflags); 341 + ps->flags = tflags; 342 342 hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); 343 343 goto done_free_tx; 344 344 }
+5 -2
drivers/staging/rdma/hfi1/user_exp_rcv.c
··· 399 399 * pages, accept the amount pinned so far and program only that. 400 400 * User space knows how to deal with partially programmed buffers. 401 401 */ 402 - if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) 403 - return -ENOMEM; 402 + if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) { 403 + ret = -ENOMEM; 404 + goto bail; 405 + } 406 + 404 407 pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); 405 408 if (pinned <= 0) { 406 409 ret = pinned;
+65 -32
drivers/staging/rdma/hfi1/user_sdma.c
··· 180 180 u64 offset; 181 181 }; 182 182 183 + #define SDMA_CACHE_NODE_EVICT BIT(0) 184 + 183 185 struct sdma_mmu_node { 184 186 struct mmu_rb_node rb; 185 187 struct list_head list; ··· 189 187 atomic_t refcount; 190 188 struct page **pages; 191 189 unsigned npages; 190 + unsigned long flags; 192 191 }; 193 192 194 193 struct user_sdma_request { ··· 596 593 if (vl >= dd->pport->vls_operational || 597 594 vl != sc_to_vlt(dd, sc)) { 598 595 SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl); 596 + ret = -EINVAL; 597 + goto free_req; 598 + } 599 + 600 + /* Checking P_KEY for requests from user-space */ 601 + if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc, 602 + PKEY_CHECK_INVALID)) { 599 603 ret = -EINVAL; 600 604 goto free_req; 601 605 } ··· 1040 1030 return 1 + ((epage - spage) >> PAGE_SHIFT); 1041 1031 } 1042 1032 1043 - /* Caller must hold pq->evict_lock */ 1044 1033 static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) 1045 1034 { 1046 1035 u32 cleared = 0; 1047 1036 struct sdma_mmu_node *node, *ptr; 1037 + struct list_head to_evict = LIST_HEAD_INIT(to_evict); 1048 1038 1039 + spin_lock(&pq->evict_lock); 1049 1040 list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { 1050 1041 /* Make sure that no one is still using the node. */ 1051 1042 if (!atomic_read(&node->refcount)) { 1052 - /* 1053 - * Need to use the page count now as the remove callback 1054 - * will free the node. 1055 - */ 1043 + set_bit(SDMA_CACHE_NODE_EVICT, &node->flags); 1044 + list_del_init(&node->list); 1045 + list_add(&node->list, &to_evict); 1056 1046 cleared += node->npages; 1057 - spin_unlock(&pq->evict_lock); 1058 - hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); 1059 - spin_lock(&pq->evict_lock); 1060 1047 if (cleared >= npages) 1061 1048 break; 1062 1049 } 1063 1050 } 1051 + spin_unlock(&pq->evict_lock); 1052 + 1053 + list_for_each_entry_safe(node, ptr, &to_evict, list) 1054 + hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); 1055 + 1064 1056 return cleared; 1065 1057 } 1066 1058 ··· 1074 1062 struct sdma_mmu_node *node = NULL; 1075 1063 struct mmu_rb_node *rb_node; 1076 1064 1077 - rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, 1078 - (unsigned long)iovec->iov.iov_base, 1079 - iovec->iov.iov_len); 1065 + rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root, 1066 + (unsigned long)iovec->iov.iov_base, 1067 + iovec->iov.iov_len); 1080 1068 if (rb_node && !IS_ERR(rb_node)) 1081 1069 node = container_of(rb_node, struct sdma_mmu_node, rb); 1082 1070 else ··· 1088 1076 return -ENOMEM; 1089 1077 1090 1078 node->rb.addr = (unsigned long)iovec->iov.iov_base; 1091 - node->rb.len = iovec->iov.iov_len; 1092 1079 node->pq = pq; 1093 1080 atomic_set(&node->refcount, 0); 1094 1081 INIT_LIST_HEAD(&node->list); ··· 1104 1093 memcpy(pages, node->pages, node->npages * sizeof(*pages)); 1105 1094 1106 1095 npages -= node->npages; 1096 + 1097 + /* 1098 + * If rb_node is NULL, it means that this is brand new node 1099 + * and, therefore not on the eviction list. 1100 + * If, however, the rb_node is non-NULL, it means that the 1101 + * node is already in RB tree and, therefore on the eviction 1102 + * list (nodes are unconditionally inserted in the eviction 1103 + * list). In that case, we have to remove the node prior to 1104 + * calling the eviction function in order to prevent it from 1105 + * freeing this node. 
1106 + */ 1107 + if (rb_node) { 1108 + spin_lock(&pq->evict_lock); 1109 + list_del_init(&node->list); 1110 + spin_unlock(&pq->evict_lock); 1111 + } 1107 1112 retry: 1108 1113 if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { 1109 - spin_lock(&pq->evict_lock); 1110 1114 cleared = sdma_cache_evict(pq, npages); 1111 - spin_unlock(&pq->evict_lock); 1112 1115 if (cleared >= npages) 1113 1116 goto retry; 1114 1117 } ··· 1142 1117 goto bail; 1143 1118 } 1144 1119 kfree(node->pages); 1120 + node->rb.len = iovec->iov.iov_len; 1145 1121 node->pages = pages; 1146 1122 node->npages += pinned; 1147 1123 npages = node->npages; 1148 1124 spin_lock(&pq->evict_lock); 1149 - if (!rb_node) 1150 - list_add(&node->list, &pq->evict); 1151 - else 1152 - list_move(&node->list, &pq->evict); 1125 + list_add(&node->list, &pq->evict); 1153 1126 pq->n_locked += pinned; 1154 1127 spin_unlock(&pq->evict_lock); 1155 1128 } 1156 1129 iovec->pages = node->pages; 1157 1130 iovec->npages = npages; 1158 1131 1159 - if (!rb_node) { 1160 - ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); 1161 - if (ret) { 1162 - spin_lock(&pq->evict_lock); 1132 + ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); 1133 + if (ret) { 1134 + spin_lock(&pq->evict_lock); 1135 + if (!list_empty(&node->list)) 1163 1136 list_del(&node->list); 1164 - pq->n_locked -= node->npages; 1165 - spin_unlock(&pq->evict_lock); 1166 - ret = 0; 1167 - goto bail; 1168 - } 1169 - } else { 1170 - atomic_inc(&node->refcount); 1137 + pq->n_locked -= node->npages; 1138 + spin_unlock(&pq->evict_lock); 1139 + goto bail; 1171 1140 } 1172 1141 return 0; 1173 1142 bail: 1174 - if (!rb_node) 1175 - kfree(node); 1143 + if (rb_node) 1144 + unpin_vector_pages(current->mm, node->pages, 0, node->npages); 1145 + kfree(node); 1176 1146 return ret; 1177 1147 } 1178 1148 ··· 1578 1558 container_of(mnode, struct sdma_mmu_node, rb); 1579 1559 1580 1560 spin_lock(&node->pq->evict_lock); 1581 - list_del(&node->list); 1561 + /* 1562 + * We've been called by the MMU notifier but this node has been 1563 + * scheduled for eviction. The eviction function will take care 1564 + * of freeing this node. 1565 + * We have to take the above lock first because we are racing 1566 + * against the setting of the bit in the eviction function. 1567 + */ 1568 + if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) { 1569 + spin_unlock(&node->pq->evict_lock); 1570 + return; 1571 + } 1572 + 1573 + if (!list_empty(&node->list)) 1574 + list_del(&node->list); 1582 1575 node->pq->n_locked -= node->npages; 1583 1576 spin_unlock(&node->pq->evict_lock); 1584 1577
+65 -41
drivers/staging/rdma/hfi1/verbs.c
··· 545 545 546 546 if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) 547 547 goto dropit; 548 - if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) || 548 + if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || 549 549 (opcode == IB_OPCODE_CNP)) 550 550 return 1; 551 551 dropit: ··· 1089 1089 1090 1090 /* 1091 1091 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent 1092 - * being an entry from the ingress partition key table), return 0 1092 + * being an entry from the partition key table), return 0 1093 1093 * otherwise. Use the matching criteria for egress partition keys 1094 1094 * specified in the OPAv1 spec., section 9.1l.7. 1095 1095 */ 1096 1096 static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) 1097 1097 { 1098 1098 u16 mkey = pkey & PKEY_LOW_15_MASK; 1099 - u16 ment = ent & PKEY_LOW_15_MASK; 1099 + u16 mentry = ent & PKEY_LOW_15_MASK; 1100 1100 1101 - if (mkey == ment) { 1101 + if (mkey == mentry) { 1102 1102 /* 1103 1103 * If pkey[15] is set (full partition member), 1104 1104 * is bit 15 in the corresponding table element ··· 1111 1111 return 0; 1112 1112 } 1113 1113 1114 - /* 1115 - * egress_pkey_check - return 0 if hdr's pkey matches according to the 1116 - * criteria in the OPAv1 spec., section 9.11.7. 1114 + /** 1115 + * egress_pkey_check - check P_KEY of a packet 1116 + * @ppd: Physical IB port data 1117 + * @lrh: Local route header 1118 + * @bth: Base transport header 1119 + * @sc5: SC for packet 1120 + * @s_pkey_index: It will be used for look up optimization for kernel contexts 1121 + * only. If it is negative value, then it means user contexts is calling this 1122 + * function. 1123 + * 1124 + * It checks if hdr's pkey is valid. 1125 + * 1126 + * Return: 0 on success, otherwise, 1 1117 1127 */ 1118 - static inline int egress_pkey_check(struct hfi1_pportdata *ppd, 1119 - struct hfi1_ib_header *hdr, 1120 - struct rvt_qp *qp) 1128 + int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, 1129 + u8 sc5, int8_t s_pkey_index) 1121 1130 { 1122 - struct hfi1_qp_priv *priv = qp->priv; 1123 - struct hfi1_other_headers *ohdr; 1124 1131 struct hfi1_devdata *dd; 1125 - int i = 0; 1132 + int i; 1126 1133 u16 pkey; 1127 - u8 lnh, sc5 = priv->s_sc; 1134 + int is_user_ctxt_mechanism = (s_pkey_index < 0); 1128 1135 1129 1136 if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) 1130 1137 return 0; 1131 1138 1132 - /* locate the pkey within the headers */ 1133 - lnh = be16_to_cpu(hdr->lrh[0]) & 3; 1134 - if (lnh == HFI1_LRH_GRH) 1135 - ohdr = &hdr->u.l.oth; 1136 - else 1137 - ohdr = &hdr->u.oth; 1138 - 1139 - pkey = (u16)be32_to_cpu(ohdr->bth[0]); 1139 + pkey = (u16)be32_to_cpu(bth[0]); 1140 1140 1141 1141 /* If SC15, pkey[0:14] must be 0x7fff */ 1142 1142 if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) ··· 1146 1146 if ((pkey & PKEY_LOW_15_MASK) == 0) 1147 1147 goto bad; 1148 1148 1149 - /* The most likely matching pkey has index qp->s_pkey_index */ 1150 - if (unlikely(!egress_pkey_matches_entry(pkey, 1151 - ppd->pkeys 1152 - [qp->s_pkey_index]))) { 1153 - /* no match - try the entire table */ 1154 - for (; i < MAX_PKEY_VALUES; i++) { 1155 - if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) 1156 - break; 1157 - } 1149 + /* 1150 + * For the kernel contexts only, if a qp is passed into the function, 1151 + * the most likely matching pkey has index qp->s_pkey_index 1152 + */ 1153 + if (!is_user_ctxt_mechanism && 1154 + egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) { 1155 + 
return 0; 1158 1156 } 1159 1157 1160 - if (i < MAX_PKEY_VALUES) 1161 - return 0; 1158 + for (i = 0; i < MAX_PKEY_VALUES; i++) { 1159 + if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) 1160 + return 0; 1161 + } 1162 1162 bad: 1163 - incr_cntr64(&ppd->port_xmit_constraint_errors); 1164 - dd = ppd->dd; 1165 - if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) { 1166 - u16 slid = be16_to_cpu(hdr->lrh[3]); 1163 + /* 1164 + * For the user-context mechanism, the P_KEY check would only happen 1165 + * once per SDMA request, not once per packet. Therefore, there's no 1166 + * need to increment the counter for the user-context mechanism. 1167 + */ 1168 + if (!is_user_ctxt_mechanism) { 1169 + incr_cntr64(&ppd->port_xmit_constraint_errors); 1170 + dd = ppd->dd; 1171 + if (!(dd->err_info_xmit_constraint.status & 1172 + OPA_EI_STATUS_SMASK)) { 1173 + u16 slid = be16_to_cpu(lrh[3]); 1167 1174 1168 - dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK; 1169 - dd->err_info_xmit_constraint.slid = slid; 1170 - dd->err_info_xmit_constraint.pkey = pkey; 1175 + dd->err_info_xmit_constraint.status |= 1176 + OPA_EI_STATUS_SMASK; 1177 + dd->err_info_xmit_constraint.slid = slid; 1178 + dd->err_info_xmit_constraint.pkey = pkey; 1179 + } 1171 1180 } 1172 1181 return 1; 1173 1182 } ··· 1236 1227 { 1237 1228 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 1238 1229 struct hfi1_qp_priv *priv = qp->priv; 1230 + struct hfi1_other_headers *ohdr; 1231 + struct hfi1_ib_header *hdr; 1239 1232 send_routine sr; 1240 1233 int ret; 1234 + u8 lnh; 1235 + 1236 + hdr = &ps->s_txreq->phdr.hdr; 1237 + /* locate the pkey within the headers */ 1238 + lnh = be16_to_cpu(hdr->lrh[0]) & 3; 1239 + if (lnh == HFI1_LRH_GRH) 1240 + ohdr = &hdr->u.l.oth; 1241 + else 1242 + ohdr = &hdr->u.oth; 1241 1243 1242 1244 sr = get_send_routine(qp, ps->s_txreq); 1243 - ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); 1245 + ret = egress_pkey_check(dd->pport, 1246 + hdr->lrh, 1247 + ohdr->bth, 1248 + priv->s_sc, 1249 + qp->s_pkey_index); 1244 1250 if (unlikely(ret)) { 1245 1251 /* 1246 1252 * The value we are returning here does not get propagated to
+1 -3
drivers/staging/rdma/hfi1/verbs.h
··· 215 215 struct hfi1_ibport *ibp; 216 216 struct hfi1_pportdata *ppd; 217 217 struct verbs_txreq *s_txreq; 218 + unsigned long flags; 218 219 }; 219 220 220 221 #define HFI1_PSN_CREDIT 16 ··· 334 333 #define PSN_SHIFT 1 335 334 #endif 336 335 #define PSN_MODIFY_MASK 0xFFFFFF 337 - 338 - /* Number of bits to pay attention to in the opcode for checking qp type */ 339 - #define OPCODE_QP_MASK 0xE0 340 336 341 337 /* 342 338 * Compare the lower 24 bits of the msn values.
+18 -14
drivers/target/target_core_transport.c
··· 2195 2195 transport_handle_queue_full(cmd, cmd->se_dev); 2196 2196 } 2197 2197 2198 - static inline void transport_free_sgl(struct scatterlist *sgl, int nents) 2198 + void target_free_sgl(struct scatterlist *sgl, int nents) 2199 2199 { 2200 2200 struct scatterlist *sg; 2201 2201 int count; ··· 2205 2205 2206 2206 kfree(sgl); 2207 2207 } 2208 + EXPORT_SYMBOL(target_free_sgl); 2208 2209 2209 2210 static inline void transport_reset_sgl_orig(struct se_cmd *cmd) 2210 2211 { ··· 2226 2225 static inline void transport_free_pages(struct se_cmd *cmd) 2227 2226 { 2228 2227 if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { 2229 - transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); 2228 + target_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); 2230 2229 cmd->t_prot_sg = NULL; 2231 2230 cmd->t_prot_nents = 0; 2232 2231 } ··· 2237 2236 * SG_TO_MEM_NOALLOC to function with COMPARE_AND_WRITE 2238 2237 */ 2239 2238 if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) { 2240 - transport_free_sgl(cmd->t_bidi_data_sg, 2239 + target_free_sgl(cmd->t_bidi_data_sg, 2241 2240 cmd->t_bidi_data_nents); 2242 2241 cmd->t_bidi_data_sg = NULL; 2243 2242 cmd->t_bidi_data_nents = 0; ··· 2247 2246 } 2248 2247 transport_reset_sgl_orig(cmd); 2249 2248 2250 - transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents); 2249 + target_free_sgl(cmd->t_data_sg, cmd->t_data_nents); 2251 2250 cmd->t_data_sg = NULL; 2252 2251 cmd->t_data_nents = 0; 2253 2252 2254 - transport_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents); 2253 + target_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents); 2255 2254 cmd->t_bidi_data_sg = NULL; 2256 2255 cmd->t_bidi_data_nents = 0; 2257 2256 } ··· 2325 2324 2326 2325 int 2327 2326 target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length, 2328 - bool zero_page) 2327 + bool zero_page, bool chainable) 2329 2328 { 2330 2329 struct scatterlist *sg; 2331 2330 struct page *page; 2332 2331 gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0; 2333 - unsigned int nent; 2332 + unsigned int nalloc, nent; 2334 2333 int i = 0; 2335 2334 2336 - nent = DIV_ROUND_UP(length, PAGE_SIZE); 2337 - sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL); 2335 + nalloc = nent = DIV_ROUND_UP(length, PAGE_SIZE); 2336 + if (chainable) 2337 + nalloc++; 2338 + sg = kmalloc_array(nalloc, sizeof(struct scatterlist), GFP_KERNEL); 2338 2339 if (!sg) 2339 2340 return -ENOMEM; 2340 2341 2341 - sg_init_table(sg, nent); 2342 + sg_init_table(sg, nalloc); 2342 2343 2343 2344 while (length) { 2344 2345 u32 page_len = min_t(u32, length, PAGE_SIZE); ··· 2364 2361 kfree(sg); 2365 2362 return -ENOMEM; 2366 2363 } 2364 + EXPORT_SYMBOL(target_alloc_sgl); 2367 2365 2368 2366 /* 2369 2367 * Allocate any required resources to execute the command. 
For writes we ··· 2380 2376 if (cmd->prot_op != TARGET_PROT_NORMAL && 2381 2377 !(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { 2382 2378 ret = target_alloc_sgl(&cmd->t_prot_sg, &cmd->t_prot_nents, 2383 - cmd->prot_length, true); 2379 + cmd->prot_length, true, false); 2384 2380 if (ret < 0) 2385 2381 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2386 2382 } ··· 2405 2401 2406 2402 ret = target_alloc_sgl(&cmd->t_bidi_data_sg, 2407 2403 &cmd->t_bidi_data_nents, 2408 - bidi_length, zero_flag); 2404 + bidi_length, zero_flag, false); 2409 2405 if (ret < 0) 2410 2406 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2411 2407 } 2412 2408 2413 2409 ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, 2414 - cmd->data_length, zero_flag); 2410 + cmd->data_length, zero_flag, false); 2415 2411 if (ret < 0) 2416 2412 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2417 2413 } else if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && ··· 2425 2421 2426 2422 ret = target_alloc_sgl(&cmd->t_bidi_data_sg, 2427 2423 &cmd->t_bidi_data_nents, 2428 - caw_length, zero_flag); 2424 + caw_length, zero_flag, false); 2429 2425 if (ret < 0) 2430 2426 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 2431 2427 }
+1 -1
drivers/target/target_core_xcopy.c
··· 563 563 564 564 if (alloc_mem) { 565 565 rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, 566 - cmd->data_length, false); 566 + cmd->data_length, false, false); 567 567 if (rc < 0) { 568 568 ret = rc; 569 569 goto out;
+5
include/linux/mlx5/cq.h
··· 53 53 unsigned arm_sn; 54 54 struct mlx5_rsc_debug *dbg; 55 55 int pid; 56 + struct { 57 + struct list_head list; 58 + void (*comp)(struct mlx5_core_cq *); 59 + void *priv; 60 + } tasklet_ctx; 56 61 }; 57 62 58 63
+10
include/linux/mlx5/driver.h
··· 42 42 #include <linux/vmalloc.h> 43 43 #include <linux/radix-tree.h> 44 44 #include <linux/workqueue.h> 45 + #include <linux/interrupt.h> 45 46 46 47 #include <linux/mlx5/device.h> 47 48 #include <linux/mlx5/doorbell.h> ··· 313 312 u8 page_shift; 314 313 }; 315 314 315 + struct mlx5_eq_tasklet { 316 + struct list_head list; 317 + struct list_head process_list; 318 + struct tasklet_struct task; 319 + /* lock on completion tasklet list */ 320 + spinlock_t lock; 321 + }; 322 + 316 323 struct mlx5_eq { 317 324 struct mlx5_core_dev *dev; 318 325 __be32 __iomem *doorbell; ··· 334 325 struct list_head list; 335 326 int index; 336 327 struct mlx5_rsc_debug *dbg; 328 + struct mlx5_eq_tasklet tasklet_ctx; 337 329 }; 338 330 339 331 struct mlx5_core_psv {
+45 -16
include/rdma/ib_verbs.h
··· 220 220 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), 221 221 IB_DEVICE_SG_GAPS_REG = (1ULL << 32), 222 222 IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), 223 + IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34), 223 224 }; 224 225 225 226 enum ib_signature_prot_cap { ··· 932 931 u32 max_send_sge; 933 932 u32 max_recv_sge; 934 933 u32 max_inline_data; 934 + 935 + /* 936 + * Maximum number of rdma_rw_ctx structures in flight at a time. 937 + * ib_create_qp() will calculate the right amount of neededed WRs 938 + * and MRs based on this. 939 + */ 940 + u32 max_rdma_ctxs; 935 941 }; 936 942 937 943 enum ib_sig_type { ··· 989 981 IB_QP_CREATE_NETIF_QP = 1 << 5, 990 982 IB_QP_CREATE_SIGNATURE_EN = 1 << 6, 991 983 IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, 984 + IB_QP_CREATE_SCATTER_FCS = 1 << 8, 992 985 /* reserve bits 26-31 for low level drivers' internal use */ 993 986 IB_QP_CREATE_RESERVED_START = 1 << 26, 994 987 IB_QP_CREATE_RESERVED_END = 1 << 31, ··· 1011 1002 enum ib_sig_type sq_sig_type; 1012 1003 enum ib_qp_type qp_type; 1013 1004 enum ib_qp_create_flags create_flags; 1014 - u8 port_num; /* special QP types only */ 1005 + 1006 + /* 1007 + * Only needed for special QP types, or when using the RW API. 1008 + */ 1009 + u8 port_num; 1015 1010 }; 1016 1011 1017 1012 struct ib_qp_open_attr { ··· 1434 1421 struct ib_pd *pd; 1435 1422 struct ib_cq *send_cq; 1436 1423 struct ib_cq *recv_cq; 1424 + spinlock_t mr_lock; 1425 + int mrs_used; 1426 + struct list_head rdma_mrs; 1427 + struct list_head sig_mrs; 1437 1428 struct ib_srq *srq; 1438 1429 struct ib_xrcd *xrcd; /* XRC TGT QPs only */ 1439 1430 struct list_head xrcd_list; 1431 + 1440 1432 /* count times opened, mcast attaches, flow attaches */ 1441 1433 atomic_t usecnt; 1442 1434 struct list_head open_list; ··· 1456 1438 struct ib_mr { 1457 1439 struct ib_device *device; 1458 1440 struct ib_pd *pd; 1459 - struct ib_uobject *uobject; 1460 1441 u32 lkey; 1461 1442 u32 rkey; 1462 1443 u64 iova; 1463 1444 u32 length; 1464 1445 unsigned int page_size; 1446 + bool need_inval; 1447 + union { 1448 + struct ib_uobject *uobject; /* user */ 1449 + struct list_head qp_entry; /* FR */ 1450 + }; 1465 1451 }; 1466 1452 1467 1453 struct ib_mw { ··· 1849 1827 u32 max_num_sg); 1850 1828 int (*map_mr_sg)(struct ib_mr *mr, 1851 1829 struct scatterlist *sg, 1852 - int sg_nents); 1830 + int sg_nents, 1831 + unsigned int *sg_offset); 1853 1832 struct ib_mw * (*alloc_mw)(struct ib_pd *pd, 1854 1833 enum ib_mw_type type, 1855 1834 struct ib_udata *udata); ··· 2338 2315 { 2339 2316 return rdma_protocol_roce(device, port_num) && 2340 2317 device->add_gid && device->del_gid; 2318 + } 2319 + 2320 + /* 2321 + * Check if the device supports READ W/ INVALIDATE. 2322 + */ 2323 + static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) 2324 + { 2325 + /* 2326 + * iWarp drivers must support READ W/ INVALIDATE. No other protocol 2327 + * has support for it yet. 
2328 + */ 2329 + return rdma_protocol_iwarp(dev, port_num); 2341 2330 } 2342 2331 2343 2332 int ib_query_gid(struct ib_device *device, ··· 3146 3111 u16 pkey, const union ib_gid *gid, 3147 3112 const struct sockaddr *addr); 3148 3113 3149 - int ib_map_mr_sg(struct ib_mr *mr, 3150 - struct scatterlist *sg, 3151 - int sg_nents, 3152 - unsigned int page_size); 3114 + int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, 3115 + unsigned int *sg_offset, unsigned int page_size); 3153 3116 3154 3117 static inline int 3155 - ib_map_mr_sg_zbva(struct ib_mr *mr, 3156 - struct scatterlist *sg, 3157 - int sg_nents, 3158 - unsigned int page_size) 3118 + ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, 3119 + unsigned int *sg_offset, unsigned int page_size) 3159 3120 { 3160 3121 int n; 3161 3122 3162 - n = ib_map_mr_sg(mr, sg, sg_nents, page_size); 3123 + n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size); 3163 3124 mr->iova = 0; 3164 3125 3165 3126 return n; 3166 3127 } 3167 3128 3168 - int ib_sg_to_pages(struct ib_mr *mr, 3169 - struct scatterlist *sgl, 3170 - int sg_nents, 3171 - int (*set_page)(struct ib_mr *, u64)); 3129 + int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, 3130 + unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64)); 3172 3131 3173 3132 void ib_drain_rq(struct ib_qp *qp); 3174 3133 void ib_drain_sq(struct ib_qp *qp);
+25
include/rdma/mr_pool.h
··· 1 + /* 2 + * Copyright (c) 2016 HGST, a Western Digital Company. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #ifndef _RDMA_MR_POOL_H 14 + #define _RDMA_MR_POOL_H 1 15 + 16 + #include <rdma/ib_verbs.h> 17 + 18 + struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list); 19 + void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr); 20 + 21 + int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, 22 + enum ib_mr_type type, u32 max_num_sg); 23 + void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list); 24 + 25 + #endif /* _RDMA_MR_POOL_H */
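As a rough guide to how these helpers are meant to be driven against the per-QP MR lists added in ib_verbs.h above, a hedged sketch follows; the pool size, SGE count and the mapping step are illustrative only:

#include <rdma/mr_pool.h>

static int mr_pool_usage_sketch(struct ib_qp *qp, struct scatterlist *sg,
				int sg_nents)
{
	struct ib_mr *mr;
	int ret;

	/* illustrative: 16 fast-reg MRs of up to 32 pages each on this QP */
	ret = ib_mr_pool_init(qp, &qp->rdma_mrs, 16, IB_MR_TYPE_MEM_REG, 32);
	if (ret)
		return ret;

	mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
	if (mr) {
		/* map the S/G list; a REG_MR WR would be posted before use */
		ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
		ib_mr_pool_put(qp, &qp->rdma_mrs, mr);
	}

	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
	return 0;
}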
+1
include/rdma/rdma_vt.h
··· 467 467 } 468 468 469 469 struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); 470 + void rvt_dealloc_device(struct rvt_dev_info *rdi); 470 471 int rvt_register_device(struct rvt_dev_info *rvd); 471 472 void rvt_unregister_device(struct rvt_dev_info *rvd); 472 473 int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+3 -2
include/rdma/rdmavt_qp.h
··· 117 117 /* 118 118 * Wait flags that would prevent any packet type from being sent. 119 119 */ 120 - #define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ 121 - RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) 120 + #define RVT_S_ANY_WAIT_IO \ 121 + (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \ 122 + RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) 122 123 123 124 /* 124 125 * Wait flags that would prevent send work requests from making progress.
+88
include/rdma/rw.h
··· 1 + /* 2 + * Copyright (c) 2016 HGST, a Western Digital Company. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + */ 13 + #ifndef _RDMA_RW_H 14 + #define _RDMA_RW_H 15 + 16 + #include <linux/dma-mapping.h> 17 + #include <linux/scatterlist.h> 18 + #include <rdma/ib_verbs.h> 19 + #include <rdma/rdma_cm.h> 20 + #include <rdma/mr_pool.h> 21 + 22 + struct rdma_rw_ctx { 23 + /* number of RDMA READ/WRITE WRs (not counting MR WRs) */ 24 + u32 nr_ops; 25 + 26 + /* tag for the union below: */ 27 + u8 type; 28 + 29 + union { 30 + /* for mapping a single SGE: */ 31 + struct { 32 + struct ib_sge sge; 33 + struct ib_rdma_wr wr; 34 + } single; 35 + 36 + /* for mapping of multiple SGEs: */ 37 + struct { 38 + struct ib_sge *sges; 39 + struct ib_rdma_wr *wrs; 40 + } map; 41 + 42 + /* for registering multiple WRs: */ 43 + struct rdma_rw_reg_ctx { 44 + struct ib_sge sge; 45 + struct ib_rdma_wr wr; 46 + struct ib_reg_wr reg_wr; 47 + struct ib_send_wr inv_wr; 48 + struct ib_mr *mr; 49 + } *reg; 50 + 51 + struct { 52 + struct rdma_rw_reg_ctx data; 53 + struct rdma_rw_reg_ctx prot; 54 + struct ib_send_wr sig_inv_wr; 55 + struct ib_mr *sig_mr; 56 + struct ib_sge sig_sge; 57 + struct ib_sig_handover_wr sig_wr; 58 + } *sig; 59 + }; 60 + }; 61 + 62 + int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 63 + struct scatterlist *sg, u32 sg_cnt, u32 sg_offset, 64 + u64 remote_addr, u32 rkey, enum dma_data_direction dir); 65 + void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 66 + struct scatterlist *sg, u32 sg_cnt, 67 + enum dma_data_direction dir); 68 + 69 + int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 70 + u8 port_num, struct scatterlist *sg, u32 sg_cnt, 71 + struct scatterlist *prot_sg, u32 prot_sg_cnt, 72 + struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey, 73 + enum dma_data_direction dir); 74 + void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 75 + u8 port_num, struct scatterlist *sg, u32 sg_cnt, 76 + struct scatterlist *prot_sg, u32 prot_sg_cnt, 77 + enum dma_data_direction dir); 78 + 79 + struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, 80 + u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); 81 + int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 82 + struct ib_cqe *cqe, struct ib_send_wr *chain_wr); 83 + 84 + void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); 85 + int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); 86 + void rdma_rw_cleanup_mrs(struct ib_qp *qp); 87 + 88 + #endif /* _RDMA_RW_H */
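To make the intended calling sequence concrete, here is a hedged sketch of a simple transfer using this context API. It assumes a QP whose ib_qp_cap.max_rdma_ctxs was sized for the workload and a caller-owned ctx (normally embedded in a per-request structure so it outlives the transfer); in a real consumer rdma_rw_ctx_destroy() is called from the completion handler, shown here only on the error path:

#include <rdma/rw.h>

/*
 * Sketch only: move sg_cnt local scatterlist entries against the peer
 * buffer described by remote_addr/rkey. DMA_FROM_DEVICE maps to RDMA
 * READ, DMA_TO_DEVICE to RDMA WRITE; cqe->done fires on completion.
 */
static int rdma_rw_usage_sketch(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
				u8 port_num, struct scatterlist *sg,
				u32 sg_cnt, u64 remote_addr, u32 rkey,
				struct ib_cqe *cqe)
{
	int ret;

	ret = rdma_rw_ctx_init(ctx, qp, port_num, sg, sg_cnt, 0,
			       remote_addr, rkey, DMA_FROM_DEVICE);
	if (ret < 0)
		return ret;

	ret = rdma_rw_ctx_post(ctx, qp, port_num, cqe, NULL);
	if (ret)
		rdma_rw_ctx_destroy(ctx, qp, port_num, sg, sg_cnt,
				    DMA_FROM_DEVICE);
	return ret;
}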
-1
include/target/target_core_backend.h
··· 85 85 void *transport_kmap_data_sg(struct se_cmd *); 86 86 void transport_kunmap_data_sg(struct se_cmd *); 87 87 /* core helpers also used by xcopy during internal command setup */ 88 - int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool); 89 88 sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, 90 89 struct scatterlist *, u32, struct scatterlist *, u32); 91 90
+4
include/target/target_core_fabric.h
··· 185 185 int core_tpg_register(struct se_wwn *, struct se_portal_group *, int); 186 186 int core_tpg_deregister(struct se_portal_group *); 187 187 188 + int target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, 189 + u32 length, bool zero_page, bool chainable); 190 + void target_free_sgl(struct scatterlist *sgl, int nents); 191 + 188 192 /* 189 193 * The LIO target core uses DMA_TO_DEVICE to mean that data is going 190 194 * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean
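The new 'chainable' argument makes target_alloc_sgl() reserve one extra scatterlist entry (see the nalloc handling in target_core_transport.c above) so a fabric driver can chain a second list onto the allocation; a hedged sketch, with the length and the chained list purely illustrative:

#include <linux/scatterlist.h>
#include <target/target_core_fabric.h>

static int chainable_sgl_sketch(u32 length, struct scatterlist *extra_sg)
{
	struct scatterlist *sgl;
	unsigned int nents;
	int ret;

	ret = target_alloc_sgl(&sgl, &nents, length, false, true);
	if (ret < 0)
		return ret;

	/* the spare entry reserved by 'chainable' becomes the chain link */
	sg_chain(sgl, nents + 1, extra_sg);

	/* ... hand the chained list to the hardware, then release it ... */
	target_free_sgl(sgl, nents);
	return 0;
}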
+1
include/uapi/rdma/ib_user_verbs.h
··· 226 226 struct ib_uverbs_odp_caps odp_caps; 227 227 __u64 timestamp_mask; 228 228 __u64 hca_core_clock; /* in KHZ */ 229 + __u64 device_cap_flags_ex; 229 230 }; 230 231 231 232 struct ib_uverbs_query_port {
+1 -1
net/rds/ib_frmr.c
··· 111 111 cpu_relax(); 112 112 } 113 113 114 - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE); 114 + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE); 115 115 if (unlikely(ret != ibmr->sg_len)) 116 116 return ret < 0 ? ret : -EINVAL; 117 117
+1 -1
net/sunrpc/xprtrdma/frwr_ops.c
··· 421 421 return -ENOMEM; 422 422 } 423 423 424 - n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); 424 + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); 425 425 if (unlikely(n != frmr->sg_nents)) { 426 426 pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", 427 427 __func__, frmr->fr_mr, n, frmr->sg_nents);
+1 -1
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
··· 281 281 } 282 282 atomic_inc(&xprt->sc_dma_used); 283 283 284 - n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); 284 + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); 285 285 if (unlikely(n != frmr->sg_nents)) { 286 286 pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", 287 287 frmr->mr, n, frmr->sg_nents);