Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"Small collection of incremental improvement patches:

- Minor code cleanup patches, comment improvements, etc from static
tools

- Clean some of the kernel caps, reducing the historical stealth
uAPI leftovers

- Bug fixes and minor changes for rdmavt, hns, rxe, irdma

- Remove unimplemented cruft from rxe

- Reorganize UMR QP code in mlx5 to avoid going through the IB verbs
layer

- flush_workqueue(system_unbound_wq) removal

- Ensure rxe waits for objects to be unused before allowing the core
to free them

- Several rc quality bug fixes for hfi1"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (67 commits)
RDMA/rtrs-clt: Fix one kernel-doc comment
RDMA/hfi1: Remove all traces of diagpkt support
RDMA/hfi1: Consolidate software versions
RDMA/hfi1: Remove pointless driver version
RDMA/hfi1: Fix potential integer multiplication overflow errors
RDMA/hfi1: Prevent panic when SDMA is disabled
RDMA/hfi1: Prevent use of lock before it is initialized
RDMA/rxe: Fix an error handling path in rxe_get_mcg()
IB/core: Fix typo in comment
RDMA/core: Fix typo in comment
IB/hf1: Fix typo in comment
IB/qib: Fix typo in comment
IB/iser: Fix typo in comment
RDMA/mlx4: Avoid flush_scheduled_work() usage
IB/isert: Avoid flush_scheduled_work() usage
RDMA/mlx5: Remove duplicate pointer assignment in mlx5_ib_alloc_implicit_mr()
RDMA/qedr: Remove unnecessary synchronize_irq() before free_irq()
RDMA/hns: Use hr_reg_read() instead of remaining roce_get_xxx()
RDMA/hns: Use hr_reg_xxx() instead of remaining roce_set_xxx()
RDMA/irdma: Add SW mechanism to generate completions on error
...

+1960 -1994
+14 -10
drivers/infiniband/core/device.c
··· 58 58 struct workqueue_struct *ib_comp_unbound_wq; 59 59 struct workqueue_struct *ib_wq; 60 60 EXPORT_SYMBOL_GPL(ib_wq); 61 + static struct workqueue_struct *ib_unreg_wq; 61 62 62 63 /* 63 64 * Each of the three rwsem locks (devices, clients, client_data) protects the ··· 1603 1602 WARN_ON(!refcount_read(&ib_dev->refcount)); 1604 1603 WARN_ON(!ib_dev->ops.dealloc_driver); 1605 1604 get_device(&ib_dev->dev); 1606 - if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) 1605 + if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work)) 1607 1606 put_device(&ib_dev->dev); 1608 1607 } 1609 1608 EXPORT_SYMBOL(ib_unregister_device_queued); ··· 2752 2751 2753 2752 static int __init ib_core_init(void) 2754 2753 { 2755 - int ret; 2754 + int ret = -ENOMEM; 2756 2755 2757 2756 ib_wq = alloc_workqueue("infiniband", 0, 0); 2758 2757 if (!ib_wq) 2759 2758 return -ENOMEM; 2760 2759 2760 + ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND, 2761 + WQ_UNBOUND_MAX_ACTIVE); 2762 + if (!ib_unreg_wq) 2763 + goto err; 2764 + 2761 2765 ib_comp_wq = alloc_workqueue("ib-comp-wq", 2762 2766 WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); 2763 - if (!ib_comp_wq) { 2764 - ret = -ENOMEM; 2765 - goto err; 2766 - } 2767 + if (!ib_comp_wq) 2768 + goto err_unbound; 2767 2769 2768 2770 ib_comp_unbound_wq = 2769 2771 alloc_workqueue("ib-comp-unb-wq", 2770 2772 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | 2771 2773 WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); 2772 - if (!ib_comp_unbound_wq) { 2773 - ret = -ENOMEM; 2774 + if (!ib_comp_unbound_wq) 2774 2775 goto err_comp; 2775 - } 2776 2776 2777 2777 ret = class_register(&ib_class); 2778 2778 if (ret) { ··· 2833 2831 destroy_workqueue(ib_comp_unbound_wq); 2834 2832 err_comp: 2835 2833 destroy_workqueue(ib_comp_wq); 2834 + err_unbound: 2835 + destroy_workqueue(ib_unreg_wq); 2836 2836 err: 2837 2837 destroy_workqueue(ib_wq); 2838 2838 return ret; ··· 2856 2852 destroy_workqueue(ib_comp_wq); 2857 2853 /* Make sure that any pending umem accounting work 
is done. */ 2858 2854 destroy_workqueue(ib_wq); 2859 - flush_workqueue(system_unbound_wq); 2855 + destroy_workqueue(ib_unreg_wq); 2860 2856 WARN_ON(!xa_empty(&clients)); 2861 2857 WARN_ON(!xa_empty(&devices)); 2862 2858 }
+1 -1
drivers/infiniband/core/nldev.c
··· 1739 1739 if (!device) 1740 1740 return -EINVAL; 1741 1741 1742 - if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) { 1742 + if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { 1743 1743 ib_device_put(device); 1744 1744 return -EINVAL; 1745 1745 }
+8 -8
drivers/infiniband/core/sa_query.c
··· 1034 1034 struct netlink_ext_ack *extack) 1035 1035 { 1036 1036 unsigned long flags; 1037 - struct ib_sa_query *query; 1037 + struct ib_sa_query *query = NULL, *iter; 1038 1038 struct ib_mad_send_buf *send_buf; 1039 1039 struct ib_mad_send_wc mad_send_wc; 1040 - int found = 0; 1041 1040 int ret; 1042 1041 1043 1042 if ((nlh->nlmsg_flags & NLM_F_REQUEST) || ··· 1044 1045 return -EPERM; 1045 1046 1046 1047 spin_lock_irqsave(&ib_nl_request_lock, flags); 1047 - list_for_each_entry(query, &ib_nl_request_list, list) { 1048 + list_for_each_entry(iter, &ib_nl_request_list, list) { 1048 1049 /* 1049 1050 * If the query is cancelled, let the timeout routine 1050 1051 * take care of it. 1051 1052 */ 1052 - if (nlh->nlmsg_seq == query->seq) { 1053 - found = !ib_sa_query_cancelled(query); 1054 - if (found) 1055 - list_del(&query->list); 1053 + if (nlh->nlmsg_seq == iter->seq) { 1054 + if (!ib_sa_query_cancelled(iter)) { 1055 + list_del(&iter->list); 1056 + query = iter; 1057 + } 1056 1058 break; 1057 1059 } 1058 1060 } 1059 1061 1060 - if (!found) { 1062 + if (!query) { 1061 1063 spin_unlock_irqrestore(&ib_nl_request_lock, flags); 1062 1064 goto resp_out; 1063 1065 }
+1 -1
drivers/infiniband/core/umem_odp.c
··· 455 455 break; 456 456 } 457 457 } 458 - /* upon sucesss lock should stay on hold for the callee */ 458 + /* upon success lock should stay on hold for the callee */ 459 459 if (!ret) 460 460 ret = dma_index - start_idx; 461 461 else
+1 -1
drivers/infiniband/core/uverbs_cmd.c
··· 337 337 resp->hw_ver = attr->hw_ver; 338 338 resp->max_qp = attr->max_qp; 339 339 resp->max_qp_wr = attr->max_qp_wr; 340 - resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); 340 + resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); 341 341 resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); 342 342 resp->max_sge_rd = attr->max_sge_rd; 343 343 resp->max_cq = attr->max_cq;
+4 -4
drivers/infiniband/core/verbs.c
··· 281 281 } 282 282 rdma_restrack_add(&pd->res); 283 283 284 - if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) 284 + if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) 285 285 pd->local_dma_lkey = device->local_dma_lkey; 286 286 else 287 287 mr_access_flags |= IB_ACCESS_LOCAL_WRITE; ··· 308 308 309 309 pd->__internal_mr = mr; 310 310 311 - if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) 311 + if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) 312 312 pd->local_dma_lkey = pd->__internal_mr->lkey; 313 313 314 314 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) ··· 2131 2131 struct ib_mr *mr; 2132 2132 2133 2133 if (access_flags & IB_ACCESS_ON_DEMAND) { 2134 - if (!(pd->device->attrs.device_cap_flags & 2135 - IB_DEVICE_ON_DEMAND_PAGING)) { 2134 + if (!(pd->device->attrs.kernel_cap_flags & 2135 + IBK_ON_DEMAND_PAGING)) { 2136 2136 pr_debug("ODP support not available\n"); 2137 2137 return ERR_PTR(-EINVAL); 2138 2138 }
+1 -1
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 146 146 | IB_DEVICE_RC_RNR_NAK_GEN 147 147 | IB_DEVICE_SHUTDOWN_PORT 148 148 | IB_DEVICE_SYS_IMAGE_GUID 149 - | IB_DEVICE_LOCAL_DMA_LKEY 150 149 | IB_DEVICE_RESIZE_MAX_WR 151 150 | IB_DEVICE_PORT_ACTIVE_EVENT 152 151 | IB_DEVICE_N_NOTIFY_CQ 153 152 | IB_DEVICE_MEM_WINDOW 154 153 | IB_DEVICE_MEM_WINDOW_TYPE_2B 155 154 | IB_DEVICE_MEM_MGT_EXTENSIONS; 155 + ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 156 156 ib_attr->max_send_sge = dev_attr->max_qp_sges; 157 157 ib_attr->max_recv_sge = dev_attr->max_qp_sges; 158 158 ib_attr->max_sge_rd = dev_attr->max_qp_sges;
-1
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
··· 314 314 struct c4iw_dev { 315 315 struct ib_device ibdev; 316 316 struct c4iw_rdev rdev; 317 - u32 device_cap_flags; 318 317 struct xarray cqs; 319 318 struct xarray qps; 320 319 struct xarray mrs;
+4 -4
drivers/infiniband/hw/cxgb4/provider.c
··· 269 269 dev->rdev.lldi.ports[0]->dev_addr); 270 270 props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); 271 271 props->fw_ver = dev->rdev.lldi.fw_vers; 272 - props->device_cap_flags = dev->device_cap_flags; 272 + props->device_cap_flags = IB_DEVICE_MEM_WINDOW; 273 + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 274 + if (fastreg_support) 275 + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 273 276 props->page_size_cap = T4_PAGESIZE_MASK; 274 277 props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; 275 278 props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; ··· 532 529 pr_debug("c4iw_dev %p\n", dev); 533 530 addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid, 534 531 dev->rdev.lldi.ports[0]->dev_addr); 535 - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; 536 - if (fastreg_support) 537 - dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 538 532 dev->ibdev.local_dma_lkey = 0; 539 533 dev->ibdev.node_type = RDMA_NODE_RNIC; 540 534 BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
-55
drivers/infiniband/hw/hfi1/common.h
··· 137 137 #define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ 138 138 HFI1_USER_SWMINOR) 139 139 140 - #ifndef HFI1_KERN_TYPE 141 - #define HFI1_KERN_TYPE 0 142 - #endif 143 - 144 - /* 145 - * Similarly, this is the kernel version going back to the user. It's 146 - * slightly different, in that we want to tell if the driver was built as 147 - * part of a Intel release, or from the driver from openfabrics.org, 148 - * kernel.org, or a standard distribution, for support reasons. 149 - * The high bit is 0 for non-Intel and 1 for Intel-built/supplied. 150 - * 151 - * It's returned by the driver to the user code during initialization in the 152 - * spi_sw_version field of hfi1_base_info, so the user code can in turn 153 - * check for compatibility with the kernel. 154 - */ 155 - #define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION) 156 - 157 - /* 158 - * Define the driver version number. This is something that refers only 159 - * to the driver itself, not the software interfaces it supports. 160 - */ 161 - #ifndef HFI1_DRIVER_VERSION_BASE 162 - #define HFI1_DRIVER_VERSION_BASE "0.9-294" 163 - #endif 164 - 165 - /* create the final driver version string */ 166 - #ifdef HFI1_IDSTR 167 - #define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR 168 - #else 169 - #define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE 170 - #endif 171 - 172 - /* 173 - * Diagnostics can send a packet by writing the following 174 - * struct to the diag packet special file. 175 - * 176 - * This allows a custom PBC qword, so that special modes and deliberate 177 - * changes to CRCs can be used. 
178 - */ 179 - #define _DIAG_PKT_VERS 1 180 - struct diag_pkt { 181 - __u16 version; /* structure version */ 182 - __u16 unit; /* which device */ 183 - __u16 sw_index; /* send sw index to use */ 184 - __u16 len; /* data length, in bytes */ 185 - __u16 port; /* port number */ 186 - __u16 unused; 187 - __u32 flags; /* call flags */ 188 - __u64 data; /* user data pointer */ 189 - __u64 pbc; /* PBC for the packet */ 190 - }; 191 - 192 - /* diag_pkt flags */ 193 - #define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */ 194 - 195 140 /* 196 141 * The next set of defines are for packet headers, and chip register 197 142 * and memory bits that are visible to and/or used by user-mode software.
-6
drivers/infiniband/hw/hfi1/driver.c
··· 29 29 #undef pr_fmt 30 30 #define pr_fmt(fmt) DRIVER_NAME ": " fmt 31 31 32 - /* 33 - * The size has to be longer than this string, so we can append 34 - * board/chip information to it in the initialization code. 35 - */ 36 - const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; 37 - 38 32 DEFINE_MUTEX(hfi1_mutex); /* general driver use */ 39 33 40 34 unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
+1 -1
drivers/infiniband/hw/hfi1/efivar.c
··· 72 72 * is in the EFIVAR_FS code and may not be compiled in. 73 73 * However, even that is insufficient since it does not cover 74 74 * EFI_BUFFER_TOO_SMALL which could be an important return. 75 - * For now, just split out succces or not found. 75 + * For now, just split out success or not found. 76 76 */ 77 77 ret = status == EFI_SUCCESS ? 0 : 78 78 status == EFI_NOT_FOUND ? -ENOENT :
+3 -1
drivers/infiniband/hw/hfi1/file_ops.c
··· 265 265 unsigned long dim = from->nr_segs; 266 266 int idx; 267 267 268 + if (!HFI1_CAP_IS_KSET(SDMA)) 269 + return -EINVAL; 268 270 idx = srcu_read_lock(&fd->pq_srcu); 269 271 pq = srcu_dereference(fd->pq, &fd->pq_srcu); 270 272 if (!cq || !pq) { ··· 1222 1220 1223 1221 memset(&binfo, 0, sizeof(binfo)); 1224 1222 binfo.hw_version = dd->revision; 1225 - binfo.sw_version = HFI1_KERN_SWVERSION; 1223 + binfo.sw_version = HFI1_USER_SWVERSION; 1226 1224 binfo.bthqp = RVT_KDETH_QP_PREFIX; 1227 1225 binfo.jkey = uctxt->jkey; 1228 1226 /*
+1 -1
drivers/infiniband/hw/hfi1/init.c
··· 489 489 u16 shift, mult; 490 490 u64 src; 491 491 u32 current_egress_rate; /* Mbits /sec */ 492 - u32 max_pkt_time; 492 + u64 max_pkt_time; 493 493 /* 494 494 * max_pkt_time is the maximum packet egress time in units 495 495 * of the fabric clock period 1/(805 MHz).
+7 -5
drivers/infiniband/hw/hfi1/sdma.c
··· 1288 1288 kvfree(sde->tx_ring); 1289 1289 sde->tx_ring = NULL; 1290 1290 } 1291 - spin_lock_irq(&dd->sde_map_lock); 1292 - sdma_map_free(rcu_access_pointer(dd->sdma_map)); 1293 - RCU_INIT_POINTER(dd->sdma_map, NULL); 1294 - spin_unlock_irq(&dd->sde_map_lock); 1295 - synchronize_rcu(); 1291 + if (rcu_access_pointer(dd->sdma_map)) { 1292 + spin_lock_irq(&dd->sde_map_lock); 1293 + sdma_map_free(rcu_access_pointer(dd->sdma_map)); 1294 + RCU_INIT_POINTER(dd->sdma_map, NULL); 1295 + spin_unlock_irq(&dd->sde_map_lock); 1296 + synchronize_rcu(); 1297 + } 1296 1298 kfree(dd->per_sdma); 1297 1299 dd->per_sdma = NULL; 1298 1300
+2 -2
drivers/infiniband/hw/hfi1/verbs.c
··· 1300 1300 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 1301 1301 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1302 1302 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | 1303 - IB_DEVICE_MEM_MGT_EXTENSIONS | 1304 - IB_DEVICE_RDMA_NETDEV_OPA; 1303 + IB_DEVICE_MEM_MGT_EXTENSIONS; 1304 + rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA; 1305 1305 rdi->dparms.props.page_size_cap = PAGE_SIZE; 1306 1306 rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; 1307 1307 rdi->dparms.props.vendor_part_id = dd->pcidev->device;
+18 -14
drivers/infiniband/hw/hns/hns_roce_device.h
··· 106 106 SERV_TYPE_XRC = 5, 107 107 }; 108 108 109 - enum hns_roce_qp_state { 110 - HNS_ROCE_QP_STATE_RST, 111 - HNS_ROCE_QP_STATE_INIT, 112 - HNS_ROCE_QP_STATE_RTR, 113 - HNS_ROCE_QP_STATE_RTS, 114 - HNS_ROCE_QP_STATE_SQD, 115 - HNS_ROCE_QP_STATE_ERR, 116 - HNS_ROCE_QP_NUM_STATE, 117 - }; 118 - 119 109 enum hns_roce_event { 120 110 HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01, 121 111 HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02, ··· 128 138 HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION = 0x16, 129 139 HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17, 130 140 }; 131 - 132 - #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 133 141 134 142 enum { 135 143 HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), ··· 523 535 u16 busy; 524 536 }; 525 537 538 + enum hns_roce_cmdq_state { 539 + HNS_ROCE_CMDQ_STATE_NORMAL, 540 + HNS_ROCE_CMDQ_STATE_FATAL_ERR, 541 + }; 542 + 526 543 struct hns_roce_cmdq { 527 544 struct dma_pool *pool; 528 545 struct semaphore poll_sem; ··· 547 554 * close device, switch into poll mode(non event mode) 548 555 */ 549 556 u8 use_events; 557 + enum hns_roce_cmdq_state state; 550 558 }; 551 559 552 560 struct hns_roce_cmd_mailbox { ··· 651 657 __le32 rsv[15]; 652 658 }; 653 659 660 + #define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l) 661 + 662 + #define CEQE_CQN CEQE_FIELD_LOC(23, 0) 663 + #define CEQE_OWNER CEQE_FIELD_LOC(31, 31) 664 + 654 665 struct hns_roce_aeqe { 655 666 __le32 asyn; 656 667 union { ··· 674 675 } event; 675 676 __le32 rsv[12]; 676 677 }; 678 + 679 + #define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l) 680 + 681 + #define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0) 682 + #define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8) 683 + #define AEQE_OWNER AEQE_FIELD_LOC(31, 31) 684 + #define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32) 677 685 678 686 struct hns_roce_eq { 679 687 struct hns_roce_dev *hr_dev; ··· 731 725 u32 num_pi_qps; 732 726 u32 reserved_qps; 733 727 int num_qpc_timer; 734 - int num_cqc_timer; 735 728 u32 num_srqs; 736 729 u32 max_wqes; 737 730 u32 
max_srq_wrs; ··· 1196 1191 void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); 1197 1192 bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, 1198 1193 struct ib_cq *ib_cq); 1199 - enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); 1200 1194 void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, 1201 1195 struct hns_roce_cq *recv_cq); 1202 1196 void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
+170 -275
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 149 149 aseg->cmp_data = 0; 150 150 } 151 151 152 - roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, 153 - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); 152 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); 154 153 } 155 154 156 155 static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, ··· 270 271 dseg += sizeof(struct hns_roce_v2_rc_send_wqe); 271 272 272 273 if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { 273 - roce_set_bit(rc_sq_wqe->byte_20, 274 - V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); 274 + hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); 275 275 276 276 for (i = 0; i < wr->num_sge; i++) { 277 277 memcpy(dseg, ((void *)wr->sg_list[i].addr), ··· 278 280 dseg += wr->sg_list[i].length; 279 281 } 280 282 } else { 281 - roce_set_bit(rc_sq_wqe->byte_20, 282 - V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); 283 + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); 283 284 284 285 ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); 285 286 if (ret) 286 287 return ret; 287 288 288 - roce_set_field(rc_sq_wqe->byte_16, 289 - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, 290 - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, 291 - curr_idx - *sge_idx); 289 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx); 292 290 } 293 291 294 292 *sge_idx = curr_idx; ··· 303 309 int j = 0; 304 310 int i; 305 311 306 - roce_set_field(rc_sq_wqe->byte_20, 307 - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, 308 - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, 309 - (*sge_ind) & (qp->sge.sge_cnt - 1)); 312 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, 313 + (*sge_ind) & (qp->sge.sge_cnt - 1)); 310 314 311 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 315 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, 312 316 !!(wr->send_flags & IB_SEND_INLINE)); 313 317 if (wr->send_flags & IB_SEND_INLINE) 314 318 return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); ··· 331 339 valid_num_sge - HNS_ROCE_SGE_IN_WQE); 332 340 } 333 341 334 - 
roce_set_field(rc_sq_wqe->byte_16, 335 - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, 336 - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); 342 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); 337 343 338 344 return 0; 339 345 } ··· 402 412 403 413 ud_sq_wqe->immtdata = get_immtdata(wr); 404 414 405 - roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, 406 - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); 415 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); 407 416 408 417 return 0; 409 418 } ··· 413 424 struct ib_device *ib_dev = ah->ibah.device; 414 425 struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); 415 426 416 - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, 417 - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); 418 - 419 - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, 420 - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); 421 - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, 422 - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); 423 - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, 424 - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); 427 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport); 428 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); 429 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); 430 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); 425 431 426 432 if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) 427 433 return -EINVAL; 428 434 429 - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, 430 - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); 435 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); 431 436 432 437 ud_sq_wqe->sgid_index = ah->av.gid_index; 433 438 ··· 431 448 if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) 432 449 return 0; 433 450 434 - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, 435 - 
ah->av.vlan_en); 436 - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, 437 - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); 451 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en); 452 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id); 438 453 439 454 return 0; 440 455 } ··· 457 476 458 477 ud_sq_wqe->msg_len = cpu_to_le32(msg_len); 459 478 460 - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, 479 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE, 461 480 !!(wr->send_flags & IB_SEND_SIGNALED)); 462 - 463 - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, 481 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE, 464 482 !!(wr->send_flags & IB_SEND_SOLICITED)); 465 483 466 - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, 467 - V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); 468 - 469 - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, 470 - V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); 471 - 472 - roce_set_field(ud_sq_wqe->byte_20, 473 - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, 474 - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, 475 - curr_idx & (qp->sge.sge_cnt - 1)); 484 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn); 485 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge); 486 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX, 487 + curr_idx & (qp->sge.sge_cnt - 1)); 476 488 477 489 ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? 
478 490 qp->qkey : ud_wr(wr)->remote_qkey); 479 - roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, 480 - V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); 491 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn); 481 492 482 493 ret = fill_ud_av(ud_sq_wqe, ah); 483 494 if (ret) ··· 489 516 dma_wmb(); 490 517 491 518 *sge_idx = curr_idx; 492 - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, 493 - owner_bit); 519 + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit); 494 520 495 521 return 0; 496 522 } ··· 525 553 ret = -EOPNOTSUPP; 526 554 break; 527 555 case IB_WR_LOCAL_INV: 528 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); 556 + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO); 529 557 fallthrough; 530 558 case IB_WR_SEND_WITH_INV: 531 559 rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); ··· 537 565 if (unlikely(ret)) 538 566 return ret; 539 567 540 - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 541 - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); 568 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); 542 569 543 570 return ret; 544 571 } 572 + 545 573 static inline int set_rc_wqe(struct hns_roce_qp *qp, 546 574 const struct ib_send_wr *wr, 547 575 void *wqe, unsigned int *sge_idx, ··· 562 590 if (WARN_ON(ret)) 563 591 return ret; 564 592 565 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, 593 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE, 566 594 (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); 567 595 568 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, 596 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE, 569 597 (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); 570 598 571 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, 599 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, 572 600 (wr->send_flags & IB_SEND_SIGNALED) ? 
1 : 0); 573 601 574 602 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || ··· 588 616 dma_wmb(); 589 617 590 618 *sge_idx = curr_idx; 591 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, 592 - owner_bit); 619 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit); 593 620 594 621 return ret; 595 622 } ··· 653 682 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; 654 683 655 684 /* All kinds of DirectWQE have the same header field layout */ 656 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); 657 - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, 658 - V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); 659 - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, 660 - V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, 661 - qp->sl >> HNS_ROCE_SL_SHIFT); 662 - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, 663 - V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); 685 + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); 686 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); 687 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H, 688 + qp->sl >> HNS_ROCE_SL_SHIFT); 689 + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head); 664 690 665 691 hns_roce_write512(hr_dev, wqe, qp->sq.db_reg); 666 692 } ··· 1233 1265 return tail == priv->cmq.csq.head; 1234 1266 } 1235 1267 1268 + static void update_cmdq_status(struct hns_roce_dev *hr_dev) 1269 + { 1270 + struct hns_roce_v2_priv *priv = hr_dev->priv; 1271 + struct hnae3_handle *handle = priv->handle; 1272 + 1273 + if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT || 1274 + handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) 1275 + hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; 1276 + } 1277 + 1236 1278 static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, 1237 1279 struct hns_roce_cmq_desc *desc, int num) 1238 1280 { ··· 1274 1296 } while (++timeout < priv->cmq.tx_timeout); 1275 1297 1276 1298 if (hns_roce_cmq_csq_done(hr_dev)) { 1277 - for (ret = 0, i = 0; 
i < num; i++) { 1299 + ret = 0; 1300 + for (i = 0; i < num; i++) { 1278 1301 /* check the result of hardware write back */ 1279 1302 desc[i] = csq->desc[tail++]; 1280 1303 if (tail == csq->desc_num) ··· 1297 1318 csq->head, tail); 1298 1319 csq->head = tail; 1299 1320 1321 + update_cmdq_status(hr_dev); 1322 + 1300 1323 ret = -EAGAIN; 1301 1324 } 1302 1325 ··· 1312 1331 { 1313 1332 bool busy; 1314 1333 int ret; 1334 + 1335 + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) 1336 + return -EIO; 1315 1337 1316 1338 if (!v2_chk_mbox_is_avail(hr_dev, &busy)) 1317 1339 return busy ? -EBUSY : 0; ··· 1483 1499 if (ret) 1484 1500 continue; 1485 1501 1486 - if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) { 1502 + if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) { 1487 1503 if (vf_id == 0) 1488 1504 hr_dev->is_reset = true; 1489 1505 return; ··· 1494 1510 hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag); 1495 1511 } 1496 1512 1497 - static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) 1513 + static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) 1498 1514 { 1499 1515 enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES; 1500 1516 struct hns_roce_cmq_desc desc[2]; ··· 1505 1521 desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); 1506 1522 hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); 1507 1523 hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id); 1508 - hns_roce_cmq_send(hr_dev, desc, 2); 1524 + 1525 + return hns_roce_cmq_send(hr_dev, desc, 2); 1509 1526 } 1510 1527 1511 1528 static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) 1512 1529 { 1530 + int ret; 1513 1531 int i; 1532 + 1533 + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) 1534 + return; 1514 1535 1515 1536 for (i = hr_dev->func_num - 1; i >= 0; i--) { 1516 1537 __hns_roce_function_clear(hr_dev, i); 1517 - if (i != 0) 1518 - hns_roce_free_vf_resource(hr_dev, i); 1538 + 1539 + if (i == 0) 1540 + continue; 1541 
+ 1542 + ret = hns_roce_free_vf_resource(hr_dev, i); 1543 + if (ret) 1544 + ibdev_err(&hr_dev->ib_dev, 1545 + "failed to free vf resource, vf_id = %d, ret = %d.\n", 1546 + i, ret); 1519 1547 } 1520 1548 } 1521 1549 ··· 1753 1757 swt = (struct hns_roce_vf_switch *)desc.data; 1754 1758 hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); 1755 1759 swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL); 1756 - roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M, 1757 - VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id); 1760 + hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id); 1758 1761 ret = hns_roce_cmq_send(hr_dev, &desc, 1); 1759 1762 if (ret) 1760 1763 return ret; 1761 1764 1762 1765 desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN); 1763 1766 desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); 1764 - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); 1765 - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0); 1766 - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); 1767 + hr_reg_enable(swt, VF_SWITCH_ALW_LPBK); 1768 + hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK); 1769 + hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD); 1767 1770 1768 1771 return hns_roce_cmq_send(hr_dev, &desc, 1); 1769 1772 } ··· 1942 1947 caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; 1943 1948 caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; 1944 1949 caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; 1945 - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; 1950 + caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM; 1946 1951 1947 1952 caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; 1948 1953 caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; ··· 2238 2243 caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); 2239 2244 caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); 2240 2245 caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); 2241 - caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); 2242 2246 caps->max_srq_sges = 
le16_to_cpu(resp_a->max_srq_sges); 2243 2247 caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); 2244 2248 caps->num_aeq_vectors = resp_a->num_aeq_vectors; ··· 2264 2270 ctx_hop_num = resp_b->ctx_hop_num; 2265 2271 pbl_hop_num = resp_b->pbl_hop_num; 2266 2272 2267 - caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds, 2268 - V2_QUERY_PF_CAPS_C_NUM_PDS_M, 2269 - V2_QUERY_PF_CAPS_C_NUM_PDS_S); 2270 - caps->flags = roce_get_field(resp_c->cap_flags_num_pds, 2271 - V2_QUERY_PF_CAPS_C_CAP_FLAGS_M, 2272 - V2_QUERY_PF_CAPS_C_CAP_FLAGS_S); 2273 + caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS); 2274 + 2275 + caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS); 2273 2276 caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << 2274 2277 HNS_ROCE_CAP_FLAGS_EX_SHIFT; 2275 2278 2276 - caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs, 2277 - V2_QUERY_PF_CAPS_C_NUM_CQS_M, 2278 - V2_QUERY_PF_CAPS_C_NUM_CQS_S); 2279 - caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs, 2280 - V2_QUERY_PF_CAPS_C_MAX_GID_M, 2281 - V2_QUERY_PF_CAPS_C_MAX_GID_S); 2282 - 2283 - caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth, 2284 - V2_QUERY_PF_CAPS_C_CQ_DEPTH_M, 2285 - V2_QUERY_PF_CAPS_C_CQ_DEPTH_S); 2286 - caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws, 2287 - V2_QUERY_PF_CAPS_C_NUM_MRWS_M, 2288 - V2_QUERY_PF_CAPS_C_NUM_MRWS_S); 2289 - caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps, 2290 - V2_QUERY_PF_CAPS_C_NUM_QPS_M, 2291 - V2_QUERY_PF_CAPS_C_NUM_QPS_S); 2292 - caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps, 2293 - V2_QUERY_PF_CAPS_C_MAX_ORD_M, 2294 - V2_QUERY_PF_CAPS_C_MAX_ORD_S); 2279 + caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); 2280 + caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); 2281 + caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH); 2282 + caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS); 2283 + caps->num_qps = 1 << hr_reg_read(resp_c, 
PF_CAPS_C_NUM_QPS); 2284 + caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD); 2295 2285 caps->max_qp_dest_rdma = caps->max_qp_init_rdma; 2296 2286 caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth); 2297 - caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs, 2298 - V2_QUERY_PF_CAPS_D_NUM_SRQS_M, 2299 - V2_QUERY_PF_CAPS_D_NUM_SRQS_S); 2300 - caps->cong_type = roce_get_field(resp_d->wq_hop_num_max_srqs, 2301 - V2_QUERY_PF_CAPS_D_CONG_TYPE_M, 2302 - V2_QUERY_PF_CAPS_D_CONG_TYPE_S); 2287 + 2288 + caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS); 2289 + caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE); 2303 2290 caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth); 2291 + caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH); 2292 + caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS); 2293 + caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH); 2294 + caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST); 2295 + caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST); 2296 + caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS); 2297 + caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS); 2298 + caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS); 2299 + caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS); 2304 2300 2305 - caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth, 2306 - V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M, 2307 - V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S); 2308 - caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth, 2309 - V2_QUERY_PF_CAPS_D_NUM_CEQS_M, 2310 - V2_QUERY_PF_CAPS_D_NUM_CEQS_S); 2311 - 2312 - caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth, 2313 - V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M, 2314 - V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S); 2315 - caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, 2316 - V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M, 2317 - 
V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S); 2318 - caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, 2319 - V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M, 2320 - V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S); 2321 - caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds, 2322 - V2_QUERY_PF_CAPS_D_RSV_PDS_M, 2323 - V2_QUERY_PF_CAPS_D_RSV_PDS_S); 2324 - caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds, 2325 - V2_QUERY_PF_CAPS_D_NUM_UARS_M, 2326 - V2_QUERY_PF_CAPS_D_NUM_UARS_S); 2327 - caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps, 2328 - V2_QUERY_PF_CAPS_D_RSV_QPS_M, 2329 - V2_QUERY_PF_CAPS_D_RSV_QPS_S); 2330 - caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps, 2331 - V2_QUERY_PF_CAPS_D_RSV_UARS_M, 2332 - V2_QUERY_PF_CAPS_D_RSV_UARS_S); 2333 - caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws, 2334 - V2_QUERY_PF_CAPS_E_RSV_MRWS_M, 2335 - V2_QUERY_PF_CAPS_E_RSV_MRWS_S); 2336 - caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws, 2337 - V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M, 2338 - V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S); 2339 - caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs, 2340 - V2_QUERY_PF_CAPS_E_RSV_CQS_M, 2341 - V2_QUERY_PF_CAPS_E_RSV_CQS_S); 2342 - caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs, 2343 - V2_QUERY_PF_CAPS_E_RSV_SRQS_M, 2344 - V2_QUERY_PF_CAPS_E_RSV_SRQS_S); 2345 - caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey, 2346 - V2_QUERY_PF_CAPS_E_RSV_LKEYS_M, 2347 - V2_QUERY_PF_CAPS_E_RSV_LKEYS_S); 2301 + caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS); 2302 + caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT); 2303 + caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS); 2304 + caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); 2305 + caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); 2348 2306 caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt); 2349 2307 caps->default_ceq_period = 
le16_to_cpu(resp_e->ceq_period); 2350 2308 caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt); ··· 2311 2365 caps->cqe_hop_num = pbl_hop_num; 2312 2366 caps->srqwqe_hop_num = pbl_hop_num; 2313 2367 caps->idx_hop_num = pbl_hop_num; 2314 - caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, 2315 - V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M, 2316 - V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S); 2317 - caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, 2318 - V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M, 2319 - V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S); 2320 - caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, 2321 - V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, 2322 - V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); 2368 + caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM); 2369 + caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM); 2370 + caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM); 2323 2371 2324 2372 return 0; 2325 2373 } ··· 2940 3000 mb_st = (struct hns_roce_mbox_status *)desc.data; 2941 3001 end = msecs_to_jiffies(timeout) + jiffies; 2942 3002 while (v2_chk_mbox_is_avail(hr_dev, &busy)) { 3003 + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) 3004 + return -EIO; 3005 + 2943 3006 status = 0; 2944 3007 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, 2945 3008 true); ··· 3046 3103 3047 3104 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); 3048 3105 3049 - roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M, 3050 - CFG_SGID_TB_TABLE_IDX_S, gid_index); 3051 - roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, 3052 - CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); 3106 + hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index); 3107 + hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type); 3053 3108 3054 3109 copy_gid(&sgid_tb->vf_sgid_l, gid); 3055 3110 ··· 3082 3141 3083 3142 copy_gid(&tb_a->vf_sgid_l, gid); 3084 3143 3085 - 
roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, 3086 - CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); 3087 - roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, 3088 - vlan_id < VLAN_CFI_MASK); 3089 - roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, 3090 - CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); 3144 + hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type); 3145 + hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK); 3146 + hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id); 3091 3147 3092 3148 tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); 3093 - roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, 3094 - CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); 3095 3149 3096 - roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, 3097 - CFG_GMV_TB_SGID_IDX_S, gid_index); 3150 + hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]); 3151 + hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index); 3098 3152 3099 3153 return hns_roce_cmq_send(hr_dev, desc, 2); 3100 3154 } ··· 3138 3202 reg_smac_l = *(u32 *)(&addr[0]); 3139 3203 reg_smac_h = *(u16 *)(&addr[4]); 3140 3204 3141 - roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M, 3142 - CFG_SMAC_TB_IDX_S, phy_port); 3143 - roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, 3144 - CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); 3205 + hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port); 3206 + hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h); 3145 3207 smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); 3146 3208 3147 3209 return hns_roce_cmq_send(hr_dev, &desc, 1); ··· 3168 3234 3169 3235 mpt_entry->pbl_size = cpu_to_le32(mr->npages); 3170 3236 mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); 3171 - roce_set_field(mpt_entry->byte_48_mode_ba, 3172 - V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, 3173 - upper_32_bits(pbl_ba >> 3)); 3237 + hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); 3174 3238 3175 3239 mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); 3176 - 
roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, 3177 - V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0])); 3240 + hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0])); 3178 3241 3179 3242 mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1])); 3180 - roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M, 3181 - V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1])); 3182 - roce_set_field(mpt_entry->byte_64_buf_pa1, 3183 - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, 3184 - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, 3185 - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); 3243 + hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1])); 3244 + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, 3245 + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); 3186 3246 3187 3247 return 0; 3188 3248 } ··· 3185 3257 void *mb_buf, struct hns_roce_mr *mr) 3186 3258 { 3187 3259 struct hns_roce_v2_mpt_entry *mpt_entry; 3188 - int ret; 3189 3260 3190 3261 mpt_entry = mb_buf; 3191 3262 memset(mpt_entry, 0, sizeof(*mpt_entry)); ··· 3223 3296 to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); 3224 3297 hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); 3225 3298 3226 - ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); 3227 - 3228 - return ret; 3299 + return set_mtpt_pbl(hr_dev, mpt_entry, mr); 3229 3300 } 3230 3301 3231 3302 static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, ··· 3234 3309 u32 mr_access_flags = mr->access; 3235 3310 int ret = 0; 3236 3311 3237 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, 3238 - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); 3239 - 3240 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, 3241 - V2_MPT_BYTE_4_PD_S, mr->pd); 3312 + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); 3313 + hr_reg_write(mpt_entry, MPT_PD, mr->pd); 3242 3314 3243 3315 if (flags & IB_MR_REREG_ACCESS) { 3244 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, 3245 - V2_MPT_BYTE_8_BIND_EN_S, 3316 + hr_reg_write(mpt_entry, MPT_BIND_EN, 
3246 3317 (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0)); 3247 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, 3248 - V2_MPT_BYTE_8_ATOMIC_EN_S, 3318 + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, 3249 3319 mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); 3250 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, 3320 + hr_reg_write(mpt_entry, MPT_RR_EN, 3251 3321 mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0); 3252 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, 3322 + hr_reg_write(mpt_entry, MPT_RW_EN, 3253 3323 mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0); 3254 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 3324 + hr_reg_write(mpt_entry, MPT_LW_EN, 3255 3325 mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0); 3256 3326 } 3257 3327 ··· 3277 3357 return -ENOBUFS; 3278 3358 } 3279 3359 3280 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, 3281 - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); 3282 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, 3283 - V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1); 3284 - roce_set_field(mpt_entry->byte_4_pd_hop_st, 3285 - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, 3286 - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, 3287 - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); 3288 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, 3289 - V2_MPT_BYTE_4_PD_S, mr->pd); 3360 + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); 3361 + hr_reg_write(mpt_entry, MPT_PD, mr->pd); 3290 3362 3291 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1); 3292 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); 3293 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); 3363 + hr_reg_enable(mpt_entry, MPT_RA_EN); 3364 + hr_reg_enable(mpt_entry, MPT_R_INV_EN); 3365 + hr_reg_enable(mpt_entry, MPT_L_INV_EN); 3294 3366 3295 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1); 3296 - roce_set_bit(mpt_entry->byte_12_mw_pa, 
V2_MPT_BYTE_12_PA_S, 0); 3297 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); 3298 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); 3367 + hr_reg_enable(mpt_entry, MPT_FRE); 3368 + hr_reg_clear(mpt_entry, MPT_MR_MW); 3369 + hr_reg_enable(mpt_entry, MPT_BPD); 3370 + hr_reg_clear(mpt_entry, MPT_PA); 3371 + 3372 + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1); 3373 + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, 3374 + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); 3375 + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, 3376 + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); 3299 3377 3300 3378 mpt_entry->pbl_size = cpu_to_le32(mr->npages); 3301 3379 3302 3380 mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); 3303 - roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, 3304 - V2_MPT_BYTE_48_PBL_BA_H_S, 3305 - upper_32_bits(pbl_ba >> 3)); 3306 - 3307 - roce_set_field(mpt_entry->byte_64_buf_pa1, 3308 - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, 3309 - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, 3310 - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); 3381 + hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); 3311 3382 3312 3383 return 0; 3313 3384 } ··· 3310 3399 mpt_entry = mb_buf; 3311 3400 memset(mpt_entry, 0, sizeof(*mpt_entry)); 3312 3401 3313 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, 3314 - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); 3315 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, 3316 - V2_MPT_BYTE_4_PD_S, mw->pdn); 3317 - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, 3318 - V2_MPT_BYTE_4_PBL_HOP_NUM_S, 3319 - mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 
0 : 3320 - mw->pbl_hop_num); 3321 - roce_set_field(mpt_entry->byte_4_pd_hop_st, 3322 - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, 3323 - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, 3324 - mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); 3402 + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); 3403 + hr_reg_write(mpt_entry, MPT_PD, mw->pdn); 3325 3404 3326 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); 3327 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); 3328 - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1); 3405 + hr_reg_enable(mpt_entry, MPT_R_INV_EN); 3406 + hr_reg_enable(mpt_entry, MPT_L_INV_EN); 3407 + hr_reg_enable(mpt_entry, MPT_LW_EN); 3329 3408 3330 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); 3331 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); 3332 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); 3333 - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S, 3409 + hr_reg_enable(mpt_entry, MPT_MR_MW); 3410 + hr_reg_enable(mpt_entry, MPT_BPD); 3411 + hr_reg_clear(mpt_entry, MPT_PA); 3412 + hr_reg_write(mpt_entry, MPT_BQP, 3334 3413 mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); 3335 3414 3336 - roce_set_field(mpt_entry->byte_64_buf_pa1, 3337 - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, 3338 - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, 3339 - mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); 3340 - 3341 3415 mpt_entry->lkey = cpu_to_le32(mw->rkey); 3416 + 3417 + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 3418 + mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 
0 : 3419 + mw->pbl_hop_num); 3420 + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, 3421 + mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); 3422 + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, 3423 + mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); 3342 3424 3343 3425 return 0; 3344 3426 } ··· 4870 4966 if (ret) 4871 4967 return ret; 4872 4968 4873 - if (gid_attr) 4874 - is_udp = (gid_attr->gid_type == 4875 - IB_GID_TYPE_ROCE_UDP_ENCAP); 4969 + is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); 4876 4970 } 4877 4971 4878 4972 /* Only HIP08 needs to set the vlan_en bits in QPC */ ··· 5851 5949 (eq->cons_index & (eq->entries - 1)) * 5852 5950 eq->eqe_size); 5853 5951 5854 - return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ 5952 + return (hr_reg_read(aeqe, AEQE_OWNER) ^ 5855 5953 !!(eq->cons_index & eq->entries)) ? aeqe : NULL; 5856 5954 } 5857 5955 ··· 5871 5969 */ 5872 5970 dma_rmb(); 5873 5971 5874 - event_type = roce_get_field(aeqe->asyn, 5875 - HNS_ROCE_V2_AEQE_EVENT_TYPE_M, 5876 - HNS_ROCE_V2_AEQE_EVENT_TYPE_S); 5877 - sub_type = roce_get_field(aeqe->asyn, 5878 - HNS_ROCE_V2_AEQE_SUB_TYPE_M, 5879 - HNS_ROCE_V2_AEQE_SUB_TYPE_S); 5880 - queue_num = roce_get_field(aeqe->event.queue_event.num, 5881 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 5882 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 5972 + event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE); 5973 + sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE); 5974 + queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM); 5883 5975 5884 5976 switch (event_type) { 5885 5977 case HNS_ROCE_EVENT_TYPE_PATH_MIG: ··· 5933 6037 (eq->cons_index & (eq->entries - 1)) * 5934 6038 eq->eqe_size); 5935 6039 5936 - return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ 5937 - (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; 6040 + return (hr_reg_read(ceqe, CEQE_OWNER) ^ 6041 + !!(eq->cons_index & eq->entries)) ? 
ceqe : NULL; 5938 6042 } 5939 6043 5940 6044 static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ··· 5950 6054 */ 5951 6055 dma_rmb(); 5952 6056 5953 - cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M, 5954 - HNS_ROCE_V2_CEQE_COMP_CQN_S); 6057 + cqn = hr_reg_read(ceqe, CEQE_CQN); 5955 6058 5956 6059 hns_roce_cq_completion(hr_dev, cqn); 5957 6060
+98 -220
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 41 41 #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 42 42 #define HNS_ROCE_V2_MAX_SRQ_SGE 64 43 43 #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 44 - #define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 44 + #define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100 45 45 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 46 46 #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 47 47 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64 ··· 302 302 303 303 #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 304 304 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 305 - 306 - #define V2_CQC_BYTE_4_ARM_ST_S 6 307 - #define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6) 308 - 309 - #define V2_CQC_BYTE_4_CEQN_S 15 310 - #define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15) 311 - 312 - #define V2_CQC_BYTE_8_CQN_S 0 313 - #define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) 314 - 315 - #define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30 316 - #define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30) 317 - 318 - #define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0 319 - #define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0) 320 - 321 - #define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0 322 - #define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0) 323 - 324 - #define V2_CQC_BYTE_52_CQE_CNT_S 0 325 - #define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0) 326 - 327 - #define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0 328 - #define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0) 329 - 330 - #define V2_CQC_BYTE_56_CQ_PERIOD_S 16 331 - #define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16) 332 305 333 306 #define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) 334 307 ··· 761 788 #define MPT_LKEY MPT_FIELD_LOC(223, 192) 762 789 #define MPT_VA MPT_FIELD_LOC(287, 224) 763 790 #define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) 764 - #define MPT_PBL_BA MPT_FIELD_LOC(380, 320) 791 + #define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320) 792 + #define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352) 765 793 #define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) 766 794 #define MPT_RSV0 MPT_FIELD_LOC(383, 382) 767 - #define MPT_PA0 MPT_FIELD_LOC(441, 384) 795 + #define 
MPT_PA0_L MPT_FIELD_LOC(415, 384) 796 + #define MPT_PA0_H MPT_FIELD_LOC(441, 416) 768 797 #define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) 769 - #define MPT_PA1 MPT_FIELD_LOC(505, 448) 798 + #define MPT_PA1_L MPT_FIELD_LOC(479, 448) 799 + #define MPT_PA1_H MPT_FIELD_LOC(505, 480) 770 800 #define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) 771 801 #define MPT_RSV2 MPT_FIELD_LOC(507, 507) 772 802 #define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) ··· 875 899 u8 dgid[GID_LEN_V2]; 876 900 }; 877 901 878 - #define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 879 - #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) 902 + #define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l) 880 903 881 - #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 882 - 883 - #define V2_UD_SEND_WQE_BYTE_4_CQE_S 8 884 - 885 - #define V2_UD_SEND_WQE_BYTE_4_SE_S 11 886 - 887 - #define V2_UD_SEND_WQE_BYTE_16_PD_S 0 888 - #define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0) 889 - 890 - #define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24 891 - #define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) 892 - 893 - #define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 894 - #define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) 895 - 896 - #define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16 897 - #define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16) 898 - 899 - #define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0 900 - #define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0) 901 - 902 - #define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0 903 - #define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0) 904 - 905 - #define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16 906 - #define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16) 907 - 908 - #define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24 909 - #define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24) 910 - 911 - #define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0 912 - #define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0) 913 - 914 - #define V2_UD_SEND_WQE_BYTE_40_SL_S 20 915 - #define V2_UD_SEND_WQE_BYTE_40_SL_M 
GENMASK(23, 20) 916 - 917 - #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 918 - 919 - #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 904 + #define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0) 905 + #define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7) 906 + #define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8) 907 + #define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11) 908 + #define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96) 909 + #define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120) 910 + #define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128) 911 + #define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176) 912 + #define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224) 913 + #define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256) 914 + #define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272) 915 + #define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280) 916 + #define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288) 917 + #define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308) 918 + #define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318) 919 + #define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319) 920 920 921 921 struct hns_roce_v2_rc_send_wqe { 922 922 __le32 byte_4; ··· 907 955 __le64 va; 908 956 }; 909 957 910 - #define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 911 - #define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) 958 + #define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l) 912 959 913 - #define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 914 - #define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) 915 - 916 - #define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 917 - #define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) 918 - 919 - #define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 920 - #define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) 921 - 922 - #define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 923 - 924 - #define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 925 - 926 - #define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9 927 - 928 - #define 
V2_RC_SEND_WQE_BYTE_4_SO_S 10 929 - 930 - #define V2_RC_SEND_WQE_BYTE_4_SE_S 11 931 - 932 - #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 933 - 934 - #define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 935 - 936 - #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 937 - #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) 938 - 939 - #define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24 940 - #define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) 941 - 942 - #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 943 - #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) 944 - 945 - #define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 960 + #define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0) 961 + #define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5) 962 + #define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13) 963 + #define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) 964 + #define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) 965 + #define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) 966 + #define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10) 967 + #define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) 968 + #define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) 969 + #define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) 970 + #define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31) 971 + #define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96) 972 + #define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120) 973 + #define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128) 974 + #define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159) 946 975 947 976 struct hns_roce_wqe_frmr_seg { 948 977 __le32 pbl_size; ··· 966 1033 __le32 rsv[4]; 967 1034 }; 968 1035 969 - #define FUNC_CLEAR_RST_FUN_DONE_S 0 1036 + #define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l) 1037 + 1038 + #define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32) 1039 + 970 1040 /* Each physical function manages up to 248 virtual functions, it takes up to 971 1041 * 
100ms for each function to execute clear. If an abnormal reset occurs, it is 972 1042 * executed twice at most, so it takes up to 249 * 2 * 100ms. ··· 1048 1112 __le32 resv3; 1049 1113 }; 1050 1114 1051 - #define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 1052 - #define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) 1115 + #define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l) 1053 1116 1054 - #define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 1055 - #define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 1056 - #define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 1117 + #define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35) 1118 + #define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65) 1119 + #define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66) 1120 + #define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67) 1057 1121 1058 1122 struct hns_roce_post_mbox { 1059 1123 __le32 in_param_l; ··· 1116 1180 __le32 vf_sgid_type_rsv; 1117 1181 }; 1118 1182 1119 - #define CFG_SGID_TB_TABLE_IDX_S 0 1120 - #define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) 1183 + #define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l) 1121 1184 1122 - #define CFG_SGID_TB_VF_SGID_TYPE_S 0 1123 - #define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) 1185 + #define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0) 1186 + #define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160) 1124 1187 1125 1188 struct hns_roce_cfg_smac_tb { 1126 1189 __le32 tb_idx_rsv; ··· 1127 1192 __le32 vf_smac_h_rsv; 1128 1193 __le32 rsv[3]; 1129 1194 }; 1130 - #define CFG_SMAC_TB_IDX_S 0 1131 - #define CFG_SMAC_TB_IDX_M GENMASK(7, 0) 1132 1195 1133 - #define CFG_SMAC_TB_VF_SMAC_H_S 0 1134 - #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) 1196 + #define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l) 1197 + 1198 + #define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0) 1199 + #define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64) 1135 1200 1136 1201 struct hns_roce_cfg_gmv_tb_a { 1137 1202 __le32 vf_sgid_l; ··· 1142 1207 
__le32 resv; 1143 1208 }; 1144 1209 1145 - #define CFG_GMV_TB_SGID_IDX_S 0 1146 - #define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) 1210 + #define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l) 1147 1211 1148 - #define CFG_GMV_TB_VF_SGID_TYPE_S 0 1149 - #define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) 1150 - 1151 - #define CFG_GMV_TB_VF_VLAN_EN_S 2 1152 - 1153 - #define CFG_GMV_TB_VF_VLAN_ID_S 16 1154 - #define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) 1212 + #define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128) 1213 + #define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130) 1214 + #define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144) 1155 1215 1156 1216 struct hns_roce_cfg_gmv_tb_b { 1157 1217 __le32 vf_smac_l; ··· 1155 1225 __le32 resv[3]; 1156 1226 }; 1157 1227 1158 - #define CFG_GMV_TB_SMAC_H_S 0 1159 - #define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) 1228 + #define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l) 1229 + 1230 + #define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32) 1231 + #define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64) 1160 1232 1161 1233 #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 1162 1234 struct hns_roce_query_pf_caps_a { ··· 1210 1278 __le16 rq_depth; 1211 1279 }; 1212 1280 1213 - #define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0 1214 - #define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0) 1281 + #define PF_CAPS_C_FIELD_LOC(h, l) \ 1282 + FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l) 1215 1283 1216 - #define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20 1217 - #define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20) 1218 - 1219 - #define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0 1220 - #define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0) 1221 - 1222 - #define V2_QUERY_PF_CAPS_C_MAX_GID_S 20 1223 - #define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20) 1224 - 1225 - #define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0 1226 - #define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0) 1227 - 1228 - #define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0 1229 - #define 
V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0) 1230 - 1231 - #define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0 1232 - #define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0) 1233 - 1234 - #define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20 1235 - #define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20) 1284 + #define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0) 1285 + #define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20) 1286 + #define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32) 1287 + #define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52) 1288 + #define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64) 1289 + #define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96) 1290 + #define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128) 1291 + #define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148) 1236 1292 1237 1293 struct hns_roce_query_pf_caps_d { 1238 1294 __le32 wq_hop_num_max_srqs; ··· 1231 1311 __le32 num_uars_rsv_pds; 1232 1312 __le32 rsv_uars_rsv_qps; 1233 1313 }; 1234 - #define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0 1235 - #define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0) 1236 1314 1237 - #define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20 1238 - #define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20) 1315 + #define PF_CAPS_D_FIELD_LOC(h, l) \ 1316 + FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l) 1239 1317 1240 - #define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22 1241 - #define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22) 1318 + #define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0) 1319 + #define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20) 1320 + #define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22) 1321 + #define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24) 1322 + #define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26) 1323 + #define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64) 1324 + #define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86) 1325 + #define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96) 1326 + #define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118) 1327 + 
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120) 1328 + #define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128) 1329 + #define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148) 1330 + #define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160) 1331 + #define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180) 1242 1332 1243 - #define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24 1244 - #define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24) 1245 - 1246 - #define V2_QUERY_PF_CAPS_D_CONG_TYPE_S 26 1247 - #define V2_QUERY_PF_CAPS_D_CONG_TYPE_M GENMASK(29, 26) 1333 + #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 1248 1334 1249 1335 struct hns_roce_congestion_algorithm { 1250 1336 u8 alg_sel; ··· 1258 1332 u8 dip_vld; 1259 1333 u8 wnd_mode_sel; 1260 1334 }; 1261 - 1262 - #define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0 1263 - #define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0) 1264 - 1265 - #define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22 1266 - #define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22) 1267 - 1268 - #define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0 1269 - #define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0) 1270 - 1271 - #define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22 1272 - #define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22) 1273 - 1274 - #define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24 1275 - #define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24) 1276 - 1277 - #define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0 1278 - #define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0) 1279 - 1280 - #define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20 1281 - #define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20) 1282 - 1283 - #define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0 1284 - #define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0) 1285 - 1286 - #define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20 1287 - #define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20) 1288 1335 1289 1336 struct hns_roce_query_pf_caps_e { 1290 1337 __le32 chunk_size_shift_rsv_mrws; ··· 1270 1371 __le16 aeq_period; 1271 1372 }; 1272 1373 1273 - #define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0 
1274 - #define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0) 1374 + #define PF_CAPS_E_FIELD_LOC(h, l) \ 1375 + FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l) 1275 1376 1276 - #define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20 1277 - #define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20) 1278 - 1279 - #define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0 1280 - #define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0) 1281 - 1282 - #define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0 1283 - #define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0) 1284 - 1285 - #define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0 1286 - #define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0) 1377 + #define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0) 1378 + #define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20) 1379 + #define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32) 1380 + #define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64) 1381 + #define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96) 1287 1382 1288 1383 struct hns_roce_cmq_req { 1289 1384 __le32 data[6]; ··· 1378 1485 #define HNS_ROCE_EQ_INIT_CONS_IDX 0 1379 1486 #define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0 1380 1487 1381 - #define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31 1382 - #define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31 1383 - 1384 1488 #define HNS_ROCE_V2_COMP_EQE_NUM 0x1000 1385 1489 #define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000 1386 1490 ··· 1433 1543 #define EQC_NEX_EQE_BA_L EQC_FIELD_LOC(319, 288) 1434 1544 #define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320) 1435 1545 #define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340) 1436 - 1437 - #define HNS_ROCE_V2_CEQE_COMP_CQN_S 0 1438 - #define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0) 1439 - 1440 - #define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0 1441 - #define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0) 1442 - 1443 - #define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8 1444 - #define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8) 1445 - 1446 - #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 1447 - #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) 1448 1546 1449 1547 
#define MAX_SERVICE_LEVEL 0x7 1450 1548
+1 -1
drivers/infiniband/hw/hns/hns_roce_main.c
··· 737 737 ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table, 738 738 HEM_TYPE_CQC_TIMER, 739 739 hr_dev->caps.cqc_timer_entry_sz, 740 - hr_dev->caps.num_cqc_timer, 1); 740 + hr_dev->caps.cqc_timer_bt_num, 1); 741 741 if (ret) { 742 742 dev_err(dev, 743 743 "Failed to init CQC timer memory, aborting.\n");
+1 -2
drivers/infiniband/hw/hns/hns_roce_mr.c
··· 340 340 { 341 341 struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); 342 342 struct hns_roce_mr *mr = to_hr_mr(ibmr); 343 - int ret = 0; 344 343 345 344 if (hr_dev->hw->dereg_mr) 346 345 hr_dev->hw->dereg_mr(hr_dev); ··· 347 348 hns_roce_mr_free(hr_dev, mr); 348 349 kfree(mr); 349 350 350 - return ret; 351 + return 0; 351 352 } 352 353 353 354 struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
-20
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 243 243 return 0; 244 244 } 245 245 246 - enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) 247 - { 248 - switch (state) { 249 - case IB_QPS_RESET: 250 - return HNS_ROCE_QP_STATE_RST; 251 - case IB_QPS_INIT: 252 - return HNS_ROCE_QP_STATE_INIT; 253 - case IB_QPS_RTR: 254 - return HNS_ROCE_QP_STATE_RTR; 255 - case IB_QPS_RTS: 256 - return HNS_ROCE_QP_STATE_RTS; 257 - case IB_QPS_SQD: 258 - return HNS_ROCE_QP_STATE_SQD; 259 - case IB_QPS_ERR: 260 - return HNS_ROCE_QP_STATE_ERR; 261 - default: 262 - return HNS_ROCE_QP_NUM_STATE; 263 - } 264 - } 265 - 266 246 static void add_qp_to_list(struct hns_roce_dev *hr_dev, 267 247 struct hns_roce_qp *hr_qp, 268 248 struct ib_cq *send_cq, struct ib_cq *recv_cq)
+14 -35
drivers/infiniband/hw/hns/hns_roce_restrack.c
··· 13 13 struct hns_roce_v2_cq_context *context) 14 14 { 15 15 if (rdma_nl_put_driver_u32(msg, "state", 16 - roce_get_field(context->byte_4_pg_ceqn, 17 - V2_CQC_BYTE_4_ARM_ST_M, 18 - V2_CQC_BYTE_4_ARM_ST_S))) 16 + hr_reg_read(context, CQC_ARM_ST))) 17 + 19 18 goto err; 20 19 21 20 if (rdma_nl_put_driver_u32(msg, "ceqn", 22 - roce_get_field(context->byte_4_pg_ceqn, 23 - V2_CQC_BYTE_4_CEQN_M, 24 - V2_CQC_BYTE_4_CEQN_S))) 21 + hr_reg_read(context, CQC_CEQN))) 25 22 goto err; 26 23 27 24 if (rdma_nl_put_driver_u32(msg, "cqn", 28 - roce_get_field(context->byte_8_cqn, 29 - V2_CQC_BYTE_8_CQN_M, 30 - V2_CQC_BYTE_8_CQN_S))) 25 + hr_reg_read(context, CQC_CQN))) 31 26 goto err; 32 27 33 28 if (rdma_nl_put_driver_u32(msg, "hopnum", 34 - roce_get_field(context->byte_16_hop_addr, 35 - V2_CQC_BYTE_16_CQE_HOP_NUM_M, 36 - V2_CQC_BYTE_16_CQE_HOP_NUM_S))) 29 + hr_reg_read(context, CQC_CQE_HOP_NUM))) 37 30 goto err; 38 31 39 - if (rdma_nl_put_driver_u32( 40 - msg, "pi", 41 - roce_get_field(context->byte_28_cq_pi, 42 - V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M, 43 - V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S))) 32 + if (rdma_nl_put_driver_u32(msg, "pi", 33 + hr_reg_read(context, CQC_CQ_PRODUCER_IDX))) 44 34 goto err; 45 35 46 - if (rdma_nl_put_driver_u32( 47 - msg, "ci", 48 - roce_get_field(context->byte_32_cq_ci, 49 - V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M, 50 - V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S))) 36 + if (rdma_nl_put_driver_u32(msg, "ci", 37 + hr_reg_read(context, CQC_CQ_CONSUMER_IDX))) 51 38 goto err; 52 39 53 - if (rdma_nl_put_driver_u32( 54 - msg, "coalesce", 55 - roce_get_field(context->byte_56_cqe_period_maxcnt, 56 - V2_CQC_BYTE_56_CQ_MAX_CNT_M, 57 - V2_CQC_BYTE_56_CQ_MAX_CNT_S))) 40 + if (rdma_nl_put_driver_u32(msg, "coalesce", 41 + hr_reg_read(context, CQC_CQ_MAX_CNT))) 58 42 goto err; 59 43 60 - if (rdma_nl_put_driver_u32( 61 - msg, "period", 62 - roce_get_field(context->byte_56_cqe_period_maxcnt, 63 - V2_CQC_BYTE_56_CQ_PERIOD_M, 64 - V2_CQC_BYTE_56_CQ_PERIOD_S))) 44 + if 
(rdma_nl_put_driver_u32(msg, "period", 45 + hr_reg_read(context, CQC_CQ_PERIOD))) 65 46 goto err; 66 47 67 48 if (rdma_nl_put_driver_u32(msg, "cnt", 68 - roce_get_field(context->byte_52_cqe_cnt, 69 - V2_CQC_BYTE_52_CQE_CNT_M, 70 - V2_CQC_BYTE_52_CQE_CNT_S))) 49 + hr_reg_read(context, CQC_CQE_CNT))) 71 50 goto err; 72 51 73 52 return 0;
+18 -17
drivers/infiniband/hw/irdma/hw.c
··· 61 61 struct irdma_cq *cq = iwcq->back_cq; 62 62 63 63 if (!cq->user_mode) 64 - cq->armed = false; 64 + atomic_set(&cq->armed, 0); 65 65 if (cq->ibcq.comp_handler) 66 66 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 67 67 } ··· 1827 1827 rf->rsrc_created = true; 1828 1828 } 1829 1829 1830 - iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | 1831 - IB_DEVICE_MEM_WINDOW | 1832 - IB_DEVICE_MEM_MGT_EXTENSIONS; 1833 - 1834 1830 if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) 1835 1831 irdma_alloc_set_mac(iwdev); 1836 1832 irdma_add_ip(iwdev); ··· 2689 2693 info.sq = flush_mask & IRDMA_FLUSH_SQ; 2690 2694 info.rq = flush_mask & IRDMA_FLUSH_RQ; 2691 2695 2692 - if (flush_mask & IRDMA_REFLUSH) { 2693 - if (info.sq) 2694 - iwqp->sc_qp.flush_sq = false; 2695 - if (info.rq) 2696 - iwqp->sc_qp.flush_rq = false; 2697 - } 2698 - 2699 2696 /* Generate userflush errors in CQE */ 2700 2697 info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR; 2701 2698 info.sq_minor_code = FLUSH_GENERAL_ERR; 2702 2699 info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR; 2703 2700 info.rq_minor_code = FLUSH_GENERAL_ERR; 2704 2701 info.userflushcode = true; 2705 - if (flush_code) { 2706 - if (info.sq && iwqp->sc_qp.sq_flush_code) 2707 - info.sq_minor_code = flush_code; 2708 - if (info.rq && iwqp->sc_qp.rq_flush_code) 2709 - info.rq_minor_code = flush_code; 2702 + 2703 + if (flush_mask & IRDMA_REFLUSH) { 2704 + if (info.sq) 2705 + iwqp->sc_qp.flush_sq = false; 2706 + if (info.rq) 2707 + iwqp->sc_qp.flush_rq = false; 2708 + } else { 2709 + if (flush_code) { 2710 + if (info.sq && iwqp->sc_qp.sq_flush_code) 2711 + info.sq_minor_code = flush_code; 2712 + if (info.rq && iwqp->sc_qp.rq_flush_code) 2713 + info.rq_minor_code = flush_code; 2714 + } 2715 + if (!iwqp->user_mode) 2716 + queue_delayed_work(iwqp->iwdev->cleanup_wq, 2717 + &iwqp->dwork_flush, 2718 + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 2710 2719 } 2711 2720 2712 2721 /* Issue flush */
-1
drivers/infiniband/hw/irdma/main.h
··· 338 338 u32 roce_ackcreds; 339 339 u32 vendor_id; 340 340 u32 vendor_part_id; 341 - u32 device_cap_flags; 342 341 u32 push_mode; 343 342 u32 rcv_wnd; 344 343 u16 mac_ip_table_idx;
+2 -5
drivers/infiniband/hw/irdma/puda.c
··· 191 191 static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, 192 192 u32 *wqe_idx) 193 193 { 194 - __le64 *wqe = NULL; 195 194 int ret_code = 0; 196 195 197 196 *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); ··· 198 199 qp->swqe_polarity = !qp->swqe_polarity; 199 200 IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code); 200 201 if (ret_code) 201 - return wqe; 202 + return NULL; 202 203 203 - wqe = qp->sq_base[*wqe_idx].elem; 204 - 205 - return wqe; 204 + return qp->sq_base[*wqe_idx].elem; 206 205 } 207 206 208 207 /**
+147
drivers/infiniband/hw/irdma/utils.c
··· 2495 2495 2496 2496 return polarity != ukcq->polarity; 2497 2497 } 2498 + 2499 + void irdma_remove_cmpls_list(struct irdma_cq *iwcq) 2500 + { 2501 + struct irdma_cmpl_gen *cmpl_node; 2502 + struct list_head *tmp_node, *list_node; 2503 + 2504 + list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) { 2505 + cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list); 2506 + list_del(&cmpl_node->list); 2507 + kfree(cmpl_node); 2508 + } 2509 + } 2510 + 2511 + int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info) 2512 + { 2513 + struct irdma_cmpl_gen *cmpl; 2514 + 2515 + if (list_empty(&iwcq->cmpl_generated)) 2516 + return -ENOENT; 2517 + cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list); 2518 + list_del(&cmpl->list); 2519 + memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info)); 2520 + kfree(cmpl); 2521 + 2522 + ibdev_dbg(iwcq->ibcq.device, 2523 + "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n", 2524 + __func__, cq_poll_info->qp_id, cq_poll_info->op_type, 2525 + cq_poll_info->wr_id); 2526 + 2527 + return 0; 2528 + } 2529 + 2530 + /** 2531 + * irdma_set_cpi_common_values - fill in values for polling info struct 2532 + * @cpi: resulting structure of cq_poll_info type 2533 + * @qp: QPair 2534 + * @qp_num: id of the QP 2535 + */ 2536 + static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, 2537 + struct irdma_qp_uk *qp, u32 qp_num) 2538 + { 2539 + cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED; 2540 + cpi->error = true; 2541 + cpi->major_err = IRDMA_FLUSH_MAJOR_ERR; 2542 + cpi->minor_err = FLUSH_GENERAL_ERR; 2543 + cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp; 2544 + cpi->qp_id = qp_num; 2545 + } 2546 + 2547 + static inline void irdma_comp_handler(struct irdma_cq *cq) 2548 + { 2549 + if (!cq->ibcq.comp_handler) 2550 + return; 2551 + if (atomic_cmpxchg(&cq->armed, 1, 0)) 2552 + cq->ibcq.comp_handler(&cq->ibcq, 
cq->ibcq.cq_context); 2553 + } 2554 + 2555 + void irdma_generate_flush_completions(struct irdma_qp *iwqp) 2556 + { 2557 + struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk; 2558 + struct irdma_ring *sq_ring = &qp->sq_ring; 2559 + struct irdma_ring *rq_ring = &qp->rq_ring; 2560 + struct irdma_cmpl_gen *cmpl; 2561 + __le64 *sw_wqe; 2562 + u64 wqe_qword; 2563 + u32 wqe_idx; 2564 + bool compl_generated = false; 2565 + unsigned long flags1; 2566 + 2567 + spin_lock_irqsave(&iwqp->iwscq->lock, flags1); 2568 + if (irdma_cq_empty(iwqp->iwscq)) { 2569 + unsigned long flags2; 2570 + 2571 + spin_lock_irqsave(&iwqp->lock, flags2); 2572 + while (IRDMA_RING_MORE_WORK(*sq_ring)) { 2573 + cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); 2574 + if (!cmpl) { 2575 + spin_unlock_irqrestore(&iwqp->lock, flags2); 2576 + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); 2577 + return; 2578 + } 2579 + 2580 + wqe_idx = sq_ring->tail; 2581 + irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); 2582 + 2583 + cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; 2584 + sw_wqe = qp->sq_base[wqe_idx].elem; 2585 + get_64bit_val(sw_wqe, 24, &wqe_qword); 2586 + cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE); 2587 + /* remove the SQ WR by moving SQ tail*/ 2588 + IRDMA_RING_SET_TAIL(*sq_ring, 2589 + sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); 2590 + 2591 + ibdev_dbg(iwqp->iwscq->ibcq.device, 2592 + "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n", 2593 + __func__, cmpl->cpi.wr_id, qp->qp_id); 2594 + list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated); 2595 + compl_generated = true; 2596 + } 2597 + spin_unlock_irqrestore(&iwqp->lock, flags2); 2598 + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); 2599 + if (compl_generated) 2600 + irdma_comp_handler(iwqp->iwrcq); 2601 + } else { 2602 + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); 2603 + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, 2604 + 
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 2605 + } 2606 + 2607 + spin_lock_irqsave(&iwqp->iwrcq->lock, flags1); 2608 + if (irdma_cq_empty(iwqp->iwrcq)) { 2609 + unsigned long flags2; 2610 + 2611 + spin_lock_irqsave(&iwqp->lock, flags2); 2612 + while (IRDMA_RING_MORE_WORK(*rq_ring)) { 2613 + cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); 2614 + if (!cmpl) { 2615 + spin_unlock_irqrestore(&iwqp->lock, flags2); 2616 + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); 2617 + return; 2618 + } 2619 + 2620 + wqe_idx = rq_ring->tail; 2621 + irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); 2622 + 2623 + cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; 2624 + cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; 2625 + /* remove the RQ WR by moving RQ tail */ 2626 + IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); 2627 + ibdev_dbg(iwqp->iwrcq->ibcq.device, 2628 + "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n", 2629 + __func__, cmpl->cpi.wr_id, qp->qp_id, 2630 + wqe_idx); 2631 + list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated); 2632 + 2633 + compl_generated = true; 2634 + } 2635 + spin_unlock_irqrestore(&iwqp->lock, flags2); 2636 + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); 2637 + if (compl_generated) 2638 + irdma_comp_handler(iwqp->iwrcq); 2639 + } else { 2640 + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); 2641 + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, 2642 + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 2643 + } 2644 + }
+36 -24
drivers/infiniband/hw/irdma/verbs.c
··· 25 25 iwdev->netdev->dev_addr); 26 26 props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 | 27 27 irdma_fw_minor_ver(&rf->sc_dev); 28 - props->device_cap_flags = iwdev->device_cap_flags; 28 + props->device_cap_flags = IB_DEVICE_MEM_WINDOW | 29 + IB_DEVICE_MEM_MGT_EXTENSIONS; 30 + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 29 31 props->vendor_id = pcidev->vendor; 30 32 props->vendor_part_id = pcidev->device; 31 33 ··· 535 533 if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) 536 534 irdma_modify_qp_to_err(&iwqp->sc_qp); 537 535 536 + if (!iwqp->user_mode) 537 + cancel_delayed_work_sync(&iwqp->dwork_flush); 538 + 538 539 irdma_qp_rem_ref(&iwqp->ibqp); 539 540 wait_for_completion(&iwqp->free_qp); 540 541 irdma_free_lsmm_rsrc(iwqp); ··· 793 788 return 0; 794 789 } 795 790 791 + static void irdma_flush_worker(struct work_struct *work) 792 + { 793 + struct delayed_work *dwork = to_delayed_work(work); 794 + struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush); 795 + 796 + irdma_generate_flush_completions(iwqp); 797 + } 798 + 796 799 /** 797 800 * irdma_create_qp - create qp 798 801 * @ibqp: ptr of qp ··· 920 907 init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; 921 908 irdma_setup_virt_qp(iwdev, iwqp, &init_info); 922 909 } else { 910 + INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); 923 911 init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; 924 912 err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); 925 913 } ··· 1412 1398 } 1413 1399 if (iwqp->ibqp_state > IB_QPS_RTS && 1414 1400 !iwqp->flush_issued) { 1415 - iwqp->flush_issued = 1; 1416 1401 spin_unlock_irqrestore(&iwqp->lock, flags); 1417 1402 irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | 1418 1403 IRDMA_FLUSH_RQ | 1419 1404 IRDMA_FLUSH_WAIT); 1405 + iwqp->flush_issued = 1; 1420 1406 } else { 1421 1407 spin_unlock_irqrestore(&iwqp->lock, flags); 1422 1408 } ··· 1769 1755 unsigned long flags; 1770 1756 1771 1757 spin_lock_irqsave(&iwcq->lock, flags); 1758 + if 
(!list_empty(&iwcq->cmpl_generated)) 1759 + irdma_remove_cmpls_list(iwcq); 1772 1760 if (!list_empty(&iwcq->resize_list)) 1773 1761 irdma_process_resize_list(iwcq, iwdev, NULL); 1774 1762 spin_unlock_irqrestore(&iwcq->lock, flags); ··· 1975 1959 cq->back_cq = iwcq; 1976 1960 spin_lock_init(&iwcq->lock); 1977 1961 INIT_LIST_HEAD(&iwcq->resize_list); 1962 + INIT_LIST_HEAD(&iwcq->cmpl_generated); 1978 1963 info.dev = dev; 1979 1964 ukinfo->cq_size = max(entries, 4); 1980 1965 ukinfo->cq_id = cq_num; ··· 3061 3044 unsigned long flags; 3062 3045 bool inv_stag; 3063 3046 struct irdma_ah *ah; 3064 - bool reflush = false; 3065 3047 3066 3048 iwqp = to_iwqp(ibqp); 3067 3049 ukqp = &iwqp->sc_qp.qp_uk; 3068 3050 dev = &iwqp->iwdev->rf->sc_dev; 3069 3051 3070 3052 spin_lock_irqsave(&iwqp->lock, flags); 3071 - if (iwqp->flush_issued && ukqp->sq_flush_complete) 3072 - reflush = true; 3073 3053 while (ib_wr) { 3074 3054 memset(&info, 0, sizeof(info)); 3075 3055 inv_stag = false; ··· 3216 3202 ib_wr = ib_wr->next; 3217 3203 } 3218 3204 3219 - if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) { 3220 - irdma_uk_qp_post_wr(ukqp); 3205 + if (!iwqp->flush_issued) { 3206 + if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) 3207 + irdma_uk_qp_post_wr(ukqp); 3221 3208 spin_unlock_irqrestore(&iwqp->lock, flags); 3222 - } else if (reflush) { 3223 - ukqp->sq_flush_complete = false; 3224 - spin_unlock_irqrestore(&iwqp->lock, flags); 3225 - irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH); 3226 3209 } else { 3227 3210 spin_unlock_irqrestore(&iwqp->lock, flags); 3211 + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, 3212 + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 3228 3213 } 3229 3214 if (err) 3230 3215 *bad_wr = ib_wr; ··· 3246 3233 struct irdma_post_rq_info post_recv = {}; 3247 3234 unsigned long flags; 3248 3235 int err = 0; 3249 - bool reflush = false; 3250 3236 3251 3237 iwqp = to_iwqp(ibqp); 3252 3238 ukqp = &iwqp->sc_qp.qp_uk; 3253 3239 3254 3240 
spin_lock_irqsave(&iwqp->lock, flags); 3255 - if (iwqp->flush_issued && ukqp->rq_flush_complete) 3256 - reflush = true; 3257 3241 while (ib_wr) { 3258 3242 post_recv.num_sges = ib_wr->num_sge; 3259 3243 post_recv.wr_id = ib_wr->wr_id; ··· 3266 3256 } 3267 3257 3268 3258 out: 3269 - if (reflush) { 3270 - ukqp->rq_flush_complete = false; 3271 - spin_unlock_irqrestore(&iwqp->lock, flags); 3272 - irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH); 3273 - } else { 3274 - spin_unlock_irqrestore(&iwqp->lock, flags); 3275 - } 3259 + spin_unlock_irqrestore(&iwqp->lock, flags); 3260 + if (iwqp->flush_issued) 3261 + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, 3262 + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); 3276 3263 3277 3264 if (err) 3278 3265 *bad_wr = ib_wr; ··· 3481 3474 /* check the current CQ for new cqes */ 3482 3475 while (npolled < num_entries) { 3483 3476 ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled); 3477 + if (ret == -ENOENT) { 3478 + ret = irdma_generated_cmpls(iwcq, cur_cqe); 3479 + if (!ret) 3480 + irdma_process_cqe(entry + npolled, cur_cqe); 3481 + } 3484 3482 if (!ret) { 3485 3483 ++npolled; 3486 3484 cq_new_cqe = true; ··· 3567 3555 if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED) 3568 3556 promo_event = true; 3569 3557 3570 - if (!iwcq->armed || promo_event) { 3571 - iwcq->armed = true; 3558 + if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) { 3572 3559 iwcq->last_notify = cq_notify; 3573 3560 irdma_uk_cq_request_notification(ukcq, cq_notify); 3574 3561 } 3575 3562 3576 - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq)) 3563 + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && 3564 + (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) 3577 3565 ret = 1; 3578 3566 spin_unlock_irqrestore(&iwcq->lock, flags); 3579 3567
+12 -1
drivers/infiniband/hw/irdma/verbs.h
··· 4 4 #define IRDMA_VERBS_H 5 5 6 6 #define IRDMA_MAX_SAVED_PHY_PGADDR 4 7 + #define IRDMA_FLUSH_DELAY_MS 20 7 8 8 9 #define IRDMA_PKEY_TBL_SZ 1 9 10 #define IRDMA_DEFAULT_PKEY 0xFFFF ··· 116 115 u16 cq_size; 117 116 u16 cq_num; 118 117 bool user_mode; 119 - bool armed; 118 + atomic_t armed; 120 119 enum irdma_cmpl_notify last_notify; 121 120 u32 polled_cmpls; 122 121 u32 cq_mem_size; ··· 127 126 struct irdma_pbl *iwpbl_shadow; 128 127 struct list_head resize_list; 129 128 struct irdma_cq_poll_info cur_cqe; 129 + struct list_head cmpl_generated; 130 + }; 131 + 132 + struct irdma_cmpl_gen { 133 + struct list_head list; 134 + struct irdma_cq_poll_info cpi; 130 135 }; 131 136 132 137 struct disconn_work { ··· 173 166 refcount_t refcnt; 174 167 struct iw_cm_id *cm_id; 175 168 struct irdma_cm_node *cm_node; 169 + struct delayed_work dwork_flush; 176 170 struct ib_mr *lsmm_mr; 177 171 atomic_t hw_mod_qp_pend; 178 172 enum ib_qp_state ibqp_state; ··· 237 229 void irdma_ib_unregister_device(struct irdma_device *iwdev); 238 230 void irdma_ib_dealloc_device(struct ib_device *ibdev); 239 231 void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event); 232 + void irdma_generate_flush_completions(struct irdma_qp *iwqp); 233 + void irdma_remove_cmpls_list(struct irdma_cq *iwcq); 234 + int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info); 240 235 #endif /* IRDMA_VERBS_H */
+22 -7
drivers/infiniband/hw/mlx4/cm.c
··· 80 80 union ib_gid primary_path_sgid; 81 81 }; 82 82 83 + static struct workqueue_struct *cm_wq; 83 84 84 85 static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) 85 86 { ··· 289 288 /*make sure that there is no schedule inside the scheduled work.*/ 290 289 if (!sriov->is_going_down && !id->scheduled_delete) { 291 290 id->scheduled_delete = 1; 292 - schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); 291 + queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); 293 292 } else if (id->scheduled_delete) { 294 293 /* Adjust timeout if already scheduled */ 295 - mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); 294 + mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); 296 295 } 297 296 spin_unlock_irqrestore(&sriov->going_down_lock, flags); 298 297 spin_unlock(&sriov->id_map_lock); ··· 371 370 ret = xa_err(item); 372 371 else 373 372 /* If a retry, adjust delayed work */ 374 - mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); 373 + mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); 375 374 goto err_or_exists; 376 375 } 377 376 xa_unlock(&sriov->xa_rej_tmout); ··· 394 393 return xa_err(old); 395 394 } 396 395 397 - schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT); 396 + queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); 398 397 399 398 return 0; 400 399 ··· 501 500 xa_lock(&sriov->xa_rej_tmout); 502 501 xa_for_each(&sriov->xa_rej_tmout, id, item) { 503 502 if (slave < 0 || slave == item->slave) { 504 - mod_delayed_work(system_wq, &item->timeout, 0); 503 + mod_delayed_work(cm_wq, &item->timeout, 0); 505 504 flush_needed = true; 506 505 ++cnt; 507 506 } ··· 509 508 xa_unlock(&sriov->xa_rej_tmout); 510 509 511 510 if (flush_needed) { 512 - flush_scheduled_work(); 511 + flush_workqueue(cm_wq); 513 512 pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n", 514 513 cnt, slave); 515 514 } ··· 541 540 
spin_unlock(&sriov->id_map_lock); 542 541 543 542 if (need_flush) 544 - flush_scheduled_work(); /* make sure all timers were flushed */ 543 + flush_workqueue(cm_wq); /* make sure all timers were flushed */ 545 544 546 545 /* now, remove all leftover entries from databases*/ 547 546 spin_lock(&sriov->id_map_lock); ··· 587 586 } 588 587 589 588 rej_tmout_xa_cleanup(sriov, slave); 589 + } 590 + 591 + int mlx4_ib_cm_init(void) 592 + { 593 + cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0); 594 + if (!cm_wq) 595 + return -ENOMEM; 596 + 597 + return 0; 598 + } 599 + 600 + void mlx4_ib_cm_destroy(void) 601 + { 602 + destroy_workqueue(cm_wq); 590 603 }
+13 -5
drivers/infiniband/hw/mlx4/main.c
··· 479 479 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 480 480 IB_DEVICE_PORT_ACTIVE_EVENT | 481 481 IB_DEVICE_SYS_IMAGE_GUID | 482 - IB_DEVICE_RC_RNR_NAK_GEN | 483 - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 482 + IB_DEVICE_RC_RNR_NAK_GEN; 483 + props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK; 484 484 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) 485 485 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 486 486 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) ··· 494 494 if (dev->dev->caps.max_gso_sz && 495 495 (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) && 496 496 (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)) 497 - props->device_cap_flags |= IB_DEVICE_UD_TSO; 497 + props->kernel_cap_flags |= IBK_UD_TSO; 498 498 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) 499 - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; 499 + props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY; 500 500 if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && 501 501 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && 502 502 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) ··· 3307 3307 if (!wq) 3308 3308 return -ENOMEM; 3309 3309 3310 - err = mlx4_ib_mcg_init(); 3310 + err = mlx4_ib_cm_init(); 3311 3311 if (err) 3312 3312 goto clean_wq; 3313 + 3314 + err = mlx4_ib_mcg_init(); 3315 + if (err) 3316 + goto clean_cm; 3313 3317 3314 3318 err = mlx4_register_interface(&mlx4_ib_interface); 3315 3319 if (err) ··· 3324 3320 clean_mcg: 3325 3321 mlx4_ib_mcg_destroy(); 3326 3322 3323 + clean_cm: 3324 + mlx4_ib_cm_destroy(); 3325 + 3327 3326 clean_wq: 3328 3327 destroy_workqueue(wq); 3329 3328 return err; ··· 3336 3329 { 3337 3330 mlx4_unregister_interface(&mlx4_ib_interface); 3338 3331 mlx4_ib_mcg_destroy(); 3332 + mlx4_ib_cm_destroy(); 3339 3333 destroy_workqueue(wq); 3340 3334 } 3341 3335
+3
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 937 937 int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, 938 938 int *num_of_mtts); 939 939 940 + int mlx4_ib_cm_init(void); 941 + void mlx4_ib_cm_destroy(void); 942 + 940 943 #endif /* MLX4_IB_H */
+1
drivers/infiniband/hw/mlx5/Makefile
··· 19 19 restrack.o \ 20 20 srq.o \ 21 21 srq_cmd.o \ 22 + umr.o \ 22 23 wr.o 23 24 24 25 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
-5
drivers/infiniband/hw/mlx5/fs.c
··· 1095 1095 1096 1096 spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); 1097 1097 1098 - if (is_egress) { 1099 - err = -EINVAL; 1100 - goto free; 1101 - } 1102 - 1103 1098 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 1104 1099 struct mlx5_ib_mcounters *mcounters; 1105 1100
+12 -112
drivers/infiniband/hw/mlx5/main.c
··· 41 41 #include "wr.h" 42 42 #include "restrack.h" 43 43 #include "counters.h" 44 + #include "umr.h" 44 45 #include <rdma/uverbs_std_types.h> 45 46 #include <rdma/uverbs_ioctl.h> 46 47 #include <rdma/mlx5_user_ioctl_verbs.h> ··· 855 854 IB_DEVICE_MEM_WINDOW_TYPE_2B; 856 855 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 857 856 /* We support 'Gappy' memory registration too */ 858 - props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; 857 + props->kernel_cap_flags |= IBK_SG_GAPS_REG; 859 858 } 860 859 /* IB_WR_REG_MR always requires changing the entity size with UMR */ 861 860 if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 862 861 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 863 862 if (MLX5_CAP_GEN(mdev, sho)) { 864 - props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; 863 + props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER; 865 864 /* At this stage no support for signature handover */ 866 865 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | 867 866 IB_PROT_T10DIF_TYPE_2 | ··· 870 869 IB_GUARD_T10DIF_CSUM; 871 870 } 872 871 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 873 - props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 872 + props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK; 874 873 875 874 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) { 876 875 if (MLX5_CAP_ETH(mdev, csum_cap)) { ··· 917 916 918 917 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 919 918 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 920 - props->device_cap_flags |= IB_DEVICE_UD_TSO; 919 + props->kernel_cap_flags |= IBK_UD_TSO; 921 920 } 922 921 923 922 if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && ··· 993 992 994 993 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 995 994 if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) 996 - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; 995 + props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING; 997 996 props->odp_caps = dev->odp_caps; 998 997 if (!uhw) { 999 998 /* ODP for kernel 
QPs is not implemented for receive ··· 1014 1013 } 1015 1014 } 1016 1015 1017 - if (MLX5_CAP_GEN(mdev, cd)) 1018 - props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; 1019 - 1020 1016 if (mlx5_core_is_vf(mdev)) 1021 - props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; 1017 + props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION; 1022 1018 1023 1019 if (mlx5_ib_port_link_layer(ibdev, 1) == 1024 1020 IB_LINK_LAYER_ETHERNET && raw_support) { ··· 4006 4008 if (err) 4007 4009 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 4008 4010 4009 - if (dev->umrc.qp) 4010 - ib_destroy_qp(dev->umrc.qp); 4011 - if (dev->umrc.cq) 4012 - ib_free_cq(dev->umrc.cq); 4013 - if (dev->umrc.pd) 4014 - ib_dealloc_pd(dev->umrc.pd); 4011 + mlx5r_umr_resource_cleanup(dev); 4015 4012 } 4016 4013 4017 4014 static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ··· 4014 4021 ib_unregister_device(&dev->ib_dev); 4015 4022 } 4016 4023 4017 - enum { 4018 - MAX_UMR_WR = 128, 4019 - }; 4020 - 4021 4024 static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) 4022 4025 { 4023 - struct ib_qp_init_attr *init_attr = NULL; 4024 - struct ib_qp_attr *attr = NULL; 4025 - struct ib_pd *pd; 4026 - struct ib_cq *cq; 4027 - struct ib_qp *qp; 4028 4026 int ret; 4029 4027 4030 - attr = kzalloc(sizeof(*attr), GFP_KERNEL); 4031 - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); 4032 - if (!attr || !init_attr) { 4033 - ret = -ENOMEM; 4034 - goto error_0; 4035 - } 4028 + ret = mlx5r_umr_resource_init(dev); 4029 + if (ret) 4030 + return ret; 4036 4031 4037 - pd = ib_alloc_pd(&dev->ib_dev, 0); 4038 - if (IS_ERR(pd)) { 4039 - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 4040 - ret = PTR_ERR(pd); 4041 - goto error_0; 4042 - } 4043 - 4044 - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 4045 - if (IS_ERR(cq)) { 4046 - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 4047 - ret = PTR_ERR(cq); 4048 - goto error_2; 4049 - } 4050 - 4051 - init_attr->send_cq = cq; 
4052 - init_attr->recv_cq = cq; 4053 - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 4054 - init_attr->cap.max_send_wr = MAX_UMR_WR; 4055 - init_attr->cap.max_send_sge = 1; 4056 - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; 4057 - init_attr->port_num = 1; 4058 - qp = ib_create_qp(pd, init_attr); 4059 - if (IS_ERR(qp)) { 4060 - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 4061 - ret = PTR_ERR(qp); 4062 - goto error_3; 4063 - } 4064 - 4065 - attr->qp_state = IB_QPS_INIT; 4066 - attr->port_num = 1; 4067 - ret = ib_modify_qp(qp, attr, 4068 - IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); 4069 - if (ret) { 4070 - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 4071 - goto error_4; 4072 - } 4073 - 4074 - memset(attr, 0, sizeof(*attr)); 4075 - attr->qp_state = IB_QPS_RTR; 4076 - attr->path_mtu = IB_MTU_256; 4077 - 4078 - ret = ib_modify_qp(qp, attr, IB_QP_STATE); 4079 - if (ret) { 4080 - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 4081 - goto error_4; 4082 - } 4083 - 4084 - memset(attr, 0, sizeof(*attr)); 4085 - attr->qp_state = IB_QPS_RTS; 4086 - ret = ib_modify_qp(qp, attr, IB_QP_STATE); 4087 - if (ret) { 4088 - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 4089 - goto error_4; 4090 - } 4091 - 4092 - dev->umrc.qp = qp; 4093 - dev->umrc.cq = cq; 4094 - dev->umrc.pd = pd; 4095 - 4096 - sema_init(&dev->umrc.sem, MAX_UMR_WR); 4097 4032 ret = mlx5_mr_cache_init(dev); 4098 4033 if (ret) { 4099 4034 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 4100 - goto error_4; 4035 + mlx5r_umr_resource_cleanup(dev); 4101 4036 } 4102 - 4103 - kfree(attr); 4104 - kfree(init_attr); 4105 - 4106 - return 0; 4107 - 4108 - error_4: 4109 - ib_destroy_qp(qp); 4110 - dev->umrc.qp = NULL; 4111 - 4112 - error_3: 4113 - ib_free_cq(cq); 4114 - dev->umrc.cq = NULL; 4115 - 4116 - error_2: 4117 - ib_dealloc_pd(pd); 4118 - dev->umrc.pd = NULL; 4119 - 4120 - error_0: 4121 - kfree(attr); 4122 - kfree(init_attr); 4123 4037 return ret; 4124 4038 } 4125 4039
+1 -88
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 291 291 }; 292 292 293 293 /* Use macros here so that don't have to duplicate 294 - * enum ib_send_flags and enum ib_qp_type for low-level driver 294 + * enum ib_qp_type for low-level driver 295 295 */ 296 - 297 - #define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0) 298 - #define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1) 299 - #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2) 300 - #define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3) 301 - #define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4) 302 - #define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END 303 296 304 297 #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 305 298 /* ··· 303 310 #define MLX5_IB_QPT_DCI IB_QPT_RESERVED3 304 311 #define MLX5_IB_QPT_DCT IB_QPT_RESERVED4 305 312 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 306 - 307 - #define MLX5_IB_UMR_OCTOWORD 16 308 - #define MLX5_IB_UMR_XLT_ALIGNMENT 64 309 313 310 314 #define MLX5_IB_UPD_XLT_ZAP BIT(0) 311 315 #define MLX5_IB_UPD_XLT_ENABLE BIT(1) ··· 528 538 int cqe_size; 529 539 int nent; 530 540 }; 531 - 532 - struct mlx5_umr_wr { 533 - struct ib_send_wr wr; 534 - u64 virt_addr; 535 - u64 offset; 536 - struct ib_pd *pd; 537 - unsigned int page_shift; 538 - unsigned int xlt_size; 539 - u64 length; 540 - int access_flags; 541 - u32 mkey; 542 - u8 ignore_free_state:1; 543 - }; 544 - 545 - static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) 546 - { 547 - return container_of(wr, struct mlx5_umr_wr, wr); 548 - } 549 541 550 542 enum mlx5_ib_cq_pr_flags { 551 543 MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0, ··· 1263 1291 struct uverbs_attr_bundle *attrs); 1264 1292 int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); 1265 1293 int mlx5_ib_dealloc_mw(struct ib_mw *mw); 1266 - int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 1267 - int page_shift, int flags); 1268 - int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int 
flags); 1269 1294 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, 1270 1295 int access_flags); 1271 1296 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); ··· 1441 1472 return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI; 1442 1473 } 1443 1474 1444 - #define MLX5_MAX_UMR_SHIFT 16 1445 - #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) 1446 - 1447 1475 static inline u32 check_cq_create_flags(u32 flags) 1448 1476 { 1449 1477 /* ··· 1511 1545 int bfregn_to_uar_index(struct mlx5_ib_dev *dev, 1512 1546 struct mlx5_bfreg_info *bfregi, u32 bfregn, 1513 1547 bool dyn_bfreg); 1514 - 1515 - static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev, 1516 - size_t length) 1517 - { 1518 - /* 1519 - * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is 1520 - * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka 1521 - * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey 1522 - * can never be enabled without this capability. Simplify this weird 1523 - * quirky hardware by just saying it can't use PAS lists with UMR at 1524 - * all. 1525 - */ 1526 - if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 1527 - return false; 1528 - 1529 - /* 1530 - * length is the size of the MR in bytes when mlx5_ib_update_xlt() is 1531 - * used. 1532 - */ 1533 - if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && 1534 - length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) 1535 - return false; 1536 - return true; 1537 - } 1538 - 1539 - /* 1540 - * true if an existing MR can be reconfigured to new access_flags using UMR. 1541 - * Older HW cannot use UMR to update certain elements of the MKC. 
See 1542 - * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask() 1543 - */ 1544 - static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, 1545 - unsigned int current_access_flags, 1546 - unsigned int target_access_flags) 1547 - { 1548 - unsigned int diffs = current_access_flags ^ target_access_flags; 1549 - 1550 - if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && 1551 - MLX5_CAP_GEN(dev->mdev, atomic) && 1552 - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 1553 - return false; 1554 - 1555 - if ((diffs & IB_ACCESS_RELAXED_ORDERING) && 1556 - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 1557 - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 1558 - return false; 1559 - 1560 - if ((diffs & IB_ACCESS_RELAXED_ORDERING) && 1561 - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && 1562 - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 1563 - return false; 1564 - 1565 - return true; 1566 - } 1567 1548 1568 1549 static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, 1569 1550 struct mlx5_ib_mkey *mmkey)
+18 -403
drivers/infiniband/hw/mlx5/mr.c
··· 44 44 #include <rdma/ib_verbs.h> 45 45 #include "dm.h" 46 46 #include "mlx5_ib.h" 47 - 48 - /* 49 - * We can't use an array for xlt_emergency_page because dma_map_single doesn't 50 - * work on kernel modules memory 51 - */ 52 - void *xlt_emergency_page; 53 - static DEFINE_MUTEX(xlt_emergency_page_mutex); 47 + #include "umr.h" 54 48 55 49 enum { 56 50 MAX_PENDING_REG_MR = 8, ··· 121 127 122 128 static int mr_cache_max_order(struct mlx5_ib_dev *dev); 123 129 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); 124 - 125 - static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) 126 - { 127 - return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); 128 - } 129 130 130 131 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 131 132 { ··· 589 600 struct mlx5_ib_mr *mr; 590 601 591 602 /* Matches access in alloc_cache_mr() */ 592 - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) 603 + if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) 593 604 return ERR_PTR(-EOPNOTSUPP); 594 605 595 606 spin_lock_irq(&ent->lock); ··· 730 741 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; 731 742 if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && 732 743 !dev->is_rep && mlx5_core_is_pf(dev->mdev) && 733 - mlx5_ib_can_load_pas_with_umr(dev, 0)) 744 + mlx5r_umr_can_load_pas(dev, 0)) 734 745 ent->limit = dev->mdev->profile.mr_cache[i].limit; 735 746 else 736 747 ent->limit = 0; ··· 837 848 return MLX5_MAX_UMR_SHIFT; 838 849 } 839 850 840 - static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) 841 - { 842 - struct mlx5_ib_umr_context *context = 843 - container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); 844 - 845 - context->status = wc->status; 846 - complete(&context->done); 847 - } 848 - 849 - static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) 850 - { 851 - context->cqe.done = mlx5_ib_umr_done; 852 - context->status = -1; 853 - init_completion(&context->done); 854 - } 855 - 856 - 
static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, 857 - struct mlx5_umr_wr *umrwr) 858 - { 859 - struct umr_common *umrc = &dev->umrc; 860 - const struct ib_send_wr *bad; 861 - int err; 862 - struct mlx5_ib_umr_context umr_context; 863 - 864 - mlx5_ib_init_umr_context(&umr_context); 865 - umrwr->wr.wr_cqe = &umr_context.cqe; 866 - 867 - down(&umrc->sem); 868 - err = ib_post_send(umrc->qp, &umrwr->wr, &bad); 869 - if (err) { 870 - mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); 871 - } else { 872 - wait_for_completion(&umr_context.done); 873 - if (umr_context.status != IB_WC_SUCCESS) { 874 - mlx5_ib_warn(dev, "reg umr failed (%u)\n", 875 - umr_context.status); 876 - err = -EFAULT; 877 - } 878 - } 879 - up(&umrc->sem); 880 - return err; 881 - } 882 - 883 851 static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, 884 852 unsigned int order) 885 853 { ··· 895 949 * cache then synchronously create an uncached one. 896 950 */ 897 951 if (!ent || ent->limit == 0 || 898 - !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { 952 + !mlx5r_umr_can_reconfig(dev, 0, access_flags)) { 899 953 mutex_lock(&dev->slow_path_mutex); 900 954 mr = reg_create(pd, umem, iova, access_flags, page_size, false); 901 955 mutex_unlock(&dev->slow_path_mutex); ··· 912 966 set_mr_fields(dev, mr, umem->length, access_flags, iova); 913 967 914 968 return mr; 915 - } 916 - 917 - #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ 918 - MLX5_UMR_MTT_ALIGNMENT) 919 - #define MLX5_SPARE_UMR_CHUNK 0x10000 920 - 921 - /* 922 - * Allocate a temporary buffer to hold the per-page information to transfer to 923 - * HW. For efficiency this should be as large as it can be, but buffer 924 - * allocation failure is not allowed, so try smaller sizes. 
925 - */ 926 - static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) 927 - { 928 - const size_t xlt_chunk_align = 929 - MLX5_UMR_MTT_ALIGNMENT / ent_size; 930 - size_t size; 931 - void *res = NULL; 932 - 933 - static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); 934 - 935 - /* 936 - * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the 937 - * allocation can't trigger any kind of reclaim. 938 - */ 939 - might_sleep(); 940 - 941 - gfp_mask |= __GFP_ZERO | __GFP_NORETRY; 942 - 943 - /* 944 - * If the system already has a suitable high order page then just use 945 - * that, but don't try hard to create one. This max is about 1M, so a 946 - * free x86 huge page will satisfy it. 947 - */ 948 - size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), 949 - MLX5_MAX_UMR_CHUNK); 950 - *nents = size / ent_size; 951 - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 952 - get_order(size)); 953 - if (res) 954 - return res; 955 - 956 - if (size > MLX5_SPARE_UMR_CHUNK) { 957 - size = MLX5_SPARE_UMR_CHUNK; 958 - *nents = size / ent_size; 959 - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 960 - get_order(size)); 961 - if (res) 962 - return res; 963 - } 964 - 965 - *nents = PAGE_SIZE / ent_size; 966 - res = (void *)__get_free_page(gfp_mask); 967 - if (res) 968 - return res; 969 - 970 - mutex_lock(&xlt_emergency_page_mutex); 971 - memset(xlt_emergency_page, 0, PAGE_SIZE); 972 - return xlt_emergency_page; 973 - } 974 - 975 - static void mlx5_ib_free_xlt(void *xlt, size_t length) 976 - { 977 - if (xlt == xlt_emergency_page) { 978 - mutex_unlock(&xlt_emergency_page_mutex); 979 - return; 980 - } 981 - 982 - free_pages((unsigned long)xlt, get_order(length)); 983 - } 984 - 985 - /* 986 - * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for 987 - * submission. 
988 - */ 989 - static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, 990 - struct mlx5_umr_wr *wr, struct ib_sge *sg, 991 - size_t nents, size_t ent_size, 992 - unsigned int flags) 993 - { 994 - struct mlx5_ib_dev *dev = mr_to_mdev(mr); 995 - struct device *ddev = &dev->mdev->pdev->dev; 996 - dma_addr_t dma; 997 - void *xlt; 998 - 999 - xlt = mlx5_ib_alloc_xlt(&nents, ent_size, 1000 - flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : 1001 - GFP_KERNEL); 1002 - sg->length = nents * ent_size; 1003 - dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); 1004 - if (dma_mapping_error(ddev, dma)) { 1005 - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); 1006 - mlx5_ib_free_xlt(xlt, sg->length); 1007 - return NULL; 1008 - } 1009 - sg->addr = dma; 1010 - sg->lkey = dev->umrc.pd->local_dma_lkey; 1011 - 1012 - memset(wr, 0, sizeof(*wr)); 1013 - wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; 1014 - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) 1015 - wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1016 - wr->wr.sg_list = sg; 1017 - wr->wr.num_sge = 1; 1018 - wr->wr.opcode = MLX5_IB_WR_UMR; 1019 - wr->pd = mr->ibmr.pd; 1020 - wr->mkey = mr->mmkey.key; 1021 - wr->length = mr->ibmr.length; 1022 - wr->virt_addr = mr->ibmr.iova; 1023 - wr->access_flags = mr->access_flags; 1024 - wr->page_shift = mr->page_shift; 1025 - wr->xlt_size = sg->length; 1026 - return xlt; 1027 - } 1028 - 1029 - static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, 1030 - struct ib_sge *sg) 1031 - { 1032 - struct device *ddev = &dev->mdev->pdev->dev; 1033 - 1034 - dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); 1035 - mlx5_ib_free_xlt(xlt, sg->length); 1036 - } 1037 - 1038 - static unsigned int xlt_wr_final_send_flags(unsigned int flags) 1039 - { 1040 - unsigned int res = 0; 1041 - 1042 - if (flags & MLX5_IB_UPD_XLT_ENABLE) 1043 - res |= MLX5_IB_SEND_UMR_ENABLE_MR | 1044 - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | 1045 - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1046 - if 
(flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) 1047 - res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1048 - if (flags & MLX5_IB_UPD_XLT_ADDR) 1049 - res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1050 - return res; 1051 - } 1052 - 1053 - int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 1054 - int page_shift, int flags) 1055 - { 1056 - struct mlx5_ib_dev *dev = mr_to_mdev(mr); 1057 - struct device *ddev = &dev->mdev->pdev->dev; 1058 - void *xlt; 1059 - struct mlx5_umr_wr wr; 1060 - struct ib_sge sg; 1061 - int err = 0; 1062 - int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) 1063 - ? sizeof(struct mlx5_klm) 1064 - : sizeof(struct mlx5_mtt); 1065 - const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; 1066 - const int page_mask = page_align - 1; 1067 - size_t pages_mapped = 0; 1068 - size_t pages_to_map = 0; 1069 - size_t pages_iter; 1070 - size_t size_to_map = 0; 1071 - size_t orig_sg_length; 1072 - 1073 - if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && 1074 - !umr_can_use_indirect_mkey(dev)) 1075 - return -EPERM; 1076 - 1077 - if (WARN_ON(!mr->umem->is_odp)) 1078 - return -EINVAL; 1079 - 1080 - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, 1081 - * so we need to align the offset and length accordingly 1082 - */ 1083 - if (idx & page_mask) { 1084 - npages += idx & page_mask; 1085 - idx &= ~page_mask; 1086 - } 1087 - pages_to_map = ALIGN(npages, page_align); 1088 - 1089 - xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); 1090 - if (!xlt) 1091 - return -ENOMEM; 1092 - pages_iter = sg.length / desc_size; 1093 - orig_sg_length = sg.length; 1094 - 1095 - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { 1096 - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 1097 - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; 1098 - 1099 - pages_to_map = min_t(size_t, pages_to_map, max_pages); 1100 - } 1101 - 1102 - wr.page_shift = page_shift; 1103 - 1104 - for (pages_mapped = 0; 1105 - pages_mapped < pages_to_map && !err; 
1106 - pages_mapped += pages_iter, idx += pages_iter) { 1107 - npages = min_t(int, pages_iter, pages_to_map - pages_mapped); 1108 - size_to_map = npages * desc_size; 1109 - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 1110 - DMA_TO_DEVICE); 1111 - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); 1112 - dma_sync_single_for_device(ddev, sg.addr, sg.length, 1113 - DMA_TO_DEVICE); 1114 - 1115 - sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); 1116 - 1117 - if (pages_mapped + pages_iter >= pages_to_map) 1118 - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); 1119 - 1120 - wr.offset = idx * desc_size; 1121 - wr.xlt_size = sg.length; 1122 - 1123 - err = mlx5_ib_post_send_wait(dev, &wr); 1124 - } 1125 - sg.length = orig_sg_length; 1126 - mlx5_ib_unmap_free_xlt(dev, xlt, &sg); 1127 - return err; 1128 - } 1129 - 1130 - /* 1131 - * Send the DMA list to the HW for a normal MR using UMR. 1132 - * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP 1133 - * flag may be used. 
1134 - */ 1135 - int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) 1136 - { 1137 - struct mlx5_ib_dev *dev = mr_to_mdev(mr); 1138 - struct device *ddev = &dev->mdev->pdev->dev; 1139 - struct ib_block_iter biter; 1140 - struct mlx5_mtt *cur_mtt; 1141 - struct mlx5_umr_wr wr; 1142 - size_t orig_sg_length; 1143 - struct mlx5_mtt *mtt; 1144 - size_t final_size; 1145 - struct ib_sge sg; 1146 - int err = 0; 1147 - 1148 - if (WARN_ON(mr->umem->is_odp)) 1149 - return -EINVAL; 1150 - 1151 - mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, 1152 - ib_umem_num_dma_blocks(mr->umem, 1153 - 1 << mr->page_shift), 1154 - sizeof(*mtt), flags); 1155 - if (!mtt) 1156 - return -ENOMEM; 1157 - orig_sg_length = sg.length; 1158 - 1159 - cur_mtt = mtt; 1160 - rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter, 1161 - mr->umem->sgt_append.sgt.nents, 1162 - BIT(mr->page_shift)) { 1163 - if (cur_mtt == (void *)mtt + sg.length) { 1164 - dma_sync_single_for_device(ddev, sg.addr, sg.length, 1165 - DMA_TO_DEVICE); 1166 - err = mlx5_ib_post_send_wait(dev, &wr); 1167 - if (err) 1168 - goto err; 1169 - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 1170 - DMA_TO_DEVICE); 1171 - wr.offset += sg.length; 1172 - cur_mtt = mtt; 1173 - } 1174 - 1175 - cur_mtt->ptag = 1176 - cpu_to_be64(rdma_block_iter_dma_address(&biter) | 1177 - MLX5_IB_MTT_PRESENT); 1178 - 1179 - if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) 1180 - cur_mtt->ptag = 0; 1181 - 1182 - cur_mtt++; 1183 - } 1184 - 1185 - final_size = (void *)cur_mtt - (void *)mtt; 1186 - sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); 1187 - memset(cur_mtt, 0, sg.length - final_size); 1188 - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); 1189 - wr.xlt_size = sg.length; 1190 - 1191 - dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); 1192 - err = mlx5_ib_post_send_wait(dev, &wr); 1193 - 1194 - err: 1195 - sg.length = orig_sg_length; 1196 - mlx5_ib_unmap_free_xlt(dev, mtt, &sg); 1197 - return err; 
1198 969 } 1199 970 1200 971 /* ··· 1104 1441 bool xlt_with_umr; 1105 1442 int err; 1106 1443 1107 - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); 1444 + xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); 1108 1445 if (xlt_with_umr) { 1109 1446 mr = alloc_cacheable_mr(pd, umem, iova, access_flags); 1110 1447 } else { ··· 1130 1467 * configured properly but left disabled. It is safe to go ahead 1131 1468 * and configure it again via UMR while enabling it. 1132 1469 */ 1133 - err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); 1470 + err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); 1134 1471 if (err) { 1135 1472 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1136 1473 return ERR_PTR(err); ··· 1167 1504 } 1168 1505 1169 1506 /* ODP requires xlt update via umr to work. */ 1170 - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) 1507 + if (!mlx5r_umr_can_load_pas(dev, length)) 1171 1508 return ERR_PTR(-EINVAL); 1172 1509 1173 1510 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, ··· 1229 1566 if (!umem_dmabuf->sgt) 1230 1567 return; 1231 1568 1232 - mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); 1569 + mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); 1233 1570 ib_umem_dmabuf_unmap_pages(umem_dmabuf); 1234 1571 } 1235 1572 ··· 1257 1594 offset, virt_addr, length, fd, access_flags); 1258 1595 1259 1596 /* dmabuf requires xlt update via umr to work. */ 1260 - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) 1597 + if (!mlx5r_umr_can_load_pas(dev, length)) 1261 1598 return ERR_PTR(-EINVAL); 1262 1599 1263 1600 umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, ··· 1294 1631 return ERR_PTR(err); 1295 1632 } 1296 1633 1297 - /** 1298 - * revoke_mr - Fence all DMA on the MR 1299 - * @mr: The MR to fence 1300 - * 1301 - * Upon return the NIC will not be doing any DMA to the pages under the MR, 1302 - * and any DMA in progress will be completed. 
Failure of this function 1303 - * indicates the HW has failed catastrophically. 1304 - */ 1305 - static int revoke_mr(struct mlx5_ib_mr *mr) 1306 - { 1307 - struct mlx5_umr_wr umrwr = {}; 1308 - 1309 - if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 1310 - return 0; 1311 - 1312 - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | 1313 - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1314 - umrwr.wr.opcode = MLX5_IB_WR_UMR; 1315 - umrwr.pd = mr_to_mdev(mr)->umrc.pd; 1316 - umrwr.mkey = mr->mmkey.key; 1317 - umrwr.ignore_free_state = 1; 1318 - 1319 - return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); 1320 - } 1321 - 1322 1634 /* 1323 1635 * True if the change in access flags can be done via UMR, only some access 1324 1636 * flags can be updated. ··· 1307 1669 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | 1308 1670 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) 1309 1671 return false; 1310 - return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, 1311 - target_access_flags); 1312 - } 1313 - 1314 - static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, 1315 - int access_flags) 1316 - { 1317 - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1318 - struct mlx5_umr_wr umrwr = { 1319 - .wr = { 1320 - .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | 1321 - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, 1322 - .opcode = MLX5_IB_WR_UMR, 1323 - }, 1324 - .mkey = mr->mmkey.key, 1325 - .pd = pd, 1326 - .access_flags = access_flags, 1327 - }; 1328 - int err; 1329 - 1330 - err = mlx5_ib_post_send_wait(dev, &umrwr); 1331 - if (err) 1332 - return err; 1333 - 1334 - mr->access_flags = access_flags; 1335 - return 0; 1672 + return mlx5r_umr_can_reconfig(dev, current_access_flags, 1673 + target_access_flags); 1336 1674 } 1337 1675 1338 1676 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, ··· 1321 1707 /* We only track the allocated sizes of MRs from the cache */ 1322 1708 if (!mr->cache_ent) 1323 1709 return false; 1324 - if 
(!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) 1710 + if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) 1325 1711 return false; 1326 1712 1327 1713 *page_size = ··· 1346 1732 * with it. This ensure the change is atomic relative to any use of the 1347 1733 * MR. 1348 1734 */ 1349 - err = revoke_mr(mr); 1735 + err = mlx5r_umr_revoke_mr(mr); 1350 1736 if (err) 1351 1737 return err; 1352 1738 ··· 1364 1750 mr->ibmr.length = new_umem->length; 1365 1751 mr->page_shift = order_base_2(page_size); 1366 1752 mr->umem = new_umem; 1367 - err = mlx5_ib_update_mr_pas(mr, upd_flags); 1753 + err = mlx5r_umr_update_mr_pas(mr, upd_flags); 1368 1754 if (err) { 1369 1755 /* 1370 1756 * The MR is revoked at this point so there is no issue to free ··· 1411 1797 /* Fast path for PD/access change */ 1412 1798 if (can_use_umr_rereg_access(dev, mr->access_flags, 1413 1799 new_access_flags)) { 1414 - err = umr_rereg_pd_access(mr, new_pd, new_access_flags); 1800 + err = mlx5r_umr_rereg_pd_access(mr, new_pd, 1801 + new_access_flags); 1415 1802 if (err) 1416 1803 return ERR_PTR(err); 1417 1804 return NULL; ··· 1425 1810 * Only one active MR can refer to a umem at one time, revoke 1426 1811 * the old MR before assigning the umem to the new one. 1427 1812 */ 1428 - err = revoke_mr(mr); 1813 + err = mlx5r_umr_revoke_mr(mr); 1429 1814 if (err) 1430 1815 return ERR_PTR(err); 1431 1816 umem = mr->umem; ··· 1570 1955 1571 1956 /* Stop DMA */ 1572 1957 if (mr->cache_ent) { 1573 - if (revoke_mr(mr)) { 1958 + if (mlx5r_umr_revoke_mr(mr)) { 1574 1959 spin_lock_irq(&mr->cache_ent->lock); 1575 1960 mr->cache_ent->total_mrs--; 1576 1961 spin_unlock_irq(&mr->cache_ent->lock);
+31 -33
drivers/infiniband/hw/mlx5/odp.c
··· 38 38 39 39 #include "mlx5_ib.h" 40 40 #include "cmd.h" 41 + #include "umr.h" 41 42 #include "qp.h" 42 43 43 44 #include <linux/mlx5/eq.h> ··· 118 117 * 119 118 * xa_store() 120 119 * mutex_lock(umem_mutex) 121 - * mlx5_ib_update_xlt() 120 + * mlx5r_umr_update_xlt() 122 121 * mutex_unlock(umem_mutex) 123 122 * destroy lkey 124 123 * ··· 199 198 mlx5r_deref_wait_odp_mkey(&mr->mmkey); 200 199 201 200 mutex_lock(&odp_imr->umem_mutex); 202 - mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 203 - 1, 0, 204 - MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); 201 + mlx5r_umr_update_xlt(mr->parent, 202 + ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0, 203 + MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); 205 204 mutex_unlock(&odp_imr->umem_mutex); 206 205 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 207 206 ··· 283 282 u64 umr_offset = idx & umr_block_mask; 284 283 285 284 if (in_block && umr_offset == 0) { 286 - mlx5_ib_update_xlt(mr, blk_start_idx, 287 - idx - blk_start_idx, 0, 288 - MLX5_IB_UPD_XLT_ZAP | 289 - MLX5_IB_UPD_XLT_ATOMIC); 285 + mlx5r_umr_update_xlt(mr, blk_start_idx, 286 + idx - blk_start_idx, 0, 287 + MLX5_IB_UPD_XLT_ZAP | 288 + MLX5_IB_UPD_XLT_ATOMIC); 290 289 in_block = 0; 291 290 } 292 291 } 293 292 } 294 293 if (in_block) 295 - mlx5_ib_update_xlt(mr, blk_start_idx, 296 - idx - blk_start_idx + 1, 0, 297 - MLX5_IB_UPD_XLT_ZAP | 298 - MLX5_IB_UPD_XLT_ATOMIC); 294 + mlx5r_umr_update_xlt(mr, blk_start_idx, 295 + idx - blk_start_idx + 1, 0, 296 + MLX5_IB_UPD_XLT_ZAP | 297 + MLX5_IB_UPD_XLT_ATOMIC); 299 298 300 299 mlx5_update_odp_stats(mr, invalidations, invalidations); 301 300 ··· 324 323 325 324 memset(caps, 0, sizeof(*caps)); 326 325 327 - if (!MLX5_CAP_GEN(dev->mdev, pg) || 328 - !mlx5_ib_can_load_pas_with_umr(dev, 0)) 326 + if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0)) 329 327 return; 330 328 331 329 caps->general_caps = IB_ODP_SUPPORT; ··· 442 442 */ 443 443 refcount_set(&mr->mmkey.usecount, 2); 444 444 
445 - err = mlx5_ib_update_xlt(mr, 0, 446 - MLX5_IMR_MTT_ENTRIES, 447 - PAGE_SHIFT, 448 - MLX5_IB_UPD_XLT_ZAP | 449 - MLX5_IB_UPD_XLT_ENABLE); 445 + err = mlx5r_umr_update_xlt(mr, 0, 446 + MLX5_IMR_MTT_ENTRIES, 447 + PAGE_SHIFT, 448 + MLX5_IB_UPD_XLT_ZAP | 449 + MLX5_IB_UPD_XLT_ENABLE); 450 450 if (err) { 451 451 ret = ERR_PTR(err); 452 452 goto out_mr; ··· 487 487 struct mlx5_ib_mr *imr; 488 488 int err; 489 489 490 - if (!mlx5_ib_can_load_pas_with_umr(dev, 491 - MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) 490 + if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) 492 491 return ERR_PTR(-EOPNOTSUPP); 493 492 494 493 umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); ··· 509 510 imr->ibmr.lkey = imr->mmkey.key; 510 511 imr->ibmr.rkey = imr->mmkey.key; 511 512 imr->ibmr.device = &dev->ib_dev; 512 - imr->umem = &umem_odp->umem; 513 513 imr->is_odp_implicit = true; 514 514 xa_init(&imr->implicit_children); 515 515 516 - err = mlx5_ib_update_xlt(imr, 0, 517 - mlx5_imr_ksm_entries, 518 - MLX5_KSM_PAGE_SHIFT, 519 - MLX5_IB_UPD_XLT_INDIRECT | 520 - MLX5_IB_UPD_XLT_ZAP | 521 - MLX5_IB_UPD_XLT_ENABLE); 516 + err = mlx5r_umr_update_xlt(imr, 0, 517 + mlx5_imr_ksm_entries, 518 + MLX5_KSM_PAGE_SHIFT, 519 + MLX5_IB_UPD_XLT_INDIRECT | 520 + MLX5_IB_UPD_XLT_ZAP | 521 + MLX5_IB_UPD_XLT_ENABLE); 522 522 if (err) 523 523 goto out_mr; 524 524 ··· 580 582 * No need to check whether the MTTs really belong to this MR, since 581 583 * ib_umem_odp_map_dma_and_lock already checks this. 582 584 */ 583 - ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags); 585 + ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags); 584 586 mutex_unlock(&odp->umem_mutex); 585 587 586 588 if (ret < 0) { ··· 678 680 * next pagefault handler will see the new information. 
679 681 */ 680 682 mutex_lock(&odp_imr->umem_mutex); 681 - err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, 682 - MLX5_IB_UPD_XLT_INDIRECT | 683 - MLX5_IB_UPD_XLT_ATOMIC); 683 + err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0, 684 + MLX5_IB_UPD_XLT_INDIRECT | 685 + MLX5_IB_UPD_XLT_ATOMIC); 684 686 mutex_unlock(&odp_imr->umem_mutex); 685 687 if (err) { 686 688 mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); ··· 714 716 ib_umem_dmabuf_unmap_pages(umem_dmabuf); 715 717 err = -EINVAL; 716 718 } else { 717 - err = mlx5_ib_update_mr_pas(mr, xlt_flags); 719 + err = mlx5r_umr_update_mr_pas(mr, xlt_flags); 718 720 } 719 721 dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); 720 722
+1
drivers/infiniband/hw/mlx5/qp.c
··· 40 40 #include "ib_rep.h" 41 41 #include "counters.h" 42 42 #include "cmd.h" 43 + #include "umr.h" 43 44 #include "qp.h" 44 45 #include "wr.h" 45 46
+700
drivers/infiniband/hw/mlx5/umr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ 3 + 4 + #include <rdma/ib_umem_odp.h> 5 + #include "mlx5_ib.h" 6 + #include "umr.h" 7 + #include "wr.h" 8 + 9 + /* 10 + * We can't use an array for xlt_emergency_page because dma_map_single doesn't 11 + * work on kernel modules memory 12 + */ 13 + void *xlt_emergency_page; 14 + static DEFINE_MUTEX(xlt_emergency_page_mutex); 15 + 16 + static __be64 get_umr_enable_mr_mask(void) 17 + { 18 + u64 result; 19 + 20 + result = MLX5_MKEY_MASK_KEY | 21 + MLX5_MKEY_MASK_FREE; 22 + 23 + return cpu_to_be64(result); 24 + } 25 + 26 + static __be64 get_umr_disable_mr_mask(void) 27 + { 28 + u64 result; 29 + 30 + result = MLX5_MKEY_MASK_FREE; 31 + 32 + return cpu_to_be64(result); 33 + } 34 + 35 + static __be64 get_umr_update_translation_mask(void) 36 + { 37 + u64 result; 38 + 39 + result = MLX5_MKEY_MASK_LEN | 40 + MLX5_MKEY_MASK_PAGE_SIZE | 41 + MLX5_MKEY_MASK_START_ADDR; 42 + 43 + return cpu_to_be64(result); 44 + } 45 + 46 + static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev) 47 + { 48 + u64 result; 49 + 50 + result = MLX5_MKEY_MASK_LR | 51 + MLX5_MKEY_MASK_LW | 52 + MLX5_MKEY_MASK_RR | 53 + MLX5_MKEY_MASK_RW; 54 + 55 + if (MLX5_CAP_GEN(dev->mdev, atomic)) 56 + result |= MLX5_MKEY_MASK_A; 57 + 58 + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 59 + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; 60 + 61 + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 62 + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; 63 + 64 + return cpu_to_be64(result); 65 + } 66 + 67 + static __be64 get_umr_update_pd_mask(void) 68 + { 69 + u64 result; 70 + 71 + result = MLX5_MKEY_MASK_PD; 72 + 73 + return cpu_to_be64(result); 74 + } 75 + 76 + static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) 77 + { 78 + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && 79 + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 80 + return -EPERM; 81 + 82 + if 
(mask & MLX5_MKEY_MASK_A && 83 + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 84 + return -EPERM; 85 + 86 + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && 87 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 88 + return -EPERM; 89 + 90 + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && 91 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 92 + return -EPERM; 93 + 94 + return 0; 95 + } 96 + 97 + enum { 98 + MAX_UMR_WR = 128, 99 + }; 100 + 101 + static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp) 102 + { 103 + struct ib_qp_attr attr = {}; 104 + int ret; 105 + 106 + attr.qp_state = IB_QPS_INIT; 107 + attr.port_num = 1; 108 + ret = ib_modify_qp(qp, &attr, 109 + IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); 110 + if (ret) { 111 + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 112 + return ret; 113 + } 114 + 115 + memset(&attr, 0, sizeof(attr)); 116 + attr.qp_state = IB_QPS_RTR; 117 + 118 + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 119 + if (ret) { 120 + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 121 + return ret; 122 + } 123 + 124 + memset(&attr, 0, sizeof(attr)); 125 + attr.qp_state = IB_QPS_RTS; 126 + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 127 + if (ret) { 128 + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 129 + return ret; 130 + } 131 + 132 + return 0; 133 + } 134 + 135 + int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) 136 + { 137 + struct ib_qp_init_attr init_attr = {}; 138 + struct ib_pd *pd; 139 + struct ib_cq *cq; 140 + struct ib_qp *qp; 141 + int ret; 142 + 143 + pd = ib_alloc_pd(&dev->ib_dev, 0); 144 + if (IS_ERR(pd)) { 145 + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 146 + return PTR_ERR(pd); 147 + } 148 + 149 + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 150 + if (IS_ERR(cq)) { 151 + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 152 + ret = PTR_ERR(cq); 153 + goto destroy_pd; 154 + } 155 + 156 + init_attr.send_cq = cq; 157 + 
init_attr.recv_cq = cq; 158 + init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; 159 + init_attr.cap.max_send_wr = MAX_UMR_WR; 160 + init_attr.cap.max_send_sge = 1; 161 + init_attr.qp_type = MLX5_IB_QPT_REG_UMR; 162 + init_attr.port_num = 1; 163 + qp = ib_create_qp(pd, &init_attr); 164 + if (IS_ERR(qp)) { 165 + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 166 + ret = PTR_ERR(qp); 167 + goto destroy_cq; 168 + } 169 + 170 + ret = mlx5r_umr_qp_rst2rts(dev, qp); 171 + if (ret) 172 + goto destroy_qp; 173 + 174 + dev->umrc.qp = qp; 175 + dev->umrc.cq = cq; 176 + dev->umrc.pd = pd; 177 + 178 + sema_init(&dev->umrc.sem, MAX_UMR_WR); 179 + 180 + return 0; 181 + 182 + destroy_qp: 183 + ib_destroy_qp(qp); 184 + destroy_cq: 185 + ib_free_cq(cq); 186 + destroy_pd: 187 + ib_dealloc_pd(pd); 188 + return ret; 189 + } 190 + 191 + void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) 192 + { 193 + ib_destroy_qp(dev->umrc.qp); 194 + ib_free_cq(dev->umrc.cq); 195 + ib_dealloc_pd(dev->umrc.pd); 196 + } 197 + 198 + static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, 199 + struct mlx5r_umr_wqe *wqe, bool with_data) 200 + { 201 + unsigned int wqe_size = 202 + with_data ? 
sizeof(struct mlx5r_umr_wqe) : 203 + sizeof(struct mlx5r_umr_wqe) - 204 + sizeof(struct mlx5_wqe_data_seg); 205 + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 206 + struct mlx5_core_dev *mdev = dev->mdev; 207 + struct mlx5_ib_qp *qp = to_mqp(ibqp); 208 + struct mlx5_wqe_ctrl_seg *ctrl; 209 + union { 210 + struct ib_cqe *ib_cqe; 211 + u64 wr_id; 212 + } id; 213 + void *cur_edge, *seg; 214 + unsigned long flags; 215 + unsigned int idx; 216 + int size, err; 217 + 218 + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) 219 + return -EIO; 220 + 221 + spin_lock_irqsave(&qp->sq.lock, flags); 222 + 223 + err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0, 224 + cpu_to_be32(mkey), false, false); 225 + if (WARN_ON(err)) 226 + goto out; 227 + 228 + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 229 + 230 + mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size); 231 + 232 + id.ib_cqe = cqe; 233 + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, 234 + MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR); 235 + 236 + mlx5r_ring_db(qp, 1, ctrl); 237 + 238 + out: 239 + spin_unlock_irqrestore(&qp->sq.lock, flags); 240 + 241 + return err; 242 + } 243 + 244 + static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) 245 + { 246 + struct mlx5_ib_umr_context *context = 247 + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); 248 + 249 + context->status = wc->status; 250 + complete(&context->done); 251 + } 252 + 253 + static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context) 254 + { 255 + context->cqe.done = mlx5r_umr_done; 256 + init_completion(&context->done); 257 + } 258 + 259 + static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, 260 + struct mlx5r_umr_wqe *wqe, bool with_data) 261 + { 262 + struct umr_common *umrc = &dev->umrc; 263 + struct mlx5r_umr_context umr_context; 264 + int err; 265 + 266 + err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask)); 267 + if (WARN_ON(err)) 268 
+ return err; 269 + 270 + mlx5r_umr_init_context(&umr_context); 271 + 272 + down(&umrc->sem); 273 + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, 274 + with_data); 275 + if (err) 276 + mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); 277 + else { 278 + wait_for_completion(&umr_context.done); 279 + if (umr_context.status != IB_WC_SUCCESS) { 280 + mlx5_ib_warn(dev, "reg umr failed (%u)\n", 281 + umr_context.status); 282 + err = -EFAULT; 283 + } 284 + } 285 + up(&umrc->sem); 286 + return err; 287 + } 288 + 289 + /** 290 + * mlx5r_umr_revoke_mr - Fence all DMA on the MR 291 + * @mr: The MR to fence 292 + * 293 + * Upon return the NIC will not be doing any DMA to the pages under the MR, 294 + * and any DMA in progress will be completed. Failure of this function 295 + * indicates the HW has failed catastrophically. 296 + */ 297 + int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr) 298 + { 299 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 300 + struct mlx5r_umr_wqe wqe = {}; 301 + 302 + if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 303 + return 0; 304 + 305 + wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); 306 + wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask(); 307 + wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; 308 + 309 + MLX5_SET(mkc, &wqe.mkey_seg, free, 1); 310 + MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn); 311 + MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); 312 + MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, 313 + mlx5_mkey_variant(mr->mmkey.key)); 314 + 315 + return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); 316 + } 317 + 318 + static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, 319 + struct mlx5_mkey_seg *seg, 320 + unsigned int access_flags) 321 + { 322 + MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); 323 + MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); 324 + MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); 325 + MLX5_SET(mkc, seg, lw, 
!!(access_flags & IB_ACCESS_LOCAL_WRITE)); 326 + MLX5_SET(mkc, seg, lr, 1); 327 + MLX5_SET(mkc, seg, relaxed_ordering_write, 328 + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); 329 + MLX5_SET(mkc, seg, relaxed_ordering_read, 330 + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); 331 + } 332 + 333 + int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, 334 + int access_flags) 335 + { 336 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 337 + struct mlx5r_umr_wqe wqe = {}; 338 + int err; 339 + 340 + wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev); 341 + wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); 342 + wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE; 343 + wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; 344 + 345 + mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags); 346 + MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn); 347 + MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); 348 + MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, 349 + mlx5_mkey_variant(mr->mmkey.key)); 350 + 351 + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); 352 + if (err) 353 + return err; 354 + 355 + mr->access_flags = access_flags; 356 + return 0; 357 + } 358 + 359 + #define MLX5_MAX_UMR_CHUNK \ 360 + ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT) 361 + #define MLX5_SPARE_UMR_CHUNK 0x10000 362 + 363 + /* 364 + * Allocate a temporary buffer to hold the per-page information to transfer to 365 + * HW. For efficiency this should be as large as it can be, but buffer 366 + * allocation failure is not allowed, so try smaller sizes. 367 + */ 368 + static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) 369 + { 370 + const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size; 371 + size_t size; 372 + void *res = NULL; 373 + 374 + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); 375 + 376 + /* 377 + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the 378 + * allocation can't trigger any kind of reclaim. 
379 + */ 380 + might_sleep(); 381 + 382 + gfp_mask |= __GFP_ZERO | __GFP_NORETRY; 383 + 384 + /* 385 + * If the system already has a suitable high order page then just use 386 + * that, but don't try hard to create one. This max is about 1M, so a 387 + * free x86 huge page will satisfy it. 388 + */ 389 + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), 390 + MLX5_MAX_UMR_CHUNK); 391 + *nents = size / ent_size; 392 + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 393 + get_order(size)); 394 + if (res) 395 + return res; 396 + 397 + if (size > MLX5_SPARE_UMR_CHUNK) { 398 + size = MLX5_SPARE_UMR_CHUNK; 399 + *nents = size / ent_size; 400 + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 401 + get_order(size)); 402 + if (res) 403 + return res; 404 + } 405 + 406 + *nents = PAGE_SIZE / ent_size; 407 + res = (void *)__get_free_page(gfp_mask); 408 + if (res) 409 + return res; 410 + 411 + mutex_lock(&xlt_emergency_page_mutex); 412 + memset(xlt_emergency_page, 0, PAGE_SIZE); 413 + return xlt_emergency_page; 414 + } 415 + 416 + static void mlx5r_umr_free_xlt(void *xlt, size_t length) 417 + { 418 + if (xlt == xlt_emergency_page) { 419 + mutex_unlock(&xlt_emergency_page_mutex); 420 + return; 421 + } 422 + 423 + free_pages((unsigned long)xlt, get_order(length)); 424 + } 425 + 426 + static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, 427 + struct ib_sge *sg) 428 + { 429 + struct device *ddev = &dev->mdev->pdev->dev; 430 + 431 + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); 432 + mlx5r_umr_free_xlt(xlt, sg->length); 433 + } 434 + 435 + /* 436 + * Create an XLT buffer ready for submission. 
437 + */ 438 + static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg, 439 + size_t nents, size_t ent_size, 440 + unsigned int flags) 441 + { 442 + struct device *ddev = &dev->mdev->pdev->dev; 443 + dma_addr_t dma; 444 + void *xlt; 445 + 446 + xlt = mlx5r_umr_alloc_xlt(&nents, ent_size, 447 + flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : 448 + GFP_KERNEL); 449 + sg->length = nents * ent_size; 450 + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); 451 + if (dma_mapping_error(ddev, dma)) { 452 + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); 453 + mlx5r_umr_free_xlt(xlt, sg->length); 454 + return NULL; 455 + } 456 + sg->addr = dma; 457 + sg->lkey = dev->umrc.pd->local_dma_lkey; 458 + 459 + return xlt; 460 + } 461 + 462 + static void 463 + mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, 464 + unsigned int flags, struct ib_sge *sg) 465 + { 466 + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) 467 + /* fail if free */ 468 + ctrl_seg->flags = MLX5_UMR_CHECK_FREE; 469 + else 470 + /* fail if not free */ 471 + ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE; 472 + ctrl_seg->xlt_octowords = 473 + cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); 474 + } 475 + 476 + static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev, 477 + struct mlx5_mkey_seg *mkey_seg, 478 + struct mlx5_ib_mr *mr, 479 + unsigned int page_shift) 480 + { 481 + mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags); 482 + MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn); 483 + MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova); 484 + MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length); 485 + MLX5_SET(mkc, mkey_seg, log_page_size, page_shift); 486 + MLX5_SET(mkc, mkey_seg, qpn, 0xffffff); 487 + MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); 488 + } 489 + 490 + static void 491 + mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg, 492 + struct ib_sge *sg) 493 + { 494 + data_seg->byte_count = 
cpu_to_be32(sg->length); 495 + data_seg->lkey = cpu_to_be32(sg->lkey); 496 + data_seg->addr = cpu_to_be64(sg->addr); 497 + } 498 + 499 + static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, 500 + u64 offset) 501 + { 502 + u64 octo_offset = mlx5r_umr_get_xlt_octo(offset); 503 + 504 + ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff); 505 + ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16); 506 + ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; 507 + } 508 + 509 + static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, 510 + struct mlx5r_umr_wqe *wqe, 511 + struct mlx5_ib_mr *mr, struct ib_sge *sg, 512 + unsigned int flags) 513 + { 514 + bool update_pd_access, update_translation; 515 + 516 + if (flags & MLX5_IB_UPD_XLT_ENABLE) 517 + wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask(); 518 + 519 + update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE || 520 + flags & MLX5_IB_UPD_XLT_PD || 521 + flags & MLX5_IB_UPD_XLT_ACCESS; 522 + 523 + if (update_pd_access) { 524 + wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev); 525 + wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); 526 + } 527 + 528 + update_translation = 529 + flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; 530 + 531 + if (update_translation) { 532 + wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); 533 + if (!mr->ibmr.length) 534 + MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); 535 + } 536 + 537 + wqe->ctrl_seg.xlt_octowords = 538 + cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); 539 + wqe->data_seg.byte_count = cpu_to_be32(sg->length); 540 + } 541 + 542 + /* 543 + * Send the DMA list to the HW for a normal MR using UMR. 544 + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP 545 + * flag may be used. 
546 + */ 547 + int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) 548 + { 549 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 550 + struct device *ddev = &dev->mdev->pdev->dev; 551 + struct mlx5r_umr_wqe wqe = {}; 552 + struct ib_block_iter biter; 553 + struct mlx5_mtt *cur_mtt; 554 + size_t orig_sg_length; 555 + struct mlx5_mtt *mtt; 556 + size_t final_size; 557 + struct ib_sge sg; 558 + u64 offset = 0; 559 + int err = 0; 560 + 561 + if (WARN_ON(mr->umem->is_odp)) 562 + return -EINVAL; 563 + 564 + mtt = mlx5r_umr_create_xlt( 565 + dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift), 566 + sizeof(*mtt), flags); 567 + if (!mtt) 568 + return -ENOMEM; 569 + 570 + orig_sg_length = sg.length; 571 + 572 + mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); 573 + mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, 574 + mr->page_shift); 575 + mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); 576 + 577 + cur_mtt = mtt; 578 + rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter, 579 + mr->umem->sgt_append.sgt.nents, 580 + BIT(mr->page_shift)) { 581 + if (cur_mtt == (void *)mtt + sg.length) { 582 + dma_sync_single_for_device(ddev, sg.addr, sg.length, 583 + DMA_TO_DEVICE); 584 + 585 + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, 586 + true); 587 + if (err) 588 + goto err; 589 + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 590 + DMA_TO_DEVICE); 591 + offset += sg.length; 592 + mlx5r_umr_update_offset(&wqe.ctrl_seg, offset); 593 + 594 + cur_mtt = mtt; 595 + } 596 + 597 + cur_mtt->ptag = 598 + cpu_to_be64(rdma_block_iter_dma_address(&biter) | 599 + MLX5_IB_MTT_PRESENT); 600 + 601 + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) 602 + cur_mtt->ptag = 0; 603 + 604 + cur_mtt++; 605 + } 606 + 607 + final_size = (void *)cur_mtt - (void *)mtt; 608 + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); 609 + memset(cur_mtt, 0, sg.length - final_size); 610 + mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, 
flags); 611 + 612 + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); 613 + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); 614 + 615 + err: 616 + sg.length = orig_sg_length; 617 + mlx5r_umr_unmap_free_xlt(dev, mtt, &sg); 618 + return err; 619 + } 620 + 621 + static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) 622 + { 623 + return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); 624 + } 625 + 626 + int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 627 + int page_shift, int flags) 628 + { 629 + int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) 630 + ? sizeof(struct mlx5_klm) 631 + : sizeof(struct mlx5_mtt); 632 + const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; 633 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 634 + struct device *ddev = &dev->mdev->pdev->dev; 635 + const int page_mask = page_align - 1; 636 + struct mlx5r_umr_wqe wqe = {}; 637 + size_t pages_mapped = 0; 638 + size_t pages_to_map = 0; 639 + size_t size_to_map = 0; 640 + size_t orig_sg_length; 641 + size_t pages_iter; 642 + struct ib_sge sg; 643 + int err = 0; 644 + void *xlt; 645 + 646 + if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && 647 + !umr_can_use_indirect_mkey(dev)) 648 + return -EPERM; 649 + 650 + if (WARN_ON(!mr->umem->is_odp)) 651 + return -EINVAL; 652 + 653 + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, 654 + * so we need to align the offset and length accordingly 655 + */ 656 + if (idx & page_mask) { 657 + npages += idx & page_mask; 658 + idx &= ~page_mask; 659 + } 660 + pages_to_map = ALIGN(npages, page_align); 661 + 662 + xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags); 663 + if (!xlt) 664 + return -ENOMEM; 665 + 666 + pages_iter = sg.length / desc_size; 667 + orig_sg_length = sg.length; 668 + 669 + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { 670 + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 671 + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; 672 + 673 + pages_to_map = 
min_t(size_t, pages_to_map, max_pages); 674 + } 675 + 676 + mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); 677 + mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift); 678 + mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); 679 + 680 + for (pages_mapped = 0; 681 + pages_mapped < pages_to_map && !err; 682 + pages_mapped += pages_iter, idx += pages_iter) { 683 + npages = min_t(int, pages_iter, pages_to_map - pages_mapped); 684 + size_to_map = npages * desc_size; 685 + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 686 + DMA_TO_DEVICE); 687 + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); 688 + dma_sync_single_for_device(ddev, sg.addr, sg.length, 689 + DMA_TO_DEVICE); 690 + sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); 691 + 692 + if (pages_mapped + pages_iter >= pages_to_map) 693 + mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); 694 + mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size); 695 + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); 696 + } 697 + sg.length = orig_sg_length; 698 + mlx5r_umr_unmap_free_xlt(dev, xlt, &sg); 699 + return err; 700 + }
+97
drivers/infiniband/hw/mlx5/umr.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ 3 + 4 + #ifndef _MLX5_IB_UMR_H 5 + #define _MLX5_IB_UMR_H 6 + 7 + #include "mlx5_ib.h" 8 + 9 + 10 + #define MLX5_MAX_UMR_SHIFT 16 11 + #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) 12 + 13 + #define MLX5_IB_UMR_OCTOWORD 16 14 + #define MLX5_IB_UMR_XLT_ALIGNMENT 64 15 + 16 + int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev); 17 + void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev); 18 + 19 + static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev, 20 + size_t length) 21 + { 22 + /* 23 + * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is 24 + * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka 25 + * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey 26 + * can never be enabled without this capability. Simplify this weird 27 + * quirky hardware by just saying it can't use PAS lists with UMR at 28 + * all. 29 + */ 30 + if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 31 + return false; 32 + 33 + /* 34 + * length is the size of the MR in bytes when mlx5_ib_update_xlt() is 35 + * used. 36 + */ 37 + if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && 38 + length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) 39 + return false; 40 + return true; 41 + } 42 + 43 + /* 44 + * true if an existing MR can be reconfigured to new access_flags using UMR. 45 + * Older HW cannot use UMR to update certain elements of the MKC. 
See 46 + * get_umr_update_access_mask() and umr_check_mkey_mask() 47 + */ 48 + static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev, 49 + unsigned int current_access_flags, 50 + unsigned int target_access_flags) 51 + { 52 + unsigned int diffs = current_access_flags ^ target_access_flags; 53 + 54 + if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && 55 + MLX5_CAP_GEN(dev->mdev, atomic) && 56 + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 57 + return false; 58 + 59 + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && 60 + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 61 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 62 + return false; 63 + 64 + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && 65 + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && 66 + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 67 + return false; 68 + 69 + return true; 70 + } 71 + 72 + static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes) 73 + { 74 + return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / 75 + MLX5_IB_UMR_OCTOWORD; 76 + } 77 + 78 + struct mlx5r_umr_context { 79 + struct ib_cqe cqe; 80 + enum ib_wc_status status; 81 + struct completion done; 82 + }; 83 + 84 + struct mlx5r_umr_wqe { 85 + struct mlx5_wqe_umr_ctrl_seg ctrl_seg; 86 + struct mlx5_mkey_seg mkey_seg; 87 + struct mlx5_wqe_data_seg data_seg; 88 + }; 89 + 90 + int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr); 91 + int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, 92 + int access_flags); 93 + int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); 94 + int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 95 + int page_shift, int flags); 96 + 97 + #endif /* _MLX5_IB_UMR_H */
+61 -316
drivers/infiniband/hw/mlx5/wr.c
··· 7 7 #include <linux/mlx5/qp.h> 8 8 #include <linux/mlx5/driver.h> 9 9 #include "wr.h" 10 + #include "umr.h" 10 11 11 12 static const u32 mlx5_ib_opcode[] = { 12 13 [IB_WR_SEND] = MLX5_OPCODE_SEND, ··· 26 25 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, 27 26 }; 28 27 29 - /* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the 30 - * next nearby edge and get new address translation for current WQE position. 31 - * @sq - SQ buffer. 32 - * @seg: Current WQE position (16B aligned). 33 - * @wqe_sz: Total current WQE size [16B]. 34 - * @cur_edge: Updated current edge. 35 - */ 36 - static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, 37 - u32 wqe_sz, void **cur_edge) 38 - { 39 - u32 idx; 40 - 41 - if (likely(*seg != *cur_edge)) 42 - return; 43 - 44 - idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); 45 - *cur_edge = get_sq_edge(sq, idx); 46 - 47 - *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); 48 - } 49 - 50 - /* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's 51 - * pointers. At the end @seg is aligned to 16B regardless the copied size. 52 - * @sq - SQ buffer. 53 - * @cur_edge: Updated current edge. 54 - * @seg: Current WQE position (16B aligned). 55 - * @wqe_sz: Total current WQE size [16B]. 56 - * @src: Pointer to copy from. 57 - * @n: Number of bytes to copy. 58 - */ 59 - static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, 60 - void **seg, u32 *wqe_sz, const void *src, 61 - size_t n) 62 - { 63 - while (likely(n)) { 64 - size_t leftlen = *cur_edge - *seg; 65 - size_t copysz = min_t(size_t, leftlen, n); 66 - size_t stride; 67 - 68 - memcpy(*seg, src, copysz); 69 - 70 - n -= copysz; 71 - src += copysz; 72 - stride = !n ? 
ALIGN(copysz, 16) : copysz; 73 - *seg += stride; 74 - *wqe_sz += stride >> 4; 75 - handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); 76 - } 77 - } 78 - 79 - static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, 80 - struct ib_cq *ib_cq) 28 + int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) 81 29 { 82 30 struct mlx5_ib_cq *cq; 83 31 unsigned int cur; ··· 72 122 eseg->mss = cpu_to_be16(ud_wr->mss); 73 123 eseg->inline_hdr.sz = cpu_to_be16(left); 74 124 75 - /* memcpy_send_wqe should get a 16B align address. Hence, we 76 - * first copy up to the current edge and then, if needed, 77 - * continue to memcpy_send_wqe. 125 + /* mlx5r_memcpy_send_wqe should get a 16B align address. Hence, 126 + * we first copy up to the current edge and then, if needed, 127 + * continue to mlx5r_memcpy_send_wqe. 78 128 */ 79 129 copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, 80 130 left); ··· 88 138 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 89 139 left -= copysz; 90 140 pdata += copysz; 91 - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, 92 - left); 141 + mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, 142 + pdata, left); 93 143 } 94 144 95 145 return; ··· 113 163 dseg->byte_count = cpu_to_be32(sg->length); 114 164 dseg->lkey = cpu_to_be32(sg->lkey); 115 165 dseg->addr = cpu_to_be64(sg->addr); 116 - } 117 - 118 - static u64 get_xlt_octo(u64 bytes) 119 - { 120 - return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / 121 - MLX5_IB_UMR_OCTOWORD; 122 166 } 123 167 124 168 static __be64 frwr_mkey_mask(bool atomic) ··· 166 222 memset(umr, 0, sizeof(*umr)); 167 223 168 224 umr->flags = flags; 169 - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); 225 + umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size)); 170 226 umr->mkey_mask = frwr_mkey_mask(atomic); 171 227 } 172 228 ··· 175 231 memset(umr, 0, sizeof(*umr)); 176 232 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); 177 233 umr->flags = MLX5_UMR_INLINE; 178 - } 
179 - 180 - static __be64 get_umr_enable_mr_mask(void) 181 - { 182 - u64 result; 183 - 184 - result = MLX5_MKEY_MASK_KEY | 185 - MLX5_MKEY_MASK_FREE; 186 - 187 - return cpu_to_be64(result); 188 - } 189 - 190 - static __be64 get_umr_disable_mr_mask(void) 191 - { 192 - u64 result; 193 - 194 - result = MLX5_MKEY_MASK_FREE; 195 - 196 - return cpu_to_be64(result); 197 - } 198 - 199 - static __be64 get_umr_update_translation_mask(void) 200 - { 201 - u64 result; 202 - 203 - result = MLX5_MKEY_MASK_LEN | 204 - MLX5_MKEY_MASK_PAGE_SIZE | 205 - MLX5_MKEY_MASK_START_ADDR; 206 - 207 - return cpu_to_be64(result); 208 - } 209 - 210 - static __be64 get_umr_update_access_mask(int atomic, 211 - int relaxed_ordering_write, 212 - int relaxed_ordering_read) 213 - { 214 - u64 result; 215 - 216 - result = MLX5_MKEY_MASK_LR | 217 - MLX5_MKEY_MASK_LW | 218 - MLX5_MKEY_MASK_RR | 219 - MLX5_MKEY_MASK_RW; 220 - 221 - if (atomic) 222 - result |= MLX5_MKEY_MASK_A; 223 - 224 - if (relaxed_ordering_write) 225 - result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; 226 - 227 - if (relaxed_ordering_read) 228 - result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; 229 - 230 - return cpu_to_be64(result); 231 - } 232 - 233 - static __be64 get_umr_update_pd_mask(void) 234 - { 235 - u64 result; 236 - 237 - result = MLX5_MKEY_MASK_PD; 238 - 239 - return cpu_to_be64(result); 240 - } 241 - 242 - static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) 243 - { 244 - if (mask & MLX5_MKEY_MASK_PAGE_SIZE && 245 - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 246 - return -EPERM; 247 - 248 - if (mask & MLX5_MKEY_MASK_A && 249 - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 250 - return -EPERM; 251 - 252 - if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && 253 - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 254 - return -EPERM; 255 - 256 - if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && 257 - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 258 - return -EPERM; 259 - 260 - 
return 0; 261 - } 262 - 263 - static int set_reg_umr_segment(struct mlx5_ib_dev *dev, 264 - struct mlx5_wqe_umr_ctrl_seg *umr, 265 - const struct ib_send_wr *wr) 266 - { 267 - const struct mlx5_umr_wr *umrwr = umr_wr(wr); 268 - 269 - memset(umr, 0, sizeof(*umr)); 270 - 271 - if (!umrwr->ignore_free_state) { 272 - if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) 273 - /* fail if free */ 274 - umr->flags = MLX5_UMR_CHECK_FREE; 275 - else 276 - /* fail if not free */ 277 - umr->flags = MLX5_UMR_CHECK_NOT_FREE; 278 - } 279 - 280 - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); 281 - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { 282 - u64 offset = get_xlt_octo(umrwr->offset); 283 - 284 - umr->xlt_offset = cpu_to_be16(offset & 0xffff); 285 - umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); 286 - umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; 287 - } 288 - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) 289 - umr->mkey_mask |= get_umr_update_translation_mask(); 290 - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { 291 - umr->mkey_mask |= get_umr_update_access_mask( 292 - !!(MLX5_CAP_GEN(dev->mdev, atomic)), 293 - !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), 294 - !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); 295 - umr->mkey_mask |= get_umr_update_pd_mask(); 296 - } 297 - if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) 298 - umr->mkey_mask |= get_umr_enable_mr_mask(); 299 - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) 300 - umr->mkey_mask |= get_umr_disable_mr_mask(); 301 - 302 - if (!wr->num_sge) 303 - umr->flags |= MLX5_UMR_INLINE; 304 - 305 - return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); 306 234 } 307 235 308 236 static u8 get_umr_flags(int acc) ··· 212 396 { 213 397 memset(seg, 0, sizeof(*seg)); 214 398 seg->status = MLX5_MKEY_STATUS_FREE; 215 - } 216 - 217 - static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, 218 - struct mlx5_mkey_seg *seg, 219 - const struct ib_send_wr 
*wr) 220 - { 221 - const struct mlx5_umr_wr *umrwr = umr_wr(wr); 222 - 223 - memset(seg, 0, sizeof(*seg)); 224 - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) 225 - MLX5_SET(mkc, seg, free, 1); 226 - 227 - MLX5_SET(mkc, seg, a, 228 - !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); 229 - MLX5_SET(mkc, seg, rw, 230 - !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); 231 - MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); 232 - MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); 233 - MLX5_SET(mkc, seg, lr, 1); 234 - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 235 - MLX5_SET(mkc, seg, relaxed_ordering_write, 236 - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); 237 - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 238 - MLX5_SET(mkc, seg, relaxed_ordering_read, 239 - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); 240 - 241 - if (umrwr->pd) 242 - MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); 243 - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && 244 - !umrwr->length) 245 - MLX5_SET(mkc, seg, length64, 1); 246 - 247 - MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); 248 - MLX5_SET64(mkc, seg, len, umrwr->length); 249 - MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); 250 - MLX5_SET(mkc, seg, qpn, 0xffffff); 251 - MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); 252 399 } 253 400 254 401 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, ··· 539 760 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | 540 761 MLX5_MKEY_BSF_EN | pdn); 541 762 seg->len = cpu_to_be64(length); 542 - seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); 763 + seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size)); 543 764 seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); 544 765 } 545 766 ··· 549 770 memset(umr, 0, sizeof(*umr)); 550 771 551 772 umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; 552 - umr->xlt_octowords = 
cpu_to_be16(get_xlt_octo(size)); 773 + umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size)); 553 774 umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); 554 775 umr->mkey_mask = sig_mkey_mask(); 555 776 } ··· 649 870 * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and 650 871 * kernel ULPs are not aware of it, so we don't set it here. 651 872 */ 652 - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { 873 + if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) { 653 874 mlx5_ib_warn( 654 875 to_mdev(qp->ibqp.device), 655 876 "Fast update for MR access flags is not possible\n"); ··· 678 899 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 679 900 680 901 if (umr_inline) { 681 - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, 682 - mr_list_size); 902 + mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, 903 + mr_list_size); 683 904 *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); 684 905 } else { 685 906 set_reg_data_seg(*seg, mr, pd); ··· 721 942 } 722 943 } 723 944 724 - static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, 725 - struct mlx5_wqe_ctrl_seg **ctrl, 726 - const struct ib_send_wr *wr, unsigned int *idx, 727 - int *size, void **cur_edge, int nreq, 728 - bool send_signaled, bool solicited) 945 + int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg, 946 + struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx, 947 + int *size, void **cur_edge, int nreq, __be32 general_id, 948 + bool send_signaled, bool solicited) 729 949 { 730 - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) 950 + if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) 731 951 return -ENOMEM; 732 952 733 953 *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); 734 954 *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); 735 955 *ctrl = *seg; 736 956 *(uint32_t *)(*seg + 8) = 0; 737 - (*ctrl)->imm = send_ieth(wr); 957 + (*ctrl)->general_id = general_id; 738 958 (*ctrl)->fm_ce_se = qp->sq_signal_bits | 739 - 
(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | 740 - (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); 959 + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | 960 + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); 741 961 742 962 *seg += sizeof(**ctrl); 743 963 *size = sizeof(**ctrl) / 16; ··· 750 972 const struct ib_send_wr *wr, unsigned int *idx, int *size, 751 973 void **cur_edge, int nreq) 752 974 { 753 - return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, 754 - wr->send_flags & IB_SEND_SIGNALED, 755 - wr->send_flags & IB_SEND_SOLICITED); 975 + return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq, 976 + send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED, 977 + wr->send_flags & IB_SEND_SOLICITED); 756 978 } 757 979 758 - static void finish_wqe(struct mlx5_ib_qp *qp, 759 - struct mlx5_wqe_ctrl_seg *ctrl, 760 - void *seg, u8 size, void *cur_edge, 761 - unsigned int idx, u64 wr_id, int nreq, u8 fence, 762 - u32 mlx5_opcode) 980 + void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, 981 + void *seg, u8 size, void *cur_edge, unsigned int idx, 982 + u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode) 763 983 { 764 984 u8 opmod = 0; 765 985 ··· 821 1045 /* 822 1046 * SET_PSV WQEs are not signaled and solicited on error. 
823 1047 */ 824 - err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, 825 - false, true); 1048 + err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq, 1049 + send_ieth(wr), false, true); 826 1050 if (unlikely(err)) { 827 1051 mlx5_ib_warn(dev, "\n"); 828 1052 err = -ENOMEM; ··· 833 1057 mlx5_ib_warn(dev, "\n"); 834 1058 goto out; 835 1059 } 836 - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, 837 - next_fence, MLX5_OPCODE_SET_PSV); 1060 + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, 1061 + nreq, next_fence, MLX5_OPCODE_SET_PSV); 838 1062 839 1063 out: 840 1064 return err; ··· 874 1098 if (unlikely(err)) 875 1099 goto out; 876 1100 877 - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, 878 - nreq, fence, MLX5_OPCODE_UMR); 1101 + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, 1102 + wr->wr_id, nreq, fence, MLX5_OPCODE_UMR); 879 1103 880 1104 err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); 881 1105 if (unlikely(err)) { ··· 906 1130 mlx5_ib_warn(dev, "\n"); 907 1131 goto out; 908 1132 } 909 - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, 910 - fence, MLX5_OPCODE_UMR); 1133 + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, 1134 + nreq, fence, MLX5_OPCODE_UMR); 911 1135 912 1136 sig_attrs = mr->ibmr.sig_attrs; 913 1137 err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, ··· 1022 1246 } 1023 1247 } 1024 1248 1025 - static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1026 - const struct ib_send_wr *wr, 1027 - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, 1028 - int *size, void **cur_edge, unsigned int idx) 1249 + void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq, 1250 + struct mlx5_wqe_ctrl_seg *ctrl) 1029 1251 { 1030 - int err = 0; 1252 + struct mlx5_bf *bf = &qp->bf; 1031 1253 1032 - if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { 1033 - err = -EINVAL; 1034 - 
mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); 1035 - goto out; 1036 - } 1254 + qp->sq.head += nreq; 1037 1255 1038 - qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 1039 - (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); 1040 - err = set_reg_umr_segment(dev, *seg, wr); 1041 - if (unlikely(err)) 1042 - goto out; 1043 - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 1044 - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 1045 - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1046 - set_reg_mkey_segment(dev, *seg, wr); 1047 - *seg += sizeof(struct mlx5_mkey_seg); 1048 - *size += sizeof(struct mlx5_mkey_seg) / 16; 1049 - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1050 - out: 1051 - return err; 1256 + /* Make sure that descriptors are written before 1257 + * updating doorbell record and ringing the doorbell 1258 + */ 1259 + wmb(); 1260 + 1261 + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); 1262 + 1263 + /* Make sure doorbell record is visible to the HCA before 1264 + * we hit doorbell. 1265 + */ 1266 + wmb(); 1267 + 1268 + mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); 1269 + /* Make sure doorbells don't leak out of SQ spinlock 1270 + * and reach the HCA out of order. 
1271 + */ 1272 + bf->offset ^= bf->buf_size; 1052 1273 } 1053 1274 1054 1275 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, ··· 1056 1283 struct mlx5_core_dev *mdev = dev->mdev; 1057 1284 struct mlx5_ib_qp *qp = to_mqp(ibqp); 1058 1285 struct mlx5_wqe_xrc_seg *xrc; 1059 - struct mlx5_bf *bf; 1060 1286 void *cur_edge; 1061 1287 int size; 1062 1288 unsigned long flags; ··· 1076 1304 1077 1305 if (qp->type == IB_QPT_GSI) 1078 1306 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); 1079 - 1080 - bf = &qp->bf; 1081 1307 1082 1308 spin_lock_irqsave(&qp->sq.lock, flags); 1083 1309 ··· 1154 1384 case IB_QPT_UD: 1155 1385 handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); 1156 1386 break; 1157 - case MLX5_IB_QPT_REG_UMR: 1158 - err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, 1159 - &size, &cur_edge, idx); 1160 - if (unlikely(err)) 1161 - goto out; 1162 - break; 1163 1387 1164 1388 default: 1165 1389 break; ··· 1182 1418 } 1183 1419 1184 1420 qp->next_fence = next_fence; 1185 - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, 1186 - fence, mlx5_ib_opcode[wr->opcode]); 1421 + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, 1422 + nreq, fence, mlx5_ib_opcode[wr->opcode]); 1187 1423 skip_psv: 1188 1424 if (0) 1189 1425 dump_wqe(qp, idx, size); 1190 1426 } 1191 1427 1192 1428 out: 1193 - if (likely(nreq)) { 1194 - qp->sq.head += nreq; 1195 - 1196 - /* Make sure that descriptors are written before 1197 - * updating doorbell record and ringing the doorbell 1198 - */ 1199 - wmb(); 1200 - 1201 - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); 1202 - 1203 - /* Make sure doorbell record is visible to the HCA before 1204 - * we hit doorbell. 1205 - */ 1206 - wmb(); 1207 - 1208 - mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); 1209 - /* Make sure doorbells don't leak out of SQ spinlock 1210 - * and reach the HCA out of order. 
1211 - */ 1212 - bf->offset ^= bf->buf_size; 1213 - } 1429 + if (likely(nreq)) 1430 + mlx5r_ring_db(qp, nreq, ctrl); 1214 1431 1215 1432 spin_unlock_irqrestore(&qp->sq.lock, flags); 1216 1433 ··· 1231 1486 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 1232 1487 1233 1488 for (nreq = 0; wr; nreq++, wr = wr->next) { 1234 - if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { 1489 + if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { 1235 1490 err = -ENOMEM; 1236 1491 *bad_wr = wr; 1237 1492 goto out;
+60
drivers/infiniband/hw/mlx5/wr.h
··· 41 41 return fragment_end + MLX5_SEND_WQE_BB; 42 42 } 43 43 44 + /* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the 45 + * next nearby edge and get new address translation for current WQE position. 46 + * @sq: SQ buffer. 47 + * @seg: Current WQE position (16B aligned). 48 + * @wqe_sz: Total current WQE size [16B]. 49 + * @cur_edge: Updated current edge. 50 + */ 51 + static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, 52 + u32 wqe_sz, void **cur_edge) 53 + { 54 + u32 idx; 55 + 56 + if (likely(*seg != *cur_edge)) 57 + return; 58 + 59 + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); 60 + *cur_edge = get_sq_edge(sq, idx); 61 + 62 + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); 63 + } 64 + 65 + /* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant 66 + * WQ's pointers. At the end @seg is aligned to 16B regardless the copied size. 67 + * @sq: SQ buffer. 68 + * @cur_edge: Updated current edge. 69 + * @seg: Current WQE position (16B aligned). 70 + * @wqe_sz: Total current WQE size [16B]. 71 + * @src: Pointer to copy from. 72 + * @n: Number of bytes to copy. 73 + */ 74 + static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, 75 + void **seg, u32 *wqe_sz, 76 + const void *src, size_t n) 77 + { 78 + while (likely(n)) { 79 + size_t leftlen = *cur_edge - *seg; 80 + size_t copysz = min_t(size_t, leftlen, n); 81 + size_t stride; 82 + 83 + memcpy(*seg, src, copysz); 84 + 85 + n -= copysz; 86 + src += copysz; 87 + stride = !n ? 
ALIGN(copysz, 16) : copysz; 88 + *seg += stride; 89 + *wqe_sz += stride >> 4; 90 + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); 91 + } 92 + } 93 + 94 + int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq); 95 + int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg, 96 + struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx, 97 + int *size, void **cur_edge, int nreq, __be32 general_id, 98 + bool send_signaled, bool solicited); 99 + void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, 100 + void *seg, u8 size, void *cur_edge, unsigned int idx, 101 + u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode); 102 + void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq, 103 + struct mlx5_wqe_ctrl_seg *ctrl); 44 104 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, 45 105 const struct ib_send_wr **bad_wr, bool drain); 46 106 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+1 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 90 90 IB_DEVICE_RC_RNR_NAK_GEN | 91 91 IB_DEVICE_SHUTDOWN_PORT | 92 92 IB_DEVICE_SYS_IMAGE_GUID | 93 - IB_DEVICE_LOCAL_DMA_LKEY | 94 93 IB_DEVICE_MEM_MGT_EXTENSIONS; 94 + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 95 95 attr->max_send_sge = dev->attr.max_send_sge; 96 96 attr->max_recv_sge = dev->attr.max_recv_sge; 97 97 attr->max_sge_rd = dev->attr.max_rdma_sge;
-1
drivers/infiniband/hw/qedr/main.c
··· 500 500 if (dev->int_info.msix_cnt) { 501 501 idx = i * dev->num_hwfns + dev->affin_hwfn_idx; 502 502 vector = dev->int_info.msix[idx].vector; 503 - synchronize_irq(vector); 504 503 free_irq(vector, &dev->cnq_array[i]); 505 504 } 506 505 }
+2 -1
drivers/infiniband/hw/qedr/verbs.c
··· 134 134 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe); 135 135 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | 136 136 IB_DEVICE_RC_RNR_NAK_GEN | 137 - IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; 137 + IB_DEVICE_MEM_MGT_EXTENSIONS; 138 + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; 138 139 139 140 if (!rdma_protocol_iwarp(&dev->ibdev, 1)) 140 141 attr->device_cap_flags |= IB_DEVICE_XRC;
+1 -1
drivers/infiniband/hw/qib/qib.h
··· 678 678 /* Observers. Not to be taken lightly, possibly not to ship. */ 679 679 /* 680 680 * If a diag read or write is to (bottom <= offset <= top), 681 - * the "hoook" is called, allowing, e.g. shadows to be 681 + * the "hook" is called, allowing, e.g. shadows to be 682 682 * updated in sync with the driver. struct diag_observer 683 683 * is the "visible" part. 684 684 */
+5 -6
drivers/infiniband/hw/usnic/usnic_ib_main.c
··· 534 534 struct usnic_ib_vf *vf; 535 535 enum usnic_vnic_res_type res_type; 536 536 537 + if (!device_iommu_mapped(&pdev->dev)) { 538 + usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n"); 539 + return -EPERM; 540 + } 541 + 537 542 vf = kzalloc(sizeof(*vf), GFP_KERNEL); 538 543 if (!vf) 539 544 return -ENOMEM; ··· 646 641 int err; 647 642 648 643 printk_once(KERN_INFO "%s", usnic_version); 649 - 650 - err = usnic_uiom_init(DRV_NAME); 651 - if (err) { 652 - usnic_err("Unable to initialize umem with err %d\n", err); 653 - return err; 654 - } 655 644 656 645 err = pci_register_driver(&usnic_ib_pci_driver); 657 646 if (err) {
+3 -3
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
··· 305 305 props->max_qp = qp_per_vf * 306 306 kref_read(&us_ibdev->vf_cnt); 307 307 props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | 308 - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 308 + IB_DEVICE_SYS_IMAGE_GUID; 309 + props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK; 309 310 props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * 310 311 kref_read(&us_ibdev->vf_cnt); 311 312 props->max_pd = USNIC_UIOM_MAX_PD_CNT; ··· 443 442 { 444 443 struct usnic_ib_pd *pd = to_upd(ibpd); 445 444 446 - pd->umem_pd = usnic_uiom_alloc_pd(); 445 + pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent); 447 446 if (IS_ERR(pd->umem_pd)) 448 447 return PTR_ERR(pd->umem_pd); 449 448 ··· 707 706 usnic_err("No VF %u found\n", vfid); 708 707 return -EINVAL; 709 708 } 710 -
+2 -13
drivers/infiniband/hw/usnic/usnic_uiom.c
··· 40 40 #include <linux/iommu.h> 41 41 #include <linux/workqueue.h> 42 42 #include <linux/list.h> 43 - #include <linux/pci.h> 44 43 #include <rdma/ib_verbs.h> 45 44 46 45 #include "usnic_log.h" ··· 438 439 __usnic_uiom_release_tail(uiomr); 439 440 } 440 441 441 - struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) 442 + struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev) 442 443 { 443 444 struct usnic_uiom_pd *pd; 444 445 void *domain; ··· 447 448 if (!pd) 448 449 return ERR_PTR(-ENOMEM); 449 450 450 - pd->domain = domain = iommu_domain_alloc(&pci_bus_type); 451 + pd->domain = domain = iommu_domain_alloc(dev->bus); 451 452 if (!domain) { 452 453 usnic_err("Failed to allocate IOMMU domain"); 453 454 kfree(pd); ··· 554 555 void usnic_uiom_free_dev_list(struct device **devs) 555 556 { 556 557 kfree(devs); 557 - } 558 - 559 - int usnic_uiom_init(char *drv_name) 560 - { 561 - if (!iommu_present(&pci_bus_type)) { 562 - usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n"); 563 - return -EPERM; 564 - } 565 - 566 - return 0; 567 558 }
+1 -2
drivers/infiniband/hw/usnic/usnic_uiom.h
··· 80 80 struct scatterlist page_list[]; 81 81 }; 82 82 83 - struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); 83 + struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev); 84 84 void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd); 85 85 int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev); 86 86 void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, ··· 91 91 unsigned long addr, size_t size, 92 92 int access, int dmasync); 93 93 void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr); 94 - int usnic_uiom_init(char *drv_name); 95 94 #endif /* USNIC_UIOM_H_ */
+5 -1
drivers/infiniband/sw/rdmavt/qp.c
··· 2775 2775 EXPORT_SYMBOL(rvt_qp_iter); 2776 2776 2777 2777 /* 2778 - * This should be called with s_lock held. 2778 + * This should be called with s_lock and r_lock held. 2779 2779 */ 2780 2780 void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 2781 2781 enum ib_wc_status status) ··· 3134 3134 rvp->n_loop_pkts++; 3135 3135 flush_send: 3136 3136 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 3137 + spin_lock(&sqp->r_lock); 3137 3138 rvt_send_complete(sqp, wqe, send_status); 3139 + spin_unlock(&sqp->r_lock); 3138 3140 if (local_ops) { 3139 3141 atomic_dec(&sqp->local_ops_pending); 3140 3142 local_ops = 0; ··· 3190 3188 spin_unlock_irqrestore(&qp->r_lock, flags); 3191 3189 serr_no_r_lock: 3192 3190 spin_lock_irqsave(&sqp->s_lock, flags); 3191 + spin_lock(&sqp->r_lock); 3193 3192 rvt_send_complete(sqp, wqe, send_status); 3193 + spin_unlock(&sqp->r_lock); 3194 3194 if (sqp->ibqp.qp_type == IB_QPT_RC) { 3195 3195 int lastwqe; 3196 3196
+1
drivers/infiniband/sw/rxe/rxe.c
··· 46 46 rxe->attr.max_qp = RXE_MAX_QP; 47 47 rxe->attr.max_qp_wr = RXE_MAX_QP_WR; 48 48 rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; 49 + rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG; 49 50 rxe->attr.max_send_sge = RXE_MAX_SGE; 50 51 rxe->attr.max_recv_sge = RXE_MAX_SGE; 51 52 rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
+2 -1
drivers/infiniband/sw/rxe/rxe_comp.c
··· 562 562 enum comp_state state; 563 563 int ret = 0; 564 564 565 - rxe_get(qp); 565 + if (!rxe_get(qp)) 566 + return -EAGAIN; 566 567 567 568 if (!qp->valid || qp->req.state == QP_STATE_ERROR || 568 569 qp->req.state == QP_STATE_RESET) {
+7 -10
drivers/infiniband/sw/rxe/rxe_loc.h
··· 37 37 38 38 void rxe_cq_disable(struct rxe_cq *cq); 39 39 40 - void rxe_cq_cleanup(struct rxe_pool_elem *arg); 40 + void rxe_cq_cleanup(struct rxe_pool_elem *elem); 41 41 42 42 /* rxe_mcast.c */ 43 43 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); ··· 81 81 int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); 82 82 int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); 83 83 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); 84 - void rxe_mr_cleanup(struct rxe_pool_elem *arg); 84 + void rxe_mr_cleanup(struct rxe_pool_elem *elem); 85 85 86 86 /* rxe_mw.c */ 87 87 int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata); ··· 89 89 int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe); 90 90 int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey); 91 91 struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey); 92 - void rxe_mw_cleanup(struct rxe_pool_elem *arg); 92 + void rxe_mw_cleanup(struct rxe_pool_elem *elem); 93 93 94 94 /* rxe_net.c */ 95 95 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, ··· 114 114 int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); 115 115 void rxe_qp_error(struct rxe_qp *qp); 116 116 int rxe_qp_chk_destroy(struct rxe_qp *qp); 117 - void rxe_qp_destroy(struct rxe_qp *qp); 118 117 void rxe_qp_cleanup(struct rxe_pool_elem *elem); 119 118 120 119 static inline int qp_num(struct rxe_qp *qp) ··· 158 159 void rnr_nak_timer(struct timer_list *t); 159 160 160 161 /* rxe_srq.c */ 161 - #define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) 162 - 163 - int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 164 - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); 165 - 162 + int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init); 166 163 int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, 167 164 struct ib_srq_init_attr *init, struct ib_udata *udata, 168 165 struct rxe_create_srq_resp __user *uresp); 169 - 166 + 
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 167 + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); 170 168 int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 171 169 struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, 172 170 struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata); 171 + void rxe_srq_cleanup(struct rxe_pool_elem *elem); 173 172 174 173 void rxe_dealloc(struct ib_device *ib_dev); 175 174
+4 -2
drivers/infiniband/sw/rxe/rxe_mcast.c
··· 206 206 207 207 /* speculative alloc of new mcg */ 208 208 mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); 209 - if (!mcg) 210 - return ERR_PTR(-ENOMEM); 209 + if (!mcg) { 210 + err = -ENOMEM; 211 + goto err_dec; 212 + } 211 213 212 214 spin_lock_bh(&rxe->mcg_lock); 213 215 /* re-check to see if someone else just added it */
+4 -6
drivers/infiniband/sw/rxe/rxe_mr.c
··· 683 683 { 684 684 struct rxe_mr *mr = to_rmr(ibmr); 685 685 686 - if (atomic_read(&mr->num_mw) > 0) { 687 - pr_warn("%s: Attempt to deregister an MR while bound to MWs\n", 688 - __func__); 686 + /* See IBA 10.6.7.2.6 */ 687 + if (atomic_read(&mr->num_mw) > 0) 689 688 return -EINVAL; 690 - } 691 689 692 - mr->state = RXE_MR_STATE_INVALID; 693 - rxe_put(mr_pd(mr)); 694 690 rxe_put(mr); 695 691 696 692 return 0; ··· 695 699 void rxe_mr_cleanup(struct rxe_pool_elem *elem) 696 700 { 697 701 struct rxe_mr *mr = container_of(elem, typeof(*mr), elem); 702 + 703 + rxe_put(mr_pd(mr)); 698 704 699 705 ib_umem_release(mr->umem); 700 706
+36 -29
drivers/infiniband/sw/rxe/rxe_mw.c
··· 3 3 * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved. 4 4 */ 5 5 6 + /* 7 + * The rdma_rxe driver supports type 1 or type 2B memory windows. 8 + * Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by 9 + * ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw() 10 + * but bound by bind_mw work requests. The ibv_bind_mw() call is converted 11 + * by libibverbs to a bind_mw work request. 12 + */ 13 + 6 14 #include "rxe.h" 7 15 8 16 int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) ··· 36 28 return 0; 37 29 } 38 30 39 - static void rxe_do_dealloc_mw(struct rxe_mw *mw) 40 - { 41 - if (mw->mr) { 42 - struct rxe_mr *mr = mw->mr; 43 - 44 - mw->mr = NULL; 45 - atomic_dec(&mr->num_mw); 46 - rxe_put(mr); 47 - } 48 - 49 - if (mw->qp) { 50 - struct rxe_qp *qp = mw->qp; 51 - 52 - mw->qp = NULL; 53 - rxe_put(qp); 54 - } 55 - 56 - mw->access = 0; 57 - mw->addr = 0; 58 - mw->length = 0; 59 - mw->state = RXE_MW_STATE_INVALID; 60 - } 61 - 62 31 int rxe_dealloc_mw(struct ib_mw *ibmw) 63 32 { 64 33 struct rxe_mw *mw = to_rmw(ibmw); 65 - struct rxe_pd *pd = to_rpd(ibmw->pd); 66 - 67 - spin_lock_bh(&mw->lock); 68 - rxe_do_dealloc_mw(mw); 69 - spin_unlock_bh(&mw->lock); 70 34 71 35 rxe_put(mw); 72 - rxe_put(pd); 73 36 74 37 return 0; 75 38 } ··· 306 327 } 307 328 308 329 return mw; 330 + } 331 + 332 + void rxe_mw_cleanup(struct rxe_pool_elem *elem) 333 + { 334 + struct rxe_mw *mw = container_of(elem, typeof(*mw), elem); 335 + struct rxe_pd *pd = to_rpd(mw->ibmw.pd); 336 + 337 + rxe_put(pd); 338 + 339 + if (mw->mr) { 340 + struct rxe_mr *mr = mw->mr; 341 + 342 + mw->mr = NULL; 343 + atomic_dec(&mr->num_mw); 344 + rxe_put(mr); 345 + } 346 + 347 + if (mw->qp) { 348 + struct rxe_qp *qp = mw->qp; 349 + 350 + mw->qp = NULL; 351 + rxe_put(qp); 352 + } 353 + 354 + mw->access = 0; 355 + mw->addr = 0; 356 + mw->length = 0; 357 + mw->state = RXE_MW_STATE_INVALID; 309 358 }
-2
drivers/infiniband/sw/rxe/rxe_opcode.c
··· 29 29 [IB_WR_SEND] = { 30 30 .name = "IB_WR_SEND", 31 31 .mask = { 32 - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, 33 32 [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, 34 33 [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, 35 34 [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, ··· 38 39 [IB_WR_SEND_WITH_IMM] = { 39 40 .name = "IB_WR_SEND_WITH_IMM", 40 41 .mask = { 41 - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, 42 42 [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, 43 43 [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, 44 44 [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
-2
drivers/infiniband/sw/rxe/rxe_param.h
··· 50 50 | IB_DEVICE_RC_RNR_NAK_GEN 51 51 | IB_DEVICE_SRQ_RESIZE 52 52 | IB_DEVICE_MEM_MGT_EXTENSIONS 53 - | IB_DEVICE_ALLOW_USER_UNREG 54 53 | IB_DEVICE_MEM_WINDOW 55 - | IB_DEVICE_MEM_WINDOW_TYPE_2A 56 54 | IB_DEVICE_MEM_WINDOW_TYPE_2B, 57 55 RXE_MAX_SGE = 32, 58 56 RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
+5 -6
drivers/infiniband/sw/rxe/rxe_pool.c
··· 13 13 size_t size; 14 14 size_t elem_offset; 15 15 void (*cleanup)(struct rxe_pool_elem *elem); 16 - enum rxe_pool_flags flags; 17 16 u32 min_index; 18 17 u32 max_index; 19 18 u32 max_elem; ··· 45 46 .name = "srq", 46 47 .size = sizeof(struct rxe_srq), 47 48 .elem_offset = offsetof(struct rxe_srq, elem), 49 + .cleanup = rxe_srq_cleanup, 48 50 .min_index = RXE_MIN_SRQ_INDEX, 49 51 .max_index = RXE_MAX_SRQ_INDEX, 50 52 .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, ··· 73 73 .size = sizeof(struct rxe_mr), 74 74 .elem_offset = offsetof(struct rxe_mr, elem), 75 75 .cleanup = rxe_mr_cleanup, 76 - .flags = RXE_POOL_ALLOC, 77 76 .min_index = RXE_MIN_MR_INDEX, 78 77 .max_index = RXE_MAX_MR_INDEX, 79 78 .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, ··· 81 82 .name = "mw", 82 83 .size = sizeof(struct rxe_mw), 83 84 .elem_offset = offsetof(struct rxe_mw, elem), 85 + .cleanup = rxe_mw_cleanup, 84 86 .min_index = RXE_MIN_MW_INDEX, 85 87 .max_index = RXE_MAX_MW_INDEX, 86 88 .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, ··· 101 101 pool->max_elem = info->max_elem; 102 102 pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); 103 103 pool->elem_offset = info->elem_offset; 104 - pool->flags = info->flags; 105 104 pool->cleanup = info->cleanup; 106 105 107 106 atomic_set(&pool->num_elem, 0); ··· 121 122 void *obj; 122 123 int err; 123 124 124 - if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC))) 125 + if (WARN_ON(!(pool->type == RXE_TYPE_MR))) 125 126 return NULL; 126 127 127 128 if (atomic_inc_return(&pool->num_elem) > pool->max_elem) ··· 155 156 { 156 157 int err; 157 158 158 - if (WARN_ON(pool->flags & RXE_POOL_ALLOC)) 159 + if (WARN_ON(pool->type == RXE_TYPE_MR)) 159 160 return -EINVAL; 160 161 161 162 if (atomic_inc_return(&pool->num_elem) > pool->max_elem) ··· 205 206 if (pool->cleanup) 206 207 pool->cleanup(elem); 207 208 208 - if (pool->flags & RXE_POOL_ALLOC) 209 + if (pool->type == RXE_TYPE_MR) 209 210 kfree(elem->obj); 210 211 211 212 
atomic_dec(&pool->num_elem);
-5
drivers/infiniband/sw/rxe/rxe_pool.h
··· 7 7 #ifndef RXE_POOL_H 8 8 #define RXE_POOL_H 9 9 10 - enum rxe_pool_flags { 11 - RXE_POOL_ALLOC = BIT(1), 12 - }; 13 - 14 10 enum rxe_elem_type { 15 11 RXE_TYPE_UC, 16 12 RXE_TYPE_PD, ··· 31 35 struct rxe_dev *rxe; 32 36 const char *name; 33 37 void (*cleanup)(struct rxe_pool_elem *elem); 34 - enum rxe_pool_flags flags; 35 38 enum rxe_elem_type type; 36 39 37 40 unsigned int max_elem;
+15 -21
drivers/infiniband/sw/rxe/rxe_qp.c
··· 63 63 int port_num = init->port_num; 64 64 65 65 switch (init->qp_type) { 66 - case IB_QPT_SMI: 67 66 case IB_QPT_GSI: 68 67 case IB_QPT_RC: 69 68 case IB_QPT_UC: ··· 80 81 if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) 81 82 goto err1; 82 83 83 - if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { 84 + if (init->qp_type == IB_QPT_GSI) { 84 85 if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { 85 86 pr_warn("invalid port = %d\n", port_num); 86 87 goto err1; 87 88 } 88 89 89 90 port = &rxe->port; 90 - 91 - if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { 92 - pr_warn("SMI QP exists for port %d\n", port_num); 93 - goto err1; 94 - } 95 91 96 92 if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { 97 93 pr_warn("GSI QP exists for port %d\n", port_num); ··· 161 167 port = &rxe->port; 162 168 163 169 switch (init->qp_type) { 164 - case IB_QPT_SMI: 165 - qp->ibqp.qp_num = 0; 166 - port->qp_smi_index = qpn; 167 - qp->attr.port_num = init->port_num; 168 - break; 169 - 170 170 case IB_QPT_GSI: 171 171 qp->ibqp.qp_num = 1; 172 172 port->qp_gsi_index = qpn; ··· 322 334 qp->scq = scq; 323 335 qp->srq = srq; 324 336 337 + atomic_inc(&rcq->num_wq); 338 + atomic_inc(&scq->num_wq); 339 + 325 340 rxe_qp_init_misc(rxe, qp, init); 326 341 327 342 err = rxe_qp_init_req(rxe, qp, init, udata, uresp); ··· 344 353 rxe_queue_cleanup(qp->sq.queue); 345 354 qp->sq.queue = NULL; 346 355 err1: 356 + atomic_dec(&rcq->num_wq); 357 + atomic_dec(&scq->num_wq); 358 + 347 359 qp->pd = NULL; 348 360 qp->rcq = NULL; 349 361 qp->scq = NULL; ··· 771 777 return 0; 772 778 } 773 779 774 - /* called by the destroy qp verb */ 775 - void rxe_qp_destroy(struct rxe_qp *qp) 780 + /* called when the last reference to the qp is dropped */ 781 + static void rxe_qp_do_cleanup(struct work_struct *work) 776 782 { 783 + struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); 784 + 777 785 qp->valid = 0; 778 786 qp->qp_timeout_jiffies = 0; 779 787 
rxe_cleanup_task(&qp->resp.task); ··· 794 798 __rxe_do_task(&qp->comp.task); 795 799 __rxe_do_task(&qp->req.task); 796 800 } 797 - } 798 - 799 - /* called when the last reference to the qp is dropped */ 800 - static void rxe_qp_do_cleanup(struct work_struct *work) 801 - { 802 - struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); 803 801 804 802 if (qp->sq.queue) 805 803 rxe_queue_cleanup(qp->sq.queue); ··· 804 814 if (qp->rq.queue) 805 815 rxe_queue_cleanup(qp->rq.queue); 806 816 817 + atomic_dec(&qp->scq->num_wq); 807 818 if (qp->scq) 808 819 rxe_put(qp->scq); 820 + 821 + atomic_dec(&qp->rcq->num_wq); 809 822 if (qp->rcq) 810 823 rxe_put(qp->rcq); 824 + 811 825 if (qp->pd) 812 826 rxe_put(qp->pd); 813 827
-1
drivers/infiniband/sw/rxe/rxe_recv.c
··· 34 34 } 35 35 break; 36 36 case IB_QPT_UD: 37 - case IB_QPT_SMI: 38 37 case IB_QPT_GSI: 39 38 if (unlikely(pkt_type != IB_OPCODE_UD)) { 40 39 pr_warn_ratelimited("bad qp type\n");
+12 -16
drivers/infiniband/sw/rxe/rxe_req.c
··· 33 33 } else { 34 34 advance_dma_data(&wqe->dma, to_send); 35 35 } 36 - if (mask & WR_WRITE_MASK) 37 - wqe->iova += qp->mtu; 38 36 } 39 37 } 40 38 ··· 306 308 case IB_QPT_UC: 307 309 return next_opcode_uc(qp, opcode, fits); 308 310 309 - case IB_QPT_SMI: 310 311 case IB_QPT_UD: 311 312 case IB_QPT_GSI: 312 313 switch (opcode) { ··· 411 414 412 415 if (pkt->mask & RXE_ATMETH_MASK) { 413 416 atmeth_set_va(pkt, wqe->iova); 414 - if (opcode == IB_OPCODE_RC_COMPARE_SWAP || 415 - opcode == IB_OPCODE_RD_COMPARE_SWAP) { 417 + if (opcode == IB_OPCODE_RC_COMPARE_SWAP) { 416 418 atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); 417 419 atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); 418 420 } else { ··· 433 437 434 438 static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, 435 439 struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, 436 - struct sk_buff *skb, u32 paylen) 440 + struct sk_buff *skb, u32 payload) 437 441 { 438 442 int err; 439 443 ··· 445 449 if (wqe->wr.send_flags & IB_SEND_INLINE) { 446 450 u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; 447 451 448 - memcpy(payload_addr(pkt), tmp, paylen); 452 + memcpy(payload_addr(pkt), tmp, payload); 449 453 450 - wqe->dma.resid -= paylen; 451 - wqe->dma.sge_offset += paylen; 454 + wqe->dma.resid -= payload; 455 + wqe->dma.sge_offset += payload; 452 456 } else { 453 457 err = copy_data(qp->pd, 0, &wqe->dma, 454 - payload_addr(pkt), paylen, 458 + payload_addr(pkt), payload, 455 459 RXE_FROM_MR_OBJ); 456 460 if (err) 457 461 return err; 458 462 } 459 463 if (bth_pad(pkt)) { 460 - u8 *pad = payload_addr(pkt) + paylen; 464 + u8 *pad = payload_addr(pkt) + payload; 461 465 462 466 memset(pad, 0, bth_pad(pkt)); 463 467 } ··· 523 527 qp->req.psn = rollback_psn; 524 528 } 525 529 526 - static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 527 - struct rxe_pkt_info *pkt) 530 + static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) 528 531 { 529 532 qp->req.opcode = pkt->opcode; 530 533 
··· 606 611 struct rxe_ah *ah; 607 612 struct rxe_av *av; 608 613 609 - rxe_get(qp); 614 + if (!rxe_get(qp)) 615 + return -EAGAIN; 610 616 611 617 next_wqe: 612 618 if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) ··· 657 661 opcode = next_opcode(qp, wqe, wqe->wr.opcode); 658 662 if (unlikely(opcode < 0)) { 659 663 wqe->status = IB_WC_LOC_QP_OP_ERR; 660 - goto exit; 664 + goto err; 661 665 } 662 666 663 667 mask = rxe_opcode[opcode].mask; ··· 751 755 goto err; 752 756 } 753 757 754 - update_state(qp, wqe, &pkt); 758 + update_state(qp, &pkt); 755 759 756 760 goto next_wqe; 757 761
+3 -5
drivers/infiniband/sw/rxe/rxe_resp.c
··· 277 277 break; 278 278 279 279 case IB_QPT_UD: 280 - case IB_QPT_SMI: 281 280 case IB_QPT_GSI: 282 281 break; 283 282 ··· 576 577 577 578 qp->resp.atomic_orig = *vaddr; 578 579 579 - if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || 580 - pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { 580 + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) { 581 581 if (*vaddr == atmeth_comp(pkt)) 582 582 *vaddr = atmeth_swap_add(pkt); 583 583 } else { ··· 832 834 833 835 if (pkt->mask & RXE_SEND_MASK) { 834 836 if (qp_type(qp) == IB_QPT_UD || 835 - qp_type(qp) == IB_QPT_SMI || 836 837 qp_type(qp) == IB_QPT_GSI) { 837 838 if (skb->protocol == htons(ETH_P_IP)) { 838 839 memset(&hdr.reserved, 0, ··· 1262 1265 struct rxe_pkt_info *pkt = NULL; 1263 1266 int ret = 0; 1264 1267 1265 - rxe_get(qp); 1268 + if (!rxe_get(qp)) 1269 + return -EAGAIN; 1266 1270 1267 1271 qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; 1268 1272
+80 -49
drivers/infiniband/sw/rxe/rxe_srq.c
··· 6 6 7 7 #include <linux/vmalloc.h> 8 8 #include "rxe.h" 9 - #include "rxe_loc.h" 10 9 #include "rxe_queue.h" 11 10 12 - int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 13 - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) 11 + int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) 14 12 { 15 - if (srq && srq->error) { 16 - pr_warn("srq in error state\n"); 13 + struct ib_srq_attr *attr = &init->attr; 14 + 15 + if (attr->max_wr > rxe->attr.max_srq_wr) { 16 + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", 17 + attr->max_wr, rxe->attr.max_srq_wr); 17 18 goto err1; 18 19 } 19 20 20 - if (mask & IB_SRQ_MAX_WR) { 21 - if (attr->max_wr > rxe->attr.max_srq_wr) { 22 - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", 23 - attr->max_wr, rxe->attr.max_srq_wr); 24 - goto err1; 25 - } 26 - 27 - if (attr->max_wr <= 0) { 28 - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); 29 - goto err1; 30 - } 31 - 32 - if (srq && srq->limit && (attr->max_wr < srq->limit)) { 33 - pr_warn("max_wr (%d) < srq->limit (%d)\n", 34 - attr->max_wr, srq->limit); 35 - goto err1; 36 - } 37 - 38 - if (attr->max_wr < RXE_MIN_SRQ_WR) 39 - attr->max_wr = RXE_MIN_SRQ_WR; 21 + if (attr->max_wr <= 0) { 22 + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); 23 + goto err1; 40 24 } 41 25 42 - if (mask & IB_SRQ_LIMIT) { 43 - if (attr->srq_limit > rxe->attr.max_srq_wr) { 44 - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", 45 - attr->srq_limit, rxe->attr.max_srq_wr); 46 - goto err1; 47 - } 26 + if (attr->max_wr < RXE_MIN_SRQ_WR) 27 + attr->max_wr = RXE_MIN_SRQ_WR; 48 28 49 - if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { 50 - pr_warn("srq_limit (%d) > cur limit(%d)\n", 51 - attr->srq_limit, 52 - srq->rq.queue->buf->index_mask); 53 - goto err1; 54 - } 29 + if (attr->max_sge > rxe->attr.max_srq_sge) { 30 + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", 31 + attr->max_sge, rxe->attr.max_srq_sge); 32 + goto err1; 55 33 } 56 34 57 - if (mask == IB_SRQ_INIT_MASK) { 58 - if (attr->max_sge > 
rxe->attr.max_srq_sge) { 59 - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", 60 - attr->max_sge, rxe->attr.max_srq_sge); 61 - goto err1; 62 - } 63 - 64 - if (attr->max_sge < RXE_MIN_SRQ_SGE) 65 - attr->max_sge = RXE_MIN_SRQ_SGE; 66 - } 35 + if (attr->max_sge < RXE_MIN_SRQ_SGE) 36 + attr->max_sge = RXE_MIN_SRQ_SGE; 67 37 68 38 return 0; 69 39 ··· 63 93 spin_lock_init(&srq->rq.consumer_lock); 64 94 65 95 type = QUEUE_TYPE_FROM_CLIENT; 66 - q = rxe_queue_init(rxe, &srq->rq.max_wr, 67 - srq_wqe_size, type); 96 + q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); 68 97 if (!q) { 69 98 pr_warn("unable to allocate queue for srq\n"); 70 99 return -ENOMEM; ··· 88 119 } 89 120 90 121 return 0; 122 + } 123 + 124 + int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, 125 + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) 126 + { 127 + if (srq->error) { 128 + pr_warn("srq in error state\n"); 129 + goto err1; 130 + } 131 + 132 + if (mask & IB_SRQ_MAX_WR) { 133 + if (attr->max_wr > rxe->attr.max_srq_wr) { 134 + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", 135 + attr->max_wr, rxe->attr.max_srq_wr); 136 + goto err1; 137 + } 138 + 139 + if (attr->max_wr <= 0) { 140 + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); 141 + goto err1; 142 + } 143 + 144 + if (srq->limit && (attr->max_wr < srq->limit)) { 145 + pr_warn("max_wr (%d) < srq->limit (%d)\n", 146 + attr->max_wr, srq->limit); 147 + goto err1; 148 + } 149 + 150 + if (attr->max_wr < RXE_MIN_SRQ_WR) 151 + attr->max_wr = RXE_MIN_SRQ_WR; 152 + } 153 + 154 + if (mask & IB_SRQ_LIMIT) { 155 + if (attr->srq_limit > rxe->attr.max_srq_wr) { 156 + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", 157 + attr->srq_limit, rxe->attr.max_srq_wr); 158 + goto err1; 159 + } 160 + 161 + if (attr->srq_limit > srq->rq.queue->buf->index_mask) { 162 + pr_warn("srq_limit (%d) > cur limit(%d)\n", 163 + attr->srq_limit, 164 + srq->rq.queue->buf->index_mask); 165 + goto err1; 166 + } 167 + } 168 + 169 + return 0; 170 + 171 + err1: 172 + return 
-EINVAL; 91 173 } 92 174 93 175 int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, ··· 173 153 rxe_queue_cleanup(q); 174 154 srq->rq.queue = NULL; 175 155 return err; 156 + } 157 + 158 + void rxe_srq_cleanup(struct rxe_pool_elem *elem) 159 + { 160 + struct rxe_srq *srq = container_of(elem, typeof(*srq), elem); 161 + 162 + if (srq->pd) 163 + rxe_put(srq->pd); 164 + 165 + if (srq->rq.queue) 166 + rxe_queue_cleanup(srq->rq.queue); 176 167 }
+17 -23
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 7 7 #include <linux/dma-mapping.h> 8 8 #include <net/addrconf.h> 9 9 #include <rdma/uverbs_ioctl.h> 10 + 10 11 #include "rxe.h" 11 - #include "rxe_loc.h" 12 12 #include "rxe_queue.h" 13 13 #include "rxe_hw_counters.h" 14 14 ··· 286 286 struct rxe_srq *srq = to_rsrq(ibsrq); 287 287 struct rxe_create_srq_resp __user *uresp = NULL; 288 288 289 - if (init->srq_type != IB_SRQT_BASIC) 290 - return -EOPNOTSUPP; 291 - 292 289 if (udata) { 293 290 if (udata->outlen < sizeof(*uresp)) 294 291 return -EINVAL; 295 292 uresp = udata->outbuf; 296 293 } 297 294 298 - err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); 295 + if (init->srq_type != IB_SRQT_BASIC) 296 + return -EOPNOTSUPP; 297 + 298 + err = rxe_srq_chk_init(rxe, init); 299 299 if (err) 300 - goto err1; 300 + return err; 301 301 302 302 err = rxe_add_to_pool(&rxe->srq_pool, srq); 303 303 if (err) 304 - goto err1; 304 + return err; 305 305 306 306 rxe_get(pd); 307 307 srq->pd = pd; 308 308 309 309 err = rxe_srq_from_init(rxe, srq, init, udata, uresp); 310 310 if (err) 311 - goto err2; 311 + goto err_put; 312 312 313 313 return 0; 314 314 315 - err2: 316 - rxe_put(pd); 315 + err_put: 317 316 rxe_put(srq); 318 - err1: 319 317 return err; 320 318 } 321 319 ··· 337 339 338 340 err = rxe_srq_chk_attr(rxe, srq, attr, mask); 339 341 if (err) 340 - goto err1; 342 + return err; 341 343 342 344 err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); 343 345 if (err) 344 - goto err1; 345 - 346 + return err; 346 347 return 0; 347 - 348 - err1: 349 - return err; 350 348 } 351 349 352 350 static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) ··· 362 368 { 363 369 struct rxe_srq *srq = to_rsrq(ibsrq); 364 370 365 - if (srq->rq.queue) 366 - rxe_queue_cleanup(srq->rq.queue); 367 - 368 - rxe_put(srq->pd); 369 371 rxe_put(srq); 370 372 return 0; 371 373 } ··· 485 495 if (ret) 486 496 return ret; 487 497 488 - rxe_qp_destroy(qp); 489 498 rxe_put(qp); 490 499 return 0; 491 500 } ··· 525 536 
wr->send_flags = ibwr->send_flags; 526 537 527 538 if (qp_type(qp) == IB_QPT_UD || 528 - qp_type(qp) == IB_QPT_SMI || 529 539 qp_type(qp) == IB_QPT_GSI) { 530 540 struct ib_ah *ibah = ud_wr(ibwr)->ah; 531 541 ··· 794 806 static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) 795 807 { 796 808 struct rxe_cq *cq = to_rcq(ibcq); 809 + 810 + /* See IBA C11-17: The CI shall return an error if this Verb is 811 + * invoked while a Work Queue is still associated with the CQ. 812 + */ 813 + if (atomic_read(&cq->num_wq)) 814 + return -EINVAL; 797 815 798 816 rxe_cq_disable(cq); 799 817
+1 -2
drivers/infiniband/sw/rxe/rxe_verbs.h
··· 67 67 bool is_dying; 68 68 bool is_user; 69 69 struct tasklet_struct comp_task; 70 + atomic_t num_wq; 70 71 }; 71 72 72 73 enum wqe_state { ··· 374 373 spinlock_t port_lock; /* guard port */ 375 374 unsigned int mtu_cap; 376 375 /* special QPs */ 377 - u32 qp_smi_index; 378 376 u32 qp_gsi_index; 379 377 }; 380 378 ··· 394 394 struct rxe_pool cq_pool; 395 395 struct rxe_pool mr_pool; 396 396 struct rxe_pool mw_pool; 397 - struct rxe_pool mc_grp_pool; 398 397 399 398 /* multicast support */ 400 399 spinlock_t mcg_lock;
+3 -2
drivers/infiniband/sw/siw/siw_main.c
··· 119 119 * <linux/if_arp.h> for type identifiers. 120 120 */ 121 121 if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 || 122 + netdev->type == ARPHRD_NONE || 122 123 (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) 123 124 return 1; 124 125 ··· 316 315 317 316 sdev->netdev = netdev; 318 317 319 - if (netdev->type != ARPHRD_LOOPBACK) { 318 + if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { 320 319 addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, 321 320 netdev->dev_addr); 322 321 } else { 323 322 /* 324 - * The loopback device does not have a HW address, 323 + * This device does not have a HW address, 325 324 * but connection mangagement lib expects gid != 0 326 325 */ 327 326 size_t len = min_t(size_t, strlen(base_dev->name), 6);
+2 -2
drivers/infiniband/sw/siw/siw_verbs.c
··· 132 132 133 133 /* Revisit atomic caps if RFC 7306 gets supported */ 134 134 attr->atomic_cap = 0; 135 - attr->device_cap_flags = 136 - IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG; 135 + attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; 136 + attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; 137 137 attr->max_cq = sdev->attrs.max_cq; 138 138 attr->max_cqe = sdev->attrs.max_cqe; 139 139 attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL;
+1
drivers/infiniband/ulp/ipoib/ipoib.h
··· 411 411 struct dentry *path_dentry; 412 412 #endif 413 413 u64 hca_caps; 414 + u64 kernel_caps; 414 415 struct ipoib_ethtool_st ethtool; 415 416 unsigned int max_send_sge; 416 417 const struct net_device_ops *rn_ops;
+3 -2
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 1850 1850 static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) 1851 1851 { 1852 1852 priv->hca_caps = priv->ca->attrs.device_cap_flags; 1853 + priv->kernel_caps = priv->ca->attrs.kernel_cap_flags; 1853 1854 1854 1855 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 1855 1856 priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; 1856 1857 1857 - if (priv->hca_caps & IB_DEVICE_UD_TSO) 1858 + if (priv->kernel_caps & IBK_UD_TSO) 1858 1859 priv->dev->hw_features |= NETIF_F_TSO; 1859 1860 1860 1861 priv->dev->features |= priv->dev->hw_features; ··· 2202 2201 2203 2202 priv->rn_ops = dev->netdev_ops; 2204 2203 2205 - if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION) 2204 + if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION) 2206 2205 dev->netdev_ops = &ipoib_netdev_ops_vf; 2207 2206 else 2208 2207 dev->netdev_ops = &ipoib_netdev_ops_pf;
+3 -3
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
··· 197 197 init_attr.send_cq = priv->send_cq; 198 198 init_attr.recv_cq = priv->recv_cq; 199 199 200 - if (priv->hca_caps & IB_DEVICE_UD_TSO) 200 + if (priv->kernel_caps & IBK_UD_TSO) 201 201 init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; 202 202 203 - if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) 203 + if (priv->kernel_caps & IBK_BLOCK_MULTICAST_LOOPBACK) 204 204 init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; 205 205 206 206 if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) 207 207 init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; 208 208 209 - if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA) 209 + if (priv->kernel_caps & IBK_RDMA_NETDEV_OPA) 210 210 init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE; 211 211 212 212 priv->qp = ib_create_qp(priv->pd, &init_attr);
+1 -1
drivers/infiniband/ulp/iser/iscsi_iser.c
··· 650 650 SHOST_DIX_GUARD_CRC); 651 651 } 652 652 653 - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 653 + if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) 654 654 shost->virt_boundary_mask = SZ_4K - 1; 655 655 656 656 if (iscsi_host_add(shost, ib_dev->dev.parent)) {
+1 -1
drivers/infiniband/ulp/iser/iscsi_iser.h
··· 363 363 * @cq: Connection completion queue 364 364 * @cq_size: The number of max outstanding completions 365 365 * @device: reference to iser device 366 - * @fr_pool: connection fast registration poool 366 + * @fr_pool: connection fast registration pool 367 367 * @pi_support: Indicate device T10-PI support 368 368 * @reg_cqe: completion handler 369 369 */
+4 -4
drivers/infiniband/ulp/iser/iser_verbs.c
··· 115 115 if (!desc) 116 116 return ERR_PTR(-ENOMEM); 117 117 118 - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 118 + if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 119 119 mr_type = IB_MR_TYPE_SG_GAPS; 120 120 else 121 121 mr_type = IB_MR_TYPE_MEM_REG; ··· 517 517 * (head and tail) for a single page worth data, so one additional 518 518 * entry is required. 519 519 */ 520 - if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG) 520 + if (attr->kernel_cap_flags & IBK_SG_GAPS_REG) 521 521 reserved_mr_pages = 0; 522 522 else 523 523 reserved_mr_pages = 1; ··· 562 562 563 563 /* connection T10-PI support */ 564 564 if (iser_pi_enable) { 565 - if (!(device->ib_device->attrs.device_cap_flags & 566 - IB_DEVICE_INTEGRITY_HANDOVER)) { 565 + if (!(device->ib_device->attrs.kernel_cap_flags & 566 + IBK_INTEGRITY_HANDOVER)) { 567 567 iser_warn("T10-PI requested but not supported on %s, " 568 568 "continue without T10-PI\n", 569 569 dev_name(&ib_conn->device->ib_device->dev));
+17 -10
drivers/infiniband/ulp/isert/ib_isert.c
··· 42 42 43 43 static DEFINE_MUTEX(device_list_mutex); 44 44 static LIST_HEAD(device_list); 45 + static struct workqueue_struct *isert_login_wq; 45 46 static struct workqueue_struct *isert_comp_wq; 46 47 static struct workqueue_struct *isert_release_wq; 47 48 ··· 231 230 } 232 231 233 232 /* Check signature cap */ 234 - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER) 233 + if (ib_dev->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER) 235 234 device->pi_capable = true; 236 235 else 237 236 device->pi_capable = false; ··· 1018 1017 complete(&isert_conn->login_comp); 1019 1018 return; 1020 1019 } 1021 - schedule_delayed_work(&conn->login_work, 0); 1020 + queue_delayed_work(isert_login_wq, &conn->login_work, 0); 1022 1021 } 1023 1022 1024 1023 static struct iscsit_cmd ··· 2349 2348 2350 2349 /* 2351 2350 * For login requests after the first PDU, isert_rx_login_req() will 2352 - * kick schedule_delayed_work(&conn->login_work) as the packet is 2353 - * received, which turns this callback from iscsi_target_do_login_rx() 2354 - * into a NOP. 2351 + * kick queue_delayed_work(isert_login_wq, &conn->login_work) as 2352 + * the packet is received, which turns this callback from 2353 + * iscsi_target_do_login_rx() into a NOP. 
2355 2354 */ 2356 2355 if (!login->first_request) 2357 2356 return 0; ··· 2607 2606 2608 2607 static int __init isert_init(void) 2609 2608 { 2610 - int ret; 2609 + isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0); 2610 + if (!isert_login_wq) { 2611 + isert_err("Unable to allocate isert_login_wq\n"); 2612 + return -ENOMEM; 2613 + } 2611 2614 2612 2615 isert_comp_wq = alloc_workqueue("isert_comp_wq", 2613 2616 WQ_UNBOUND | WQ_HIGHPRI, 0); 2614 2617 if (!isert_comp_wq) { 2615 2618 isert_err("Unable to allocate isert_comp_wq\n"); 2616 - return -ENOMEM; 2619 + goto destroy_login_wq; 2617 2620 } 2618 2621 2619 2622 isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND, 2620 2623 WQ_UNBOUND_MAX_ACTIVE); 2621 2624 if (!isert_release_wq) { 2622 2625 isert_err("Unable to allocate isert_release_wq\n"); 2623 - ret = -ENOMEM; 2624 2626 goto destroy_comp_wq; 2625 2627 } 2626 2628 ··· 2634 2630 2635 2631 destroy_comp_wq: 2636 2632 destroy_workqueue(isert_comp_wq); 2633 + destroy_login_wq: 2634 + destroy_workqueue(isert_login_wq); 2637 2635 2638 - return ret; 2636 + return -ENOMEM; 2639 2637 } 2640 2638 2641 2639 static void __exit isert_exit(void) 2642 2640 { 2643 - flush_scheduled_work(); 2641 + flush_workqueue(isert_login_wq); 2644 2642 destroy_workqueue(isert_release_wq); 2645 2643 destroy_workqueue(isert_comp_wq); 2646 2644 iscsit_unregister_transport(&iser_target_transport); 2647 2645 isert_info("iSER_TARGET[0] - Released iser_target_transport\n"); 2646 + destroy_workqueue(isert_login_wq); 2648 2647 } 2649 2648 2650 2649 MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
+1 -1
drivers/infiniband/ulp/rtrs/rtrs-clt.c
··· 2785 2785 /** 2786 2786 * rtrs_clt_open() - Open a path to an RTRS server 2787 2787 * @ops: holds the link event callback and the private pointer. 2788 - * @sessname: name of the session 2788 + * @pathname: name of the path to an RTRS server 2789 2789 * @paths: Paths to be established defined by their src and dst addresses 2790 2790 * @paths_num: Number of elements in the @paths array 2791 2791 * @port: port to be used by the RTRS session
+4 -4
drivers/infiniband/ulp/srp/ib_srp.c
··· 430 430 spin_lock_init(&pool->lock); 431 431 INIT_LIST_HEAD(&pool->free_list); 432 432 433 - if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 433 + if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 434 434 mr_type = IB_MR_TYPE_SG_GAPS; 435 435 else 436 436 mr_type = IB_MR_TYPE_MEM_REG; ··· 3650 3650 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3651 3651 target_host->max_segment_size = ib_dma_max_seg_size(ibdev); 3652 3652 3653 - if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 3653 + if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) 3654 3654 target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; 3655 3655 3656 3656 target = host_to_target(target_host); ··· 3706 3706 } 3707 3707 3708 3708 if (srp_dev->use_fast_reg) { 3709 - bool gaps_reg = (ibdev->attrs.device_cap_flags & 3710 - IB_DEVICE_SG_GAPS_REG); 3709 + bool gaps_reg = ibdev->attrs.kernel_cap_flags & 3710 + IBK_SG_GAPS_REG; 3711 3711 3712 3712 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3713 3713 (ilog2(srp_dev->mr_page_size) - 9);
+2 -2
drivers/nvme/host/rdma.c
··· 867 867 ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); 868 868 869 869 /* T10-PI support */ 870 - if (ctrl->device->dev->attrs.device_cap_flags & 871 - IB_DEVICE_INTEGRITY_HANDOVER) 870 + if (ctrl->device->dev->attrs.kernel_cap_flags & 871 + IBK_INTEGRITY_HANDOVER) 872 872 pi_capable = true; 873 873 874 874 ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
+2 -2
drivers/nvme/target/rdma.c
··· 1221 1221 ndev->inline_data_size = nport->inline_data_size; 1222 1222 ndev->inline_page_count = inline_page_count; 1223 1223 1224 - if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & 1225 - IB_DEVICE_INTEGRITY_HANDOVER)) { 1224 + if (nport->pi_enable && !(cm_id->device->attrs.kernel_cap_flags & 1225 + IBK_INTEGRITY_HANDOVER)) { 1226 1226 pr_warn("T10-PI is not supported by device %s. Disabling it\n", 1227 1227 cm_id->device->name); 1228 1228 nport->pi_enable = false;
+1 -1
fs/cifs/smbdirect.c
··· 649 649 smbd_max_frmr_depth, 650 650 info->id->device->attrs.max_fast_reg_page_list_len); 651 651 info->mr_type = IB_MR_TYPE_MEM_REG; 652 - if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 652 + if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 653 653 info->mr_type = IB_MR_TYPE_SG_GAPS; 654 654 655 655 info->pd = ib_alloc_pd(info->id->device, 0);
+77 -59
include/rdma/ib_verbs.h
··· 220 220 }; 221 221 222 222 enum ib_device_cap_flags { 223 - IB_DEVICE_RESIZE_MAX_WR = (1 << 0), 224 - IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), 225 - IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), 226 - IB_DEVICE_RAW_MULTI = (1 << 3), 227 - IB_DEVICE_AUTO_PATH_MIG = (1 << 4), 228 - IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), 229 - IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), 230 - IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), 231 - IB_DEVICE_SHUTDOWN_PORT = (1 << 8), 232 - /* Not in use, former INIT_TYPE = (1 << 9),*/ 233 - IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), 234 - IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), 235 - IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), 236 - IB_DEVICE_SRQ_RESIZE = (1 << 13), 237 - IB_DEVICE_N_NOTIFY_CQ = (1 << 14), 223 + IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR, 224 + IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR, 225 + IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR, 226 + IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI, 227 + IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG, 228 + IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT, 229 + IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE, 230 + IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD, 231 + IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT, 232 + /* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */ 233 + IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT, 234 + IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID, 235 + IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN, 236 + IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE, 237 + IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ, 238 238 239 - /* 240 - * This device supports a per-device lkey or stag that can be 241 - * used without performing a memory registration for the local 242 - * memory. 
Note that ULPs should never check this flag, but 243 - * instead of use the local_dma_lkey flag in the ib_pd structure, 244 - * which will always contain a usable lkey. 245 - */ 246 - IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), 247 - /* Reserved, old SEND_W_INV = (1 << 16),*/ 248 - IB_DEVICE_MEM_WINDOW = (1 << 17), 239 + /* Reserved, old SEND_W_INV = 1 << 16,*/ 240 + IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW, 249 241 /* 250 242 * Devices should set IB_DEVICE_UD_IP_SUM if they support 251 243 * insertion of UDP and TCP checksum on outgoing UD IPoIB ··· 245 253 * incoming messages. Setting this flag implies that the 246 254 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. 247 255 */ 248 - IB_DEVICE_UD_IP_CSUM = (1 << 18), 249 - IB_DEVICE_UD_TSO = (1 << 19), 250 - IB_DEVICE_XRC = (1 << 20), 256 + IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM, 257 + IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC, 251 258 252 259 /* 253 260 * This device supports the IB "base memory management extension", ··· 257 266 * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the 258 267 * stag. 259 268 */ 260 - IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), 261 - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), 262 - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), 263 - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), 264 - IB_DEVICE_RC_IP_CSUM = (1 << 25), 269 + IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS, 270 + IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A, 271 + IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B, 272 + IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM, 265 273 /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */ 266 - IB_DEVICE_RAW_IP_CSUM = (1 << 26), 267 - /* 268 - * Devices should set IB_DEVICE_CROSS_CHANNEL if they 269 - * support execution of WQEs that involve synchronization 270 - * of I/O operations with single completion queue managed 271 - * by hardware. 
272 - */ 273 - IB_DEVICE_CROSS_CHANNEL = (1 << 27), 274 - IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), 275 - IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), 276 - IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), 277 - IB_DEVICE_SG_GAPS_REG = (1ULL << 32), 278 - IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), 274 + IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM, 275 + IB_DEVICE_MANAGED_FLOW_STEERING = 276 + IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING, 279 277 /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ 280 - IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), 281 - IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), 278 + IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS, 282 279 /* The device supports padding incoming writes to cacheline. */ 283 - IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), 284 - IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), 280 + IB_DEVICE_PCI_WRITE_END_PADDING = 281 + IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING, 282 + }; 283 + 284 + enum ib_kernel_cap_flags { 285 + /* 286 + * This device supports a per-device lkey or stag that can be 287 + * used without performing a memory registration for the local 288 + * memory. Note that ULPs should never check this flag, but 289 + * instead of use the local_dma_lkey flag in the ib_pd structure, 290 + * which will always contain a usable lkey. 
291 + */ 292 + IBK_LOCAL_DMA_LKEY = 1 << 0, 293 + /* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */ 294 + IBK_INTEGRITY_HANDOVER = 1 << 1, 295 + /* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */ 296 + IBK_ON_DEMAND_PAGING = 1 << 2, 297 + /* IB_MR_TYPE_SG_GAPS is supported */ 298 + IBK_SG_GAPS_REG = 1 << 3, 299 + /* Driver supports RDMA_NLDEV_CMD_DELLINK */ 300 + IBK_ALLOW_USER_UNREG = 1 << 4, 301 + 302 + /* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */ 303 + IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5, 304 + /* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */ 305 + IBK_UD_TSO = 1 << 6, 306 + /* iopib will use the device ops: 307 + * get_vf_config 308 + * get_vf_guid 309 + * get_vf_stats 310 + * set_vf_guid 311 + * set_vf_link_state 312 + */ 313 + IBK_VIRTUAL_FUNCTION = 1 << 7, 314 + /* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */ 315 + IBK_RDMA_NETDEV_OPA = 1 << 8, 285 316 }; 286 317 287 318 enum ib_atomic_cap { ··· 402 389 int max_qp; 403 390 int max_qp_wr; 404 391 u64 device_cap_flags; 392 + u64 kernel_cap_flags; 405 393 int max_send_sge; 406 394 int max_recv_sge; 407 395 int max_sge_rd; ··· 578 564 /** 579 565 * struct rdma_hw_stats 580 566 * @lock - Mutex to protect parallel write access to lifespan and values 581 - * of counters, which are 64bits and not guaranteeed to be written 567 + * of counters, which are 64bits and not guaranteed to be written 582 568 * atomicaly on 32bits systems. 583 569 * @timestamp - Used by the core code to track when the last update was 584 570 * @lifespan - Used by the core code to determine how old the counters ··· 1635 1621 }; 1636 1622 1637 1623 enum ib_raw_packet_caps { 1638 - /* Strip cvlan from incoming packet and report it in the matching work 1624 + /* 1625 + * Strip cvlan from incoming packet and report it in the matching work 1639 1626 * completion is supported. 
1640 1627 */ 1641 - IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0), 1642 - /* Scatter FCS field of an incoming packet to host memory is supported. 1628 + IB_RAW_PACKET_CAP_CVLAN_STRIPPING = 1629 + IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING, 1630 + /* 1631 + * Scatter FCS field of an incoming packet to host memory is supported. 1643 1632 */ 1644 - IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1), 1633 + IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS, 1645 1634 /* Checksum offloads are supported (for both send and receive). */ 1646 - IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2), 1647 - /* When a packet is received for an RQ with no receive WQEs, the 1635 + IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM, 1636 + /* 1637 + * When a packet is received for an RQ with no receive WQEs, the 1648 1638 * packet processing is delayed. 1649 1639 */ 1650 - IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3), 1640 + IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP, 1651 1641 }; 1652 1642 1653 1643 enum ib_wq_type { ··· 4322 4304 return -EINVAL; 4323 4305 4324 4306 if (flags & IB_ACCESS_ON_DEMAND && 4325 - !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) 4307 + !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) 4326 4308 return -EINVAL; 4327 4309 return 0; 4328 4310 }
+1 -2
include/rdma/opa_vnic.h
··· 90 90 91 91 static inline bool rdma_cap_opa_vnic(struct ib_device *device) 92 92 { 93 - return !!(device->attrs.device_cap_flags & 94 - IB_DEVICE_RDMA_NETDEV_OPA); 93 + return !!(device->attrs.kernel_cap_flags & IBK_RDMA_NETDEV_OPA); 95 94 } 96 95 97 96 #endif /* _OPA_VNIC_H */
+42
include/uapi/rdma/ib_user_verbs.h
··· 1298 1298 1299 1299 #define IB_DEVICE_NAME_MAX 64 1300 1300 1301 + /* 1302 + * bits 9, 15, 16, 19, 22, 27, 30, 31, 32, 33, 35 and 37 may be set by old 1303 + * kernels and should not be used. 1304 + */ 1305 + enum ib_uverbs_device_cap_flags { 1306 + IB_UVERBS_DEVICE_RESIZE_MAX_WR = 1 << 0, 1307 + IB_UVERBS_DEVICE_BAD_PKEY_CNTR = 1 << 1, 1308 + IB_UVERBS_DEVICE_BAD_QKEY_CNTR = 1 << 2, 1309 + IB_UVERBS_DEVICE_RAW_MULTI = 1 << 3, 1310 + IB_UVERBS_DEVICE_AUTO_PATH_MIG = 1 << 4, 1311 + IB_UVERBS_DEVICE_CHANGE_PHY_PORT = 1 << 5, 1312 + IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE = 1 << 6, 1313 + IB_UVERBS_DEVICE_CURR_QP_STATE_MOD = 1 << 7, 1314 + IB_UVERBS_DEVICE_SHUTDOWN_PORT = 1 << 8, 1315 + /* IB_UVERBS_DEVICE_INIT_TYPE = 1 << 9, (not in use) */ 1316 + IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT = 1 << 10, 1317 + IB_UVERBS_DEVICE_SYS_IMAGE_GUID = 1 << 11, 1318 + IB_UVERBS_DEVICE_RC_RNR_NAK_GEN = 1 << 12, 1319 + IB_UVERBS_DEVICE_SRQ_RESIZE = 1 << 13, 1320 + IB_UVERBS_DEVICE_N_NOTIFY_CQ = 1 << 14, 1321 + IB_UVERBS_DEVICE_MEM_WINDOW = 1 << 17, 1322 + IB_UVERBS_DEVICE_UD_IP_CSUM = 1 << 18, 1323 + IB_UVERBS_DEVICE_XRC = 1 << 20, 1324 + IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS = 1 << 21, 1325 + IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A = 1 << 23, 1326 + IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B = 1 << 24, 1327 + IB_UVERBS_DEVICE_RC_IP_CSUM = 1 << 25, 1328 + /* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_IP_CSUM. */ 1329 + IB_UVERBS_DEVICE_RAW_IP_CSUM = 1 << 26, 1330 + IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING = 1 << 29, 1331 + /* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. 
*/ 1332 + IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34, 1333 + IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36, 1334 + }; 1335 + 1336 + enum ib_uverbs_raw_packet_caps { 1337 + IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING = 1 << 0, 1338 + IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS = 1 << 1, 1339 + IB_UVERBS_RAW_PACKET_CAP_IP_CSUM = 1 << 2, 1340 + IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP = 1 << 3, 1341 + }; 1342 + 1301 1343 #endif /* IB_USER_VERBS_H */
+2 -2
net/rds/ib.c
··· 154 154 rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); 155 155 156 156 rds_ibdev->odp_capable = 157 - !!(device->attrs.device_cap_flags & 158 - IB_DEVICE_ON_DEMAND_PAGING) && 157 + !!(device->attrs.kernel_cap_flags & 158 + IBK_ON_DEMAND_PAGING) && 159 159 !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & 160 160 IB_ODP_SUPPORT_WRITE) && 161 161 !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
+1 -1
net/sunrpc/xprtrdma/frwr_ops.c
··· 195 195 ep->re_attr.cap.max_recv_sge = 1; 196 196 197 197 ep->re_mrtype = IB_MR_TYPE_MEM_REG; 198 - if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) 198 + if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG) 199 199 ep->re_mrtype = IB_MR_TYPE_SG_GAPS; 200 200 201 201 /* Quirk: Some devices advertise a large max_fast_reg_page_list_len