
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"A smaller set of patches, nothing stands out as being particularly
major this cycle. The biggest item would be the new HIP09 HW support
from HNS, otherwise it was pretty quiet for new work here:

- Driver bug fixes and updates: bnxt_re, cxgb4, rxe, hns, i40iw,
cxgb4, mlx4 and mlx5

- Bug fixes and polishing for the new rtrs ULP

- Cleanup of uverbs checking for allowed driver operations

- Use sysfs_emit all over the place

- Lots of bug fixes and clarity improvements for hns

- hip09 support for hns

- NDR and 50/100Gb signaling rates

- Remove dma_virt_ops and go back to using the IB DMA wrappers

- mlx5 optimizations for contiguous DMA regions"
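
The sysfs_emit conversion mentioned above is a mechanical pattern repeated across the sysfs show() callbacks touched in the diffs below. A minimal sketch of the before/after shape, using illustrative attribute and field names rather than anything from this series:

        /* Illustrative show() callback: sysfs_emit() knows the buffer is a
         * full PAGE_SIZE, so it cannot overrun it the way an unchecked
         * sprintf() could. */
        static ssize_t example_show(struct device *device,
                                    struct device_attribute *attr, char *buf)
        {
                struct ib_device *dev = rdma_device_to_ibdev(device);

                /* before: return sprintf(buf, "%u\n", dev->phys_port_cnt); */
                return sysfs_emit(buf, "%u\n", dev->phys_port_cnt);
        }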

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (147 commits)
RDMA/cma: Don't overwrite sgid_attr after device is released
RDMA/mlx5: Fix MR cache memory leak
RDMA/rxe: Use acquire/release for memory ordering
RDMA/hns: Simplify AEQE process for different types of queue
RDMA/hns: Fix inaccurate prints
RDMA/hns: Fix incorrect symbol types
RDMA/hns: Clear redundant variable initialization
RDMA/hns: Fix coding style issues
RDMA/hns: Remove unnecessary access right set during INIT2INIT
RDMA/hns: WARN_ON if get a reserved sl from users
RDMA/hns: Avoid filling sl in high 3 bits of vlan_id
RDMA/hns: Do shift on traffic class when using RoCEv2
RDMA/hns: Normalization the judgment of some features
RDMA/hns: Limit the length of data copied between kernel and userspace
RDMA/mlx4: Remove bogus dev_base_lock usage
RDMA/uverbs: Fix incorrect variable type
RDMA/core: Do not indicate device ready when device enablement fails
RDMA/core: Clean up cq pool mechanism
RDMA/core: Update kernel documentation for ib_create_named_qp()
MAINTAINERS: SOFT-ROCE: Change Zhu Yanjun's email address
...
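
The dma_virt_ops removal listed in the pull message shows up in the device.c, rw.c and umem.c hunks below: software devices now leave dma_device unset, and the ib_dma_* wrappers fall back to plain kernel virtual addresses (see the ib_dma_virt_map_sg() hunk in device.c). A rough sketch of a ULP-side mapping through the wrappers; the function name and error handling here are illustrative only:

        /* Illustrative ULP-side mapping through the IB DMA wrappers; on a
         * software device (ib_uses_virt_dma() == true) the "DMA" address is
         * simply the kernel virtual address of each SG entry. */
        static int example_map(struct ib_device *dev, struct scatterlist *sg,
                               int nents)
        {
                int count;

                count = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
                if (unlikely(!count))
                        return -EIO;
                return count;
        }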

+3871 -3941
+1
.mailmap
··· 345 345
 Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
 Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
 Yusuke Goda <goda.yusuke@renesas.com>
+Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
+1 -1
MAINTAINERS
··· 16399 16399
 F:	include/uapi/rdma/siw-abi.h
 
 SOFT-ROCE DRIVER (rxe)
-M:	Zhu Yanjun <yanjunz@nvidia.com>
+M:	Zhu Yanjun <zyjzyj2000@gmail.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/sw/rxe/
+5 -4
drivers/infiniband/core/cm.c
··· 1251 1251
 EXPORT_SYMBOL(ib_cm_listen);
 
 /**
- * Create a new listening ib_cm_id and listen on the given service ID.
+ * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
+ * the given service ID.
  *
  * If there's an existing ID listening on that same device and service ID,
  * return it.
··· 1766 1765
 }
 
 /**
- * Convert OPA SGID to IB SGID
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
  * ULPs (such as IPoIB) do not understand OPA GIDs and will
  * reject them as the local_gid will not match the sgid. Therefore,
  * change the pathrec's SGID to an IB SGID.
··· 4274 4273
         group = container_of(obj, struct cm_counter_group, obj);
         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
 
-        return sprintf(buf, "%ld\n",
-                       atomic_long_read(&group->counter[cm_attr->index]));
+        return sysfs_emit(buf, "%ld\n",
+                          atomic_long_read(&group->counter[cm_attr->index]));
 }
 
 static const struct sysfs_ops cm_counter_ops = {
+117 -78
drivers/infiniband/core/cma.c
··· 477 477 list_del(&id_priv->list); 478 478 cma_dev_put(id_priv->cma_dev); 479 479 id_priv->cma_dev = NULL; 480 + if (id_priv->id.route.addr.dev_addr.sgid_attr) { 481 + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); 482 + id_priv->id.route.addr.dev_addr.sgid_attr = NULL; 483 + } 480 484 mutex_unlock(&lock); 481 485 } 482 486 ··· 1865 1861 1866 1862 kfree(id_priv->id.route.path_rec); 1867 1863 1868 - if (id_priv->id.route.addr.dev_addr.sgid_attr) 1869 - rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); 1870 - 1871 1864 put_net(id_priv->id.route.addr.dev_addr.net); 1872 1865 rdma_restrack_del(&id_priv->res); 1873 1866 kfree(id_priv); ··· 2496 2495 return id_priv->id.event_handler(id, event); 2497 2496 } 2498 2497 2499 - static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2500 - struct cma_device *cma_dev) 2498 + static int cma_listen_on_dev(struct rdma_id_private *id_priv, 2499 + struct cma_device *cma_dev, 2500 + struct rdma_id_private **to_destroy) 2501 2501 { 2502 2502 struct rdma_id_private *dev_id_priv; 2503 2503 struct net *net = id_priv->id.route.addr.dev_addr.net; ··· 2506 2504 2507 2505 lockdep_assert_held(&lock); 2508 2506 2507 + *to_destroy = NULL; 2509 2508 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2510 - return; 2509 + return 0; 2511 2510 2512 2511 dev_id_priv = 2513 2512 __rdma_create_id(net, cma_listen_handler, id_priv, 2514 2513 id_priv->id.ps, id_priv->id.qp_type, id_priv); 2515 2514 if (IS_ERR(dev_id_priv)) 2516 - return; 2515 + return PTR_ERR(dev_id_priv); 2517 2516 2518 2517 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2519 2518 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2520 2519 rdma_addr_size(cma_src_addr(id_priv))); 2521 2520 2522 2521 _cma_attach_to_dev(dev_id_priv, cma_dev); 2523 - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2524 2522 cma_id_get(id_priv); 2525 2523 dev_id_priv->internal_id = 1; 2526 2524 dev_id_priv->afonly = id_priv->afonly; ··· 2529 2527 2530 2528 ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); 2531 2529 if (ret) 2532 - dev_warn(&cma_dev->device->dev, 2533 - "RDMA CMA: cma_listen_on_dev, error %d\n", ret); 2530 + goto err_listen; 2531 + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2532 + return 0; 2533 + err_listen: 2534 + /* Caller must destroy this after releasing lock */ 2535 + *to_destroy = dev_id_priv; 2536 + dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); 2537 + return ret; 2534 2538 } 2535 2539 2536 - static void cma_listen_on_all(struct rdma_id_private *id_priv) 2540 + static int cma_listen_on_all(struct rdma_id_private *id_priv) 2537 2541 { 2542 + struct rdma_id_private *to_destroy; 2538 2543 struct cma_device *cma_dev; 2544 + int ret; 2539 2545 2540 2546 mutex_lock(&lock); 2541 2547 list_add_tail(&id_priv->list, &listen_any_list); 2542 - list_for_each_entry(cma_dev, &dev_list, list) 2543 - cma_listen_on_dev(id_priv, cma_dev); 2548 + list_for_each_entry(cma_dev, &dev_list, list) { 2549 + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); 2550 + if (ret) { 2551 + /* Prevent racing with cma_process_remove() */ 2552 + if (to_destroy) 2553 + list_del_init(&to_destroy->list); 2554 + goto err_listen; 2555 + } 2556 + } 2544 2557 mutex_unlock(&lock); 2558 + return 0; 2559 + 2560 + err_listen: 2561 + list_del(&id_priv->list); 2562 + mutex_unlock(&lock); 2563 + if (to_destroy) 2564 + rdma_destroy_id(&to_destroy->id); 2565 + return ret; 2545 2566 } 2546 2567 2547 2568 void 
rdma_set_service_type(struct rdma_cm_id *id, int tos) ··· 3717 3692 ret = -ENOSYS; 3718 3693 goto err; 3719 3694 } 3720 - } else 3721 - cma_listen_on_all(id_priv); 3695 + } else { 3696 + ret = cma_listen_on_all(id_priv); 3697 + if (ret) 3698 + goto err; 3699 + } 3722 3700 3723 3701 return 0; 3724 3702 err: ··· 4801 4773 .notifier_call = cma_netdev_callback 4802 4774 }; 4803 4775 4804 - static int cma_add_one(struct ib_device *device) 4805 - { 4806 - struct cma_device *cma_dev; 4807 - struct rdma_id_private *id_priv; 4808 - unsigned int i; 4809 - unsigned long supported_gids = 0; 4810 - int ret; 4811 - 4812 - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); 4813 - if (!cma_dev) 4814 - return -ENOMEM; 4815 - 4816 - cma_dev->device = device; 4817 - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, 4818 - sizeof(*cma_dev->default_gid_type), 4819 - GFP_KERNEL); 4820 - if (!cma_dev->default_gid_type) { 4821 - ret = -ENOMEM; 4822 - goto free_cma_dev; 4823 - } 4824 - 4825 - cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, 4826 - sizeof(*cma_dev->default_roce_tos), 4827 - GFP_KERNEL); 4828 - if (!cma_dev->default_roce_tos) { 4829 - ret = -ENOMEM; 4830 - goto free_gid_type; 4831 - } 4832 - 4833 - rdma_for_each_port (device, i) { 4834 - supported_gids = roce_gid_type_mask_support(device, i); 4835 - WARN_ON(!supported_gids); 4836 - if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) 4837 - cma_dev->default_gid_type[i - rdma_start_port(device)] = 4838 - CMA_PREFERRED_ROCE_GID_TYPE; 4839 - else 4840 - cma_dev->default_gid_type[i - rdma_start_port(device)] = 4841 - find_first_bit(&supported_gids, BITS_PER_LONG); 4842 - cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; 4843 - } 4844 - 4845 - init_completion(&cma_dev->comp); 4846 - refcount_set(&cma_dev->refcount, 1); 4847 - INIT_LIST_HEAD(&cma_dev->id_list); 4848 - ib_set_client_data(device, &cma_client, cma_dev); 4849 - 4850 - mutex_lock(&lock); 4851 - list_add_tail(&cma_dev->list, &dev_list); 4852 - list_for_each_entry(id_priv, &listen_any_list, list) 4853 - cma_listen_on_dev(id_priv, cma_dev); 4854 - mutex_unlock(&lock); 4855 - 4856 - trace_cm_add_one(device); 4857 - return 0; 4858 - 4859 - free_gid_type: 4860 - kfree(cma_dev->default_gid_type); 4861 - 4862 - free_cma_dev: 4863 - kfree(cma_dev); 4864 - return ret; 4865 - } 4866 - 4867 4776 static void cma_send_device_removal_put(struct rdma_id_private *id_priv) 4868 4777 { 4869 4778 struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; ··· 4861 4896 4862 4897 cma_dev_put(cma_dev); 4863 4898 wait_for_completion(&cma_dev->comp); 4899 + } 4900 + 4901 + static int cma_add_one(struct ib_device *device) 4902 + { 4903 + struct rdma_id_private *to_destroy; 4904 + struct cma_device *cma_dev; 4905 + struct rdma_id_private *id_priv; 4906 + unsigned int i; 4907 + unsigned long supported_gids = 0; 4908 + int ret; 4909 + 4910 + cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); 4911 + if (!cma_dev) 4912 + return -ENOMEM; 4913 + 4914 + cma_dev->device = device; 4915 + cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, 4916 + sizeof(*cma_dev->default_gid_type), 4917 + GFP_KERNEL); 4918 + if (!cma_dev->default_gid_type) { 4919 + ret = -ENOMEM; 4920 + goto free_cma_dev; 4921 + } 4922 + 4923 + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, 4924 + sizeof(*cma_dev->default_roce_tos), 4925 + GFP_KERNEL); 4926 + if (!cma_dev->default_roce_tos) { 4927 + ret = -ENOMEM; 4928 + goto free_gid_type; 4929 + } 4930 + 4931 + rdma_for_each_port (device, i) { 4932 + 
supported_gids = roce_gid_type_mask_support(device, i); 4933 + WARN_ON(!supported_gids); 4934 + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) 4935 + cma_dev->default_gid_type[i - rdma_start_port(device)] = 4936 + CMA_PREFERRED_ROCE_GID_TYPE; 4937 + else 4938 + cma_dev->default_gid_type[i - rdma_start_port(device)] = 4939 + find_first_bit(&supported_gids, BITS_PER_LONG); 4940 + cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; 4941 + } 4942 + 4943 + init_completion(&cma_dev->comp); 4944 + refcount_set(&cma_dev->refcount, 1); 4945 + INIT_LIST_HEAD(&cma_dev->id_list); 4946 + ib_set_client_data(device, &cma_client, cma_dev); 4947 + 4948 + mutex_lock(&lock); 4949 + list_add_tail(&cma_dev->list, &dev_list); 4950 + list_for_each_entry(id_priv, &listen_any_list, list) { 4951 + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); 4952 + if (ret) 4953 + goto free_listen; 4954 + } 4955 + mutex_unlock(&lock); 4956 + 4957 + trace_cm_add_one(device); 4958 + return 0; 4959 + 4960 + free_listen: 4961 + list_del(&cma_dev->list); 4962 + mutex_unlock(&lock); 4963 + 4964 + /* cma_process_remove() will delete to_destroy */ 4965 + cma_process_remove(cma_dev); 4966 + kfree(cma_dev->default_roce_tos); 4967 + free_gid_type: 4968 + kfree(cma_dev->default_gid_type); 4969 + 4970 + free_cma_dev: 4971 + kfree(cma_dev); 4972 + return ret; 4864 4973 } 4865 4974 4866 4975 static void cma_remove_one(struct ib_device *device, void *client_data)
+2 -2
drivers/infiniband/core/cma_configfs.c
··· 115 115
         if (gid_type < 0)
                 return gid_type;
 
-        return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+        return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type));
 }
 
 static ssize_t default_roce_mode_store(struct config_item *item,
··· 157 157
         tos = cma_get_default_roce_tos(cma_dev, group->port_num);
         cma_configfs_params_put(cma_dev);
 
-        return sprintf(buf, "%u\n", tos);
+        return sysfs_emit(buf, "%u\n", tos);
 }
 
 static ssize_t default_roce_tos_store(struct config_item *item,
+9 -19
drivers/infiniband/core/core_priv.h
··· 318 318
 void nldev_init(void);
 void nldev_exit(void);
 
-static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
-                                          struct ib_pd *pd,
-                                          struct ib_qp_init_attr *attr,
-                                          struct ib_udata *udata,
-                                          struct ib_uqp_object *uobj)
+static inline struct ib_qp *
+_ib_create_qp(struct ib_device *dev, struct ib_pd *pd,
+              struct ib_qp_init_attr *attr, struct ib_udata *udata,
+              struct ib_uqp_object *uobj, const char *caller)
 {
-        enum ib_qp_type qp_type = attr->qp_type;
         struct ib_qp *qp;
-        bool is_xrc;
 
         if (!dev->ops.create_qp)
                 return ERR_PTR(-EOPNOTSUPP);
··· 344 347
         qp->srq = attr->srq;
         qp->rwq_ind_tbl = attr->rwq_ind_tbl;
         qp->event_handler = attr->event_handler;
+        qp->port = attr->port_num;
 
         atomic_set(&qp->usecnt, 0);
         spin_lock_init(&qp->mr_lock);
··· 352 354
         INIT_LIST_HEAD(&qp->sig_mrs);
 
         rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
-        /*
-         * We don't track XRC QPs for now, because they don't have PD
-         * and more importantly they are created internaly by driver,
-         * see mlx5 create_dev_resources() as an example.
-         */
-        is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
-        if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
-                rdma_restrack_parent_name(&qp->res, &pd->res);
-                rdma_restrack_add(&qp->res);
-        }
+        WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+        rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+        rdma_restrack_add(&qp->res);
         return qp;
 }
 
··· 402 411
                               struct vm_area_struct *vma,
                               struct rdma_user_mmap_entry *entry);
 
-void ib_cq_pool_init(struct ib_device *dev);
-void ib_cq_pool_destroy(struct ib_device *dev);
+void ib_cq_pool_cleanup(struct ib_device *dev);
 
 #endif /* _CORE_PRIV_H */
+63 -75
drivers/infiniband/core/counters.c
··· 64 64 return ret; 65 65 } 66 66 67 - static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, 68 - enum rdma_nl_counter_mode mode) 67 + static void auto_mode_init_counter(struct rdma_counter *counter, 68 + const struct ib_qp *qp, 69 + enum rdma_nl_counter_mask new_mask) 70 + { 71 + struct auto_mode_param *param = &counter->mode.param; 72 + 73 + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 74 + counter->mode.mask = new_mask; 75 + 76 + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 77 + param->qp_type = qp->qp_type; 78 + } 79 + 80 + static int __rdma_counter_bind_qp(struct rdma_counter *counter, 81 + struct ib_qp *qp) 82 + { 83 + int ret; 84 + 85 + if (qp->counter) 86 + return -EINVAL; 87 + 88 + if (!qp->device->ops.counter_bind_qp) 89 + return -EOPNOTSUPP; 90 + 91 + mutex_lock(&counter->lock); 92 + ret = qp->device->ops.counter_bind_qp(counter, qp); 93 + mutex_unlock(&counter->lock); 94 + 95 + return ret; 96 + } 97 + 98 + static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, 99 + struct ib_qp *qp, 100 + enum rdma_nl_counter_mode mode) 69 101 { 70 102 struct rdma_port_counter *port_counter; 71 103 struct rdma_counter *counter; ··· 120 88 121 89 port_counter = &dev->port_data[port].port_counter; 122 90 mutex_lock(&port_counter->lock); 123 - if (mode == RDMA_COUNTER_MODE_MANUAL) { 91 + switch (mode) { 92 + case RDMA_COUNTER_MODE_MANUAL: 124 93 ret = __counter_set_mode(&port_counter->mode, 125 94 RDMA_COUNTER_MODE_MANUAL, 0); 126 - if (ret) 95 + if (ret) { 96 + mutex_unlock(&port_counter->lock); 127 97 goto err_mode; 98 + } 99 + break; 100 + case RDMA_COUNTER_MODE_AUTO: 101 + auto_mode_init_counter(counter, qp, port_counter->mode.mask); 102 + break; 103 + default: 104 + ret = -EOPNOTSUPP; 105 + mutex_unlock(&port_counter->lock); 106 + goto err_mode; 128 107 } 129 108 130 109 port_counter->num_counters++; ··· 145 102 kref_init(&counter->kref); 146 103 mutex_init(&counter->lock); 147 104 105 + ret = __rdma_counter_bind_qp(counter, qp); 106 + if (ret) 107 + goto err_mode; 108 + 109 + rdma_restrack_parent_name(&counter->res, &qp->res); 110 + rdma_restrack_add(&counter->res); 148 111 return counter; 149 112 150 113 err_mode: 151 - mutex_unlock(&port_counter->lock); 152 114 kfree(counter->stats); 153 115 err_stats: 154 116 rdma_restrack_put(&counter->res); ··· 180 132 kfree(counter); 181 133 } 182 134 183 - static void auto_mode_init_counter(struct rdma_counter *counter, 184 - const struct ib_qp *qp, 185 - enum rdma_nl_counter_mask new_mask) 186 - { 187 - struct auto_mode_param *param = &counter->mode.param; 188 - 189 - counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 190 - counter->mode.mask = new_mask; 191 - 192 - if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 193 - param->qp_type = qp->qp_type; 194 - } 195 - 196 135 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 197 136 enum rdma_nl_counter_mask auto_mask) 198 137 { ··· 194 159 task_pid_nr(qp->res.task)); 195 160 196 161 return match; 197 - } 198 - 199 - static int __rdma_counter_bind_qp(struct rdma_counter *counter, 200 - struct ib_qp *qp) 201 - { 202 - int ret; 203 - 204 - if (qp->counter) 205 - return -EINVAL; 206 - 207 - if (!qp->device->ops.counter_bind_qp) 208 - return -EOPNOTSUPP; 209 - 210 - mutex_lock(&counter->lock); 211 - ret = qp->device->ops.counter_bind_qp(counter, qp); 212 - mutex_unlock(&counter->lock); 213 - 214 - return ret; 215 162 } 216 163 217 164 static int __rdma_counter_unbind_qp(struct ib_qp *qp) ··· 264 247 return counter; 265 248 } 266 249 267 - static void 
rdma_counter_res_add(struct rdma_counter *counter, 268 - struct ib_qp *qp) 269 - { 270 - rdma_restrack_parent_name(&counter->res, &qp->res); 271 - rdma_restrack_add(&counter->res); 272 - } 273 - 274 250 static void counter_release(struct kref *kref) 275 251 { 276 252 struct rdma_counter *counter; ··· 285 275 struct rdma_counter *counter; 286 276 int ret; 287 277 288 - if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) 278 + if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) 289 279 return 0; 290 280 291 281 if (!rdma_is_port_valid(dev, port)) ··· 303 293 return ret; 304 294 } 305 295 } else { 306 - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); 296 + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); 307 297 if (!counter) 308 298 return -ENOMEM; 309 - 310 - auto_mode_init_counter(counter, qp, port_counter->mode.mask); 311 - 312 - ret = __rdma_counter_bind_qp(counter, qp); 313 - if (ret) { 314 - rdma_counter_free(counter); 315 - return ret; 316 - } 317 - 318 - rdma_counter_res_add(counter, qp); 319 299 } 320 300 321 301 return 0; ··· 419 419 return NULL; 420 420 } 421 421 422 - static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, 423 - struct ib_qp *qp) 424 - { 425 - if ((counter->device != qp->device) || (counter->port != qp->port)) 426 - return -EINVAL; 427 - 428 - return __rdma_counter_bind_qp(counter, qp); 429 - } 430 - 431 422 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, 432 423 u32 counter_id) 433 424 { ··· 466 475 goto err_task; 467 476 } 468 477 469 - ret = rdma_counter_bind_qp_manual(counter, qp); 478 + if ((counter->device != qp->device) || (counter->port != qp->port)) { 479 + ret = -EINVAL; 480 + goto err_task; 481 + } 482 + 483 + ret = __rdma_counter_bind_qp(counter, qp); 470 484 if (ret) 471 485 goto err_task; 472 486 ··· 516 520 goto err; 517 521 } 518 522 519 - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); 523 + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); 520 524 if (!counter) { 521 525 ret = -ENOMEM; 522 526 goto err; 523 527 } 524 528 525 - ret = rdma_counter_bind_qp_manual(counter, qp); 526 - if (ret) 527 - goto err_bind; 528 - 529 529 if (counter_id) 530 530 *counter_id = counter->id; 531 531 532 - rdma_counter_res_add(counter, qp); 533 - 534 532 rdma_restrack_put(&qp->res); 535 - return ret; 533 + return 0; 536 534 537 - err_bind: 538 - rdma_counter_free(counter); 539 535 err: 540 536 rdma_restrack_put(&qp->res); 541 537 return ret;
+4 -12
drivers/infiniband/core/cq.c
··· 123 123
 }
 
 /**
- * ib_process_direct_cq - process a CQ in caller context
+ * ib_process_cq_direct - process a CQ in caller context
  * @cq: CQ to process
  * @budget: number of CQEs to poll for
  *
··· 197 197
 }
 
 /**
- * __ib_alloc_cq allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
  * @dev: device to allocate the CQ for
  * @private: driver private data, accessible from cq->cq_context
  * @nr_cqe: number of CQEs to allocate
··· 349 349
 }
 EXPORT_SYMBOL(ib_free_cq);
 
-void ib_cq_pool_init(struct ib_device *dev)
-{
-        unsigned int i;
-
-        spin_lock_init(&dev->cq_pools_lock);
-        for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++)
-                INIT_LIST_HEAD(&dev->cq_pools[i]);
-}
-
-void ib_cq_pool_destroy(struct ib_device *dev)
+void ib_cq_pool_cleanup(struct ib_device *dev)
 {
         struct ib_cq *cq, *n;
         unsigned int i;
··· 358 367
                 list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
                                          pool_entry) {
                         WARN_ON(cq->cqe_used);
+                        list_del(&cq->pool_entry);
                         cq->shared = false;
                         ib_free_cq(cq);
                 }
+66 -26
drivers/infiniband/core/device.c
··· 284 284 IB_MANDATORY_FUNC(poll_cq), 285 285 IB_MANDATORY_FUNC(req_notify_cq), 286 286 IB_MANDATORY_FUNC(get_dma_mr), 287 + IB_MANDATORY_FUNC(reg_user_mr), 287 288 IB_MANDATORY_FUNC(dereg_mr), 288 289 IB_MANDATORY_FUNC(get_port_immutable) 289 290 }; ··· 570 569 struct ib_device *_ib_alloc_device(size_t size) 571 570 { 572 571 struct ib_device *device; 572 + unsigned int i; 573 573 574 574 if (WARN_ON(size < sizeof(struct ib_device))) 575 575 return NULL; ··· 602 600 init_completion(&device->unreg_completion); 603 601 INIT_WORK(&device->unregistration_work, ib_unregister_work); 604 602 603 + spin_lock_init(&device->cq_pools_lock); 604 + for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++) 605 + INIT_LIST_HEAD(&device->cq_pools[i]); 606 + 607 + device->uverbs_cmd_mask = 608 + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | 609 + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | 610 + BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | 611 + BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | 612 + BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | 613 + BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 614 + BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | 615 + BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | 616 + BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | 617 + BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) | 618 + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | 619 + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | 620 + BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | 621 + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | 622 + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | 623 + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | 624 + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | 625 + BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) | 626 + BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | 627 + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | 628 + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | 629 + BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) | 630 + BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) | 631 + BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | 632 + BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | 633 + BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | 634 + BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | 635 + BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | 636 + BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | 637 + BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); 605 638 return device; 606 639 } 607 640 EXPORT_SYMBOL(_ib_alloc_device); ··· 1214 1177 return ret; 1215 1178 } 1216 1179 1217 - static void setup_dma_device(struct ib_device *device, 1218 - struct device *dma_device) 1219 - { 1220 - /* 1221 - * If the caller does not provide a DMA capable device then the IB 1222 - * device will be used. In this case the caller should fully setup the 1223 - * ibdev for DMA. This usually means using dma_virt_ops. 
1224 - */ 1225 - #ifdef CONFIG_DMA_VIRT_OPS 1226 - if (!dma_device) { 1227 - device->dev.dma_ops = &dma_virt_ops; 1228 - dma_device = &device->dev; 1229 - } 1230 - #endif 1231 - WARN_ON(!dma_device); 1232 - device->dma_device = dma_device; 1233 - WARN_ON(!device->dma_device->dma_parms); 1234 - } 1235 - 1236 1180 /* 1237 1181 * setup_device() allocates memory and sets up data that requires calling the 1238 1182 * device ops, this is the only reason these actions are not done during ··· 1267 1249 remove_client_context(device, cid); 1268 1250 } 1269 1251 1270 - ib_cq_pool_destroy(device); 1252 + ib_cq_pool_cleanup(device); 1271 1253 1272 1254 /* Pairs with refcount_set in enable_device */ 1273 1255 ib_device_put(device); ··· 1311 1293 if (ret) 1312 1294 goto out; 1313 1295 } 1314 - 1315 - ib_cq_pool_init(device); 1316 1296 1317 1297 down_read(&clients_rwsem); 1318 1298 xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ··· 1357 1341 if (ret) 1358 1342 return ret; 1359 1343 1360 - setup_dma_device(device, dma_device); 1344 + /* 1345 + * If the caller does not provide a DMA capable device then the IB core 1346 + * will set up ib_sge and scatterlist structures that stash the kernel 1347 + * virtual address into the address field. 1348 + */ 1349 + WARN_ON(dma_device && !dma_device->dma_parms); 1350 + device->dma_device = dma_device; 1351 + 1361 1352 ret = setup_device(device); 1362 1353 if (ret) 1363 1354 return ret; ··· 1397 1374 } 1398 1375 1399 1376 ret = enable_device_and_get(device); 1400 - dev_set_uevent_suppress(&device->dev, false); 1401 - /* Mark for userspace that device is ready */ 1402 - kobject_uevent(&device->dev.kobj, KOBJ_ADD); 1403 1377 if (ret) { 1404 1378 void (*dealloc_fn)(struct ib_device *); 1405 1379 ··· 1416 1396 ib_device_put(device); 1417 1397 __ib_unregister_device(device); 1418 1398 device->ops.dealloc_driver = dealloc_fn; 1399 + dev_set_uevent_suppress(&device->dev, false); 1419 1400 return ret; 1420 1401 } 1402 + dev_set_uevent_suppress(&device->dev, false); 1403 + /* Mark for userspace that device is ready */ 1404 + kobject_uevent(&device->dev.kobj, KOBJ_ADD); 1421 1405 ib_device_put(device); 1422 1406 1423 1407 return 0; ··· 2600 2576 SET_DEVICE_OP(dev_ops, create_qp); 2601 2577 SET_DEVICE_OP(dev_ops, create_rwq_ind_table); 2602 2578 SET_DEVICE_OP(dev_ops, create_srq); 2579 + SET_DEVICE_OP(dev_ops, create_user_ah); 2603 2580 SET_DEVICE_OP(dev_ops, create_wq); 2604 2581 SET_DEVICE_OP(dev_ops, dealloc_dm); 2605 2582 SET_DEVICE_OP(dev_ops, dealloc_driver); ··· 2699 2674 SET_OBJ_SIZE(dev_ops, ib_xrcd); 2700 2675 } 2701 2676 EXPORT_SYMBOL(ib_set_device_ops); 2677 + 2678 + #ifdef CONFIG_INFINIBAND_VIRT_DMA 2679 + int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) 2680 + { 2681 + struct scatterlist *s; 2682 + int i; 2683 + 2684 + for_each_sg(sg, s, nents, i) { 2685 + sg_dma_address(s) = (uintptr_t)sg_virt(s); 2686 + sg_dma_len(s) = s->length; 2687 + } 2688 + return nents; 2689 + } 2690 + EXPORT_SYMBOL(ib_dma_virt_map_sg); 2691 + #endif /* CONFIG_INFINIBAND_VIRT_DMA */ 2702 2692 2703 2693 static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { 2704 2694 [RDMA_NL_LS_OP_RESOLVE] = {
+1 -1
drivers/infiniband/core/iwpm_util.h
··· 141 141
 int iwpm_get_nlmsg_seq(void);
 
 /**
- * iwpm_add_reminfo - Add remote address info of the connecting peer
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
  *                    to the remote info hash table
  * @reminfo: The remote info to be added
  */
+68 -33
drivers/infiniband/core/rdma_core.c
··· 137 137 } else if (uobj->object) { 138 138 ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, 139 139 attrs); 140 - if (ret) { 141 - if (ib_is_destroy_retryable(ret, reason, uobj)) 142 - return ret; 143 - 144 - /* Nothing to be done, dangle the memory and move on */ 145 - WARN(true, 146 - "ib_uverbs: failed to remove uobject id %d, driver err=%d", 147 - uobj->id, ret); 148 - } 140 + if (ret) 141 + /* Nothing to be done, wait till ucontext will clean it */ 142 + return ret; 149 143 150 144 uobj->object = NULL; 151 145 } ··· 537 543 struct uverbs_obj_idr_type, type); 538 544 int ret = idr_type->destroy_object(uobj, why, attrs); 539 545 540 - /* 541 - * We can only fail gracefully if the user requested to destroy the 542 - * object or when a retry may be called upon an error. 543 - * In the rest of the cases, just remove whatever you can. 544 - */ 545 - if (ib_is_destroy_retryable(ret, why, uobj)) 546 + if (ret) 546 547 return ret; 547 548 548 549 if (why == RDMA_REMOVE_ABORT) ··· 570 581 { 571 582 const struct uverbs_obj_fd_type *fd_type = container_of( 572 583 uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); 573 - int ret = fd_type->destroy_object(uobj, why); 574 584 575 - if (ib_is_destroy_retryable(ret, why, uobj)) 576 - return ret; 577 - 585 + fd_type->destroy_object(uobj, why); 578 586 return 0; 579 587 } 580 588 ··· 592 606 * It will be put by remove_commit_idr_uobject() 593 607 */ 594 608 old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL); 609 + WARN_ON(old != NULL); 610 + } 611 + 612 + static void swap_idr_uobjects(struct ib_uobject *obj_old, 613 + struct ib_uobject *obj_new) 614 + { 615 + struct ib_uverbs_file *ufile = obj_old->ufile; 616 + void *old; 617 + 618 + /* 619 + * New must be an object that been allocated but not yet committed, this 620 + * moves the pre-committed state to obj_old, new still must be comitted. 621 + */ 622 + old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY, 623 + GFP_KERNEL); 624 + if (WARN_ON(old != obj_old)) 625 + return; 626 + 627 + swap(obj_old->id, obj_new->id); 628 + 629 + old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL); 595 630 WARN_ON(old != NULL); 596 631 } 597 632 ··· 659 652 660 653 /* Matches the down_read in rdma_alloc_begin_uobject */ 661 654 up_read(&ufile->hw_destroy_rwsem); 655 + } 656 + 657 + /* 658 + * new_uobj will be assigned to the handle currently used by to_uobj, and 659 + * to_uobj will be destroyed. 660 + * 661 + * Upon return the caller must do: 662 + * rdma_alloc_commit_uobject(new_uobj) 663 + * uobj_put_destroy(to_uobj) 664 + * 665 + * to_uobj must have a write get but the put mode switches to destroy once 666 + * this is called. 667 + */ 668 + void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj, 669 + struct uverbs_attr_bundle *attrs) 670 + { 671 + assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE); 672 + 673 + if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object || 674 + !to_uobj->uapi_object->type_class->swap_uobjects)) 675 + return; 676 + 677 + to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj); 678 + 679 + /* 680 + * If this fails then the uobject is still completely valid (though with 681 + * a new ID) and we leak it until context close. 
682 + */ 683 + uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs); 662 684 } 663 685 664 686 /* ··· 797 761 .lookup_put = lookup_put_idr_uobject, 798 762 .destroy_hw = destroy_hw_idr_uobject, 799 763 .remove_handle = remove_handle_idr_uobject, 764 + .swap_uobjects = swap_idr_uobjects, 800 765 }; 801 766 EXPORT_SYMBOL(uverbs_idr_class); 802 767 ··· 900 863 * racing with a lookup_get. 901 864 */ 902 865 WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); 866 + if (reason == RDMA_REMOVE_DRIVER_FAILURE) 867 + obj->object = NULL; 903 868 if (!uverbs_destroy_uobject(obj, reason, &attrs)) 904 869 ret = 0; 905 870 else 906 871 atomic_set(&obj->usecnt, 0); 872 + } 873 + 874 + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { 875 + WARN_ON(!list_empty(&ufile->uobjects)); 876 + return 0; 907 877 } 908 878 return ret; 909 879 } ··· 933 889 if (!ufile->ucontext) 934 890 goto done; 935 891 936 - ufile->ucontext->cleanup_retryable = true; 937 - while (!list_empty(&ufile->uobjects)) 938 - if (__uverbs_cleanup_ufile(ufile, reason)) { 939 - /* 940 - * No entry was cleaned-up successfully during this 941 - * iteration. It is a driver bug to fail destruction. 942 - */ 943 - WARN_ON(!list_empty(&ufile->uobjects)); 944 - break; 945 - } 892 + while (!list_empty(&ufile->uobjects) && 893 + !__uverbs_cleanup_ufile(ufile, reason)) { 894 + } 946 895 947 - ufile->ucontext->cleanup_retryable = false; 948 - if (!list_empty(&ufile->uobjects)) 949 - __uverbs_cleanup_ufile(ufile, reason); 950 - 896 + if (WARN_ON(!list_empty(&ufile->uobjects))) 897 + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); 951 898 ufile_destroy_ucontext(ufile, reason); 952 899 953 900 done:
+19 -4
drivers/infiniband/core/restrack.c
··· 221 221
 {
         struct ib_device *dev = res_to_dev(res);
         struct rdma_restrack_root *rt;
-        int ret;
+        int ret = 0;
 
         if (!dev)
                 return;
+
+        if (res->no_track)
+                goto out;
 
         rt = &dev->res[res->type];
 
··· 235 232
                 /* Special case to ensure that LQPN points to right QP */
                 struct ib_qp *qp = container_of(res, struct ib_qp, res);
 
-                ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
-                res->id = ret ? 0 : qp->qp_num;
+                WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8,
+                          "QP number 0x%0X and port 0x%0X", qp->qp_num,
+                          qp->port);
+                res->id = qp->qp_num;
+                if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI)
+                        res->id |= qp->port << 24;
+                ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL);
+                if (ret)
+                        res->id = 0;
         } else if (res->type == RDMA_RESTRACK_COUNTER) {
                 /* Special case to ensure that cntn points to right counter */
                 struct rdma_counter *counter;
··· 256 246
                                       &rt->next_id, GFP_KERNEL);
         }
 
+out:
         if (!ret)
                 res->valid = true;
 }
··· 329 318
                 return;
         }
 
+        if (res->no_track)
+                goto out;
+
         dev = res_to_dev(res);
         if (WARN_ON(!dev))
                 return;
··· 342 328
         if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
                 return;
         WARN_ON(old != res);
-        res->valid = false;
 
+out:
+        res->valid = false;
         rdma_restrack_put(res);
         wait_for_completion(&res->comp);
 }
+4 -1
drivers/infiniband/core/rw.c
··· 285 285
 static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
                           u32 sg_cnt, enum dma_data_direction dir)
 {
-        if (is_pci_p2pdma_page(sg_page(sg)))
+        if (is_pci_p2pdma_page(sg_page(sg))) {
+                if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
+                        return 0;
                 return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
+        }
         return ib_dma_map_sg(dev, sg, sg_cnt, dir);
 }
 
+2 -1
drivers/infiniband/core/sa_query.c
··· 1435 1435
 };
 
 /**
- * Check if current PR query can be an OPA query.
+ * opa_pr_query_possible - Check if current PR query can be an OPA query.
+ *
  * Retuns PR_NOT_SUPPORTED if a path record query is not
  * possible, PR_OPA_SUPPORTED if an OPA path record query
  * is possible and PR_IB_SUPPORTED if an IB path record
+97 -69
drivers/infiniband/core/sysfs.c
··· 165 165 if (ret) 166 166 return ret; 167 167 168 - return sprintf(buf, "%d: %s\n", attr.state, 169 - attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? 170 - state_name[attr.state] : "UNKNOWN"); 168 + return sysfs_emit(buf, "%d: %s\n", attr.state, 169 + attr.state >= 0 && 170 + attr.state < ARRAY_SIZE(state_name) ? 171 + state_name[attr.state] : 172 + "UNKNOWN"); 171 173 } 172 174 173 175 static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, ··· 182 180 if (ret) 183 181 return ret; 184 182 185 - return sprintf(buf, "0x%x\n", attr.lid); 183 + return sysfs_emit(buf, "0x%x\n", attr.lid); 186 184 } 187 185 188 186 static ssize_t lid_mask_count_show(struct ib_port *p, ··· 196 194 if (ret) 197 195 return ret; 198 196 199 - return sprintf(buf, "%d\n", attr.lmc); 197 + return sysfs_emit(buf, "%d\n", attr.lmc); 200 198 } 201 199 202 200 static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, ··· 209 207 if (ret) 210 208 return ret; 211 209 212 - return sprintf(buf, "0x%x\n", attr.sm_lid); 210 + return sysfs_emit(buf, "0x%x\n", attr.sm_lid); 213 211 } 214 212 215 213 static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, ··· 222 220 if (ret) 223 221 return ret; 224 222 225 - return sprintf(buf, "%d\n", attr.sm_sl); 223 + return sysfs_emit(buf, "%d\n", attr.sm_sl); 226 224 } 227 225 228 226 static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, ··· 235 233 if (ret) 236 234 return ret; 237 235 238 - return sprintf(buf, "0x%08x\n", attr.port_cap_flags); 236 + return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags); 239 237 } 240 238 241 239 static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, ··· 275 273 speed = " HDR"; 276 274 rate = 500; 277 275 break; 276 + case IB_SPEED_NDR: 277 + speed = " NDR"; 278 + rate = 1000; 279 + break; 278 280 case IB_SPEED_SDR: 279 281 default: /* default to SDR for invalid rates */ 280 282 speed = " SDR"; ··· 290 284 if (rate < 0) 291 285 return -EINVAL; 292 286 293 - return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", 294 - rate / 10, rate % 10 ? ".5" : "", 295 - ib_width_enum_to_int(attr.active_width), speed); 287 + return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, 288 + rate % 10 ? 
".5" : "", 289 + ib_width_enum_to_int(attr.active_width), speed); 296 290 } 297 291 298 292 static const char *phys_state_to_str(enum ib_port_phys_state phys_state) ··· 324 318 if (ret) 325 319 return ret; 326 320 327 - return sprintf(buf, "%d: %s\n", attr.phys_state, 328 - phys_state_to_str(attr.phys_state)); 321 + return sysfs_emit(buf, "%d: %s\n", attr.phys_state, 322 + phys_state_to_str(attr.phys_state)); 329 323 } 330 324 331 325 static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, 332 326 char *buf) 333 327 { 328 + const char *output; 329 + 334 330 switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { 335 331 case IB_LINK_LAYER_INFINIBAND: 336 - return sprintf(buf, "%s\n", "InfiniBand"); 332 + output = "InfiniBand"; 333 + break; 337 334 case IB_LINK_LAYER_ETHERNET: 338 - return sprintf(buf, "%s\n", "Ethernet"); 335 + output = "Ethernet"; 336 + break; 339 337 default: 340 - return sprintf(buf, "%s\n", "Unknown"); 338 + output = "Unknown"; 339 + break; 341 340 } 341 + 342 + return sysfs_emit(buf, "%s\n", output); 342 343 } 343 344 344 345 static PORT_ATTR_RO(state); ··· 371 358 NULL 372 359 }; 373 360 374 - static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) 361 + static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) 375 362 { 376 363 struct net_device *ndev; 377 - size_t ret = -EINVAL; 364 + int ret = -EINVAL; 378 365 379 366 rcu_read_lock(); 380 367 ndev = rcu_dereference(gid_attr->ndev); 381 368 if (ndev) 382 - ret = sprintf(buf, "%s\n", ndev->name); 369 + ret = sysfs_emit(buf, "%s\n", ndev->name); 383 370 rcu_read_unlock(); 384 371 return ret; 385 372 } 386 373 387 - static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) 374 + static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) 388 375 { 389 - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); 376 + return sysfs_emit(buf, "%s\n", 377 + ib_cache_gid_type_str(gid_attr->gid_type)); 390 378 } 391 379 392 380 static ssize_t _show_port_gid_attr( 393 381 struct ib_port *p, struct port_attribute *attr, char *buf, 394 - size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) 382 + ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) 395 383 { 396 384 struct port_table_attribute *tab_attr = 397 385 container_of(attr, struct port_table_attribute, attr); ··· 415 401 struct port_table_attribute *tab_attr = 416 402 container_of(attr, struct port_table_attribute, attr); 417 403 const struct ib_gid_attr *gid_attr; 418 - ssize_t ret; 404 + int len; 419 405 420 406 gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); 421 407 if (IS_ERR(gid_attr)) { ··· 430 416 * space throwing such error on fail to read gid, return zero 431 417 * GID as before. This maintains backward compatibility. 
432 418 */ 433 - return sprintf(buf, "%pI6\n", zgid.raw); 419 + return sysfs_emit(buf, "%pI6\n", zgid.raw); 434 420 } 435 421 436 - ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); 422 + len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw); 437 423 rdma_put_gid_attr(gid_attr); 438 - return ret; 424 + return len; 439 425 } 440 426 441 427 static ssize_t show_port_gid_attr_ndev(struct ib_port *p, ··· 457 443 struct port_table_attribute *tab_attr = 458 444 container_of(attr, struct port_table_attribute, attr); 459 445 u16 pkey; 460 - ssize_t ret; 446 + int ret; 461 447 462 448 ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); 463 449 if (ret) 464 450 return ret; 465 451 466 - return sprintf(buf, "0x%04x\n", pkey); 452 + return sysfs_emit(buf, "0x%04x\n", pkey); 467 453 } 468 454 469 455 #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ ··· 535 521 container_of(attr, struct port_table_attribute, attr); 536 522 int offset = tab_attr->index & 0xffff; 537 523 int width = (tab_attr->index >> 16) & 0xff; 538 - ssize_t ret; 524 + int ret; 539 525 u8 data[8]; 526 + int len; 540 527 541 528 ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 542 529 40 + offset / 8, sizeof(data)); ··· 546 531 547 532 switch (width) { 548 533 case 4: 549 - ret = sprintf(buf, "%u\n", (*data >> 550 - (4 - (offset % 8))) & 0xf); 534 + len = sysfs_emit(buf, "%u\n", 535 + (*data >> (4 - (offset % 8))) & 0xf); 551 536 break; 552 537 case 8: 553 - ret = sprintf(buf, "%u\n", *data); 538 + len = sysfs_emit(buf, "%u\n", *data); 554 539 break; 555 540 case 16: 556 - ret = sprintf(buf, "%u\n", 557 - be16_to_cpup((__be16 *)data)); 541 + len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data)); 558 542 break; 559 543 case 32: 560 - ret = sprintf(buf, "%u\n", 561 - be32_to_cpup((__be32 *)data)); 544 + len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data)); 562 545 break; 563 546 case 64: 564 - ret = sprintf(buf, "%llu\n", 565 - be64_to_cpup((__be64 *)data)); 547 + len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data)); 566 548 break; 567 - 568 549 default: 569 - ret = 0; 550 + len = 0; 551 + break; 570 552 } 571 553 572 - return ret; 554 + return len; 573 555 } 574 556 575 557 static PORT_PMA_ATTR(symbol_error , 0, 16, 32); ··· 827 815 return 0; 828 816 } 829 817 830 - static ssize_t print_hw_stat(struct ib_device *dev, int port_num, 831 - struct rdma_hw_stats *stats, int index, char *buf) 818 + static int print_hw_stat(struct ib_device *dev, int port_num, 819 + struct rdma_hw_stats *stats, int index, char *buf) 832 820 { 833 821 u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); 834 822 835 - return sprintf(buf, "%llu\n", stats->value[index] + v); 823 + return sysfs_emit(buf, "%llu\n", stats->value[index] + v); 836 824 } 837 825 838 826 static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, ··· 889 877 msecs = jiffies_to_msecs(stats->lifespan); 890 878 mutex_unlock(&stats->lock); 891 879 892 - return sprintf(buf, "%d\n", msecs); 880 + return sysfs_emit(buf, "%d\n", msecs); 893 881 } 894 882 895 883 static ssize_t set_stats_lifespan(struct kobject *kobj, ··· 1236 1224 return ret; 1237 1225 } 1238 1226 1227 + static const char *node_type_string(int node_type) 1228 + { 1229 + switch (node_type) { 1230 + case RDMA_NODE_IB_CA: 1231 + return "CA"; 1232 + case RDMA_NODE_IB_SWITCH: 1233 + return "switch"; 1234 + case RDMA_NODE_IB_ROUTER: 1235 + return "router"; 1236 + case RDMA_NODE_RNIC: 1237 + return "RNIC"; 1238 + case RDMA_NODE_USNIC: 1239 + return 
"usNIC"; 1240 + case RDMA_NODE_USNIC_UDP: 1241 + return "usNIC UDP"; 1242 + case RDMA_NODE_UNSPECIFIED: 1243 + return "unspecified"; 1244 + } 1245 + return "<unknown>"; 1246 + } 1247 + 1239 1248 static ssize_t node_type_show(struct device *device, 1240 1249 struct device_attribute *attr, char *buf) 1241 1250 { 1242 1251 struct ib_device *dev = rdma_device_to_ibdev(device); 1243 1252 1244 - switch (dev->node_type) { 1245 - case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); 1246 - case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); 1247 - case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); 1248 - case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); 1249 - case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type); 1250 - case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); 1251 - case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); 1252 - default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); 1253 - } 1253 + return sysfs_emit(buf, "%d: %s\n", dev->node_type, 1254 + node_type_string(dev->node_type)); 1254 1255 } 1255 1256 static DEVICE_ATTR_RO(node_type); 1256 1257 ··· 1271 1246 struct device_attribute *dev_attr, char *buf) 1272 1247 { 1273 1248 struct ib_device *dev = rdma_device_to_ibdev(device); 1249 + __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid; 1274 1250 1275 - return sprintf(buf, "%04x:%04x:%04x:%04x\n", 1276 - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), 1277 - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]), 1278 - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), 1279 - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); 1251 + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", 1252 + be16_to_cpu(guid[0]), 1253 + be16_to_cpu(guid[1]), 1254 + be16_to_cpu(guid[2]), 1255 + be16_to_cpu(guid[3])); 1280 1256 } 1281 1257 static DEVICE_ATTR_RO(sys_image_guid); 1282 1258 ··· 1285 1259 struct device_attribute *attr, char *buf) 1286 1260 { 1287 1261 struct ib_device *dev = rdma_device_to_ibdev(device); 1262 + __be16 *node_guid = (__be16 *)&dev->node_guid; 1288 1263 1289 - return sprintf(buf, "%04x:%04x:%04x:%04x\n", 1290 - be16_to_cpu(((__be16 *) &dev->node_guid)[0]), 1291 - be16_to_cpu(((__be16 *) &dev->node_guid)[1]), 1292 - be16_to_cpu(((__be16 *) &dev->node_guid)[2]), 1293 - be16_to_cpu(((__be16 *) &dev->node_guid)[3])); 1264 + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", 1265 + be16_to_cpu(node_guid[0]), 1266 + be16_to_cpu(node_guid[1]), 1267 + be16_to_cpu(node_guid[2]), 1268 + be16_to_cpu(node_guid[3])); 1294 1269 } 1295 1270 static DEVICE_ATTR_RO(node_guid); 1296 1271 ··· 1300 1273 { 1301 1274 struct ib_device *dev = rdma_device_to_ibdev(device); 1302 1275 1303 - return sprintf(buf, "%.64s\n", dev->node_desc); 1276 + return sysfs_emit(buf, "%.64s\n", dev->node_desc); 1304 1277 } 1305 1278 1306 1279 static ssize_t node_desc_store(struct device *device, ··· 1327 1300 char *buf) 1328 1301 { 1329 1302 struct ib_device *dev = rdma_device_to_ibdev(device); 1303 + char version[IB_FW_VERSION_NAME_MAX] = {}; 1330 1304 1331 - ib_get_device_fw_str(dev, buf); 1332 - strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); 1333 - return strlen(buf); 1305 + ib_get_device_fw_str(dev, version); 1306 + 1307 + return sysfs_emit(buf, "%s\n", version); 1334 1308 } 1335 1309 static DEVICE_ATTR_RO(fw_ver); 1336 1310
+1 -1
drivers/infiniband/core/ucma.c
··· 1825 1825
                                          struct device_attribute *attr,
                                          char *buf)
 {
-        return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+        return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
 }
 static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
 
+13 -4
drivers/infiniband/core/umem.c
··· 84 84
         dma_addr_t mask;
         int i;
 
+        if (umem->is_odp) {
+                unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+                /* ODP must always be self consistent. */
+                if (!(pgsz_bitmap & page_size))
+                        return 0;
+                return page_size;
+        }
+
         /* rdma_for_each_block() has a bug if the page size is smaller than the
          * page size used to build the umem. For now prevent smaller page sizes
          * from being returned.
··· 229 220
 
                 cur_base += ret * PAGE_SIZE;
                 npages -= ret;
-                sg = __sg_alloc_table_from_pages(
-                        &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
-                        dma_get_max_seg_size(device->dma_device), sg, npages,
-                        GFP_KERNEL);
+                sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret,
+                                0, ret << PAGE_SHIFT,
+                                ib_dma_max_seg_size(device), sg, npages,
+                                GFP_KERNEL);
                 umem->sg_nents = umem->sg_head.nents;
                 if (IS_ERR(sg)) {
                         unpin_user_pages_dirty_lock(page_list, ret, 0);
+3 -3
drivers/infiniband/core/user_mad.c
··· 1191 1191
         if (!port)
                 return -ENODEV;
 
-        return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
+        return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev));
 }
 static DEVICE_ATTR_RO(ibdev);
 
··· 1203 1203
         if (!port)
                 return -ENODEV;
 
-        return sprintf(buf, "%d\n", port->port_num);
+        return sysfs_emit(buf, "%d\n", port->port_num);
 }
 static DEVICE_ATTR_RO(port);
 
··· 1222 1222
 static ssize_t abi_version_show(struct class *class,
                                 struct class_attribute *attr, char *buf)
 {
-        return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+        return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
 }
 static CLASS_ATTR_RO(abi_version);
 
+91 -58
drivers/infiniband/core/uverbs_cmd.c
··· 681 681 return 0; 682 682 683 683 ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); 684 - 685 - if (ib_is_destroy_retryable(ret, why, uobject)) { 684 + if (ret) { 686 685 atomic_inc(&xrcd->usecnt); 687 686 return ret; 688 687 } ··· 689 690 if (inode) 690 691 xrcd_table_delete(dev, inode); 691 692 692 - return ret; 693 + return 0; 693 694 } 694 695 695 696 static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) ··· 709 710 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) 710 711 return -EINVAL; 711 712 712 - ret = ib_check_mr_access(cmd.access_flags); 713 - if (ret) 714 - return ret; 715 - 716 713 uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); 717 714 if (IS_ERR(uobj)) 718 715 return PTR_ERR(uobj); 716 + 717 + ret = ib_check_mr_access(ib_dev, cmd.access_flags); 718 + if (ret) 719 + goto err_free; 719 720 720 721 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); 721 722 if (!pd) { 722 723 ret = -EINVAL; 723 724 goto err_free; 724 - } 725 - 726 - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { 727 - if (!(pd->device->attrs.device_cap_flags & 728 - IB_DEVICE_ON_DEMAND_PAGING)) { 729 - pr_debug("ODP support not available\n"); 730 - ret = -EINVAL; 731 - goto err_put; 732 - } 733 725 } 734 726 735 727 mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, ··· 764 774 { 765 775 struct ib_uverbs_rereg_mr cmd; 766 776 struct ib_uverbs_rereg_mr_resp resp; 767 - struct ib_pd *pd = NULL; 768 777 struct ib_mr *mr; 769 - struct ib_pd *old_pd; 770 778 int ret; 771 779 struct ib_uobject *uobj; 780 + struct ib_uobject *new_uobj; 781 + struct ib_device *ib_dev; 782 + struct ib_pd *orig_pd; 783 + struct ib_pd *new_pd; 784 + struct ib_mr *new_mr; 772 785 773 786 ret = uverbs_request(attrs, &cmd, sizeof(cmd)); 774 787 if (ret) 775 788 return ret; 776 789 777 - if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) 790 + if (!cmd.flags) 778 791 return -EINVAL; 779 792 793 + if (cmd.flags & ~IB_MR_REREG_SUPPORTED) 794 + return -EOPNOTSUPP; 795 + 780 796 if ((cmd.flags & IB_MR_REREG_TRANS) && 781 - (!cmd.start || !cmd.hca_va || 0 >= cmd.length || 782 - (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) 783 - return -EINVAL; 797 + (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) 798 + return -EINVAL; 784 799 785 800 uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); 786 801 if (IS_ERR(uobj)) ··· 799 804 } 800 805 801 806 if (cmd.flags & IB_MR_REREG_ACCESS) { 802 - ret = ib_check_mr_access(cmd.access_flags); 807 + ret = ib_check_mr_access(mr->device, cmd.access_flags); 803 808 if (ret) 804 809 goto put_uobjs; 805 810 } 806 811 812 + orig_pd = mr->pd; 807 813 if (cmd.flags & IB_MR_REREG_PD) { 808 - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, 809 - attrs); 810 - if (!pd) { 814 + new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, 815 + attrs); 816 + if (!new_pd) { 811 817 ret = -EINVAL; 812 818 goto put_uobjs; 813 819 } 820 + } else { 821 + new_pd = mr->pd; 814 822 } 815 823 816 - old_pd = mr->pd; 817 - ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, 818 - cmd.length, cmd.hca_va, 819 - cmd.access_flags, pd, 820 - &attrs->driver_udata); 821 - if (ret) 824 + /* 825 + * The driver might create a new HW object as part of the rereg, we need 826 + * to have a uobject ready to hold it. 
827 + */ 828 + new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); 829 + if (IS_ERR(new_uobj)) { 830 + ret = PTR_ERR(new_uobj); 822 831 goto put_uobj_pd; 823 - 824 - if (cmd.flags & IB_MR_REREG_PD) { 825 - atomic_inc(&pd->usecnt); 826 - mr->pd = pd; 827 - atomic_dec(&old_pd->usecnt); 828 832 } 829 833 830 - if (cmd.flags & IB_MR_REREG_TRANS) 831 - mr->iova = cmd.hca_va; 834 + new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length, 835 + cmd.hca_va, cmd.access_flags, new_pd, 836 + &attrs->driver_udata); 837 + if (IS_ERR(new_mr)) { 838 + ret = PTR_ERR(new_mr); 839 + goto put_new_uobj; 840 + } 841 + if (new_mr) { 842 + new_mr->device = new_pd->device; 843 + new_mr->pd = new_pd; 844 + new_mr->type = IB_MR_TYPE_USER; 845 + new_mr->dm = NULL; 846 + new_mr->sig_attrs = NULL; 847 + new_mr->uobject = uobj; 848 + atomic_inc(&new_pd->usecnt); 849 + new_mr->iova = cmd.hca_va; 850 + new_uobj->object = new_mr; 851 + 852 + rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); 853 + rdma_restrack_set_name(&new_mr->res, NULL); 854 + rdma_restrack_add(&new_mr->res); 855 + 856 + /* 857 + * The new uobj for the new HW object is put into the same spot 858 + * in the IDR and the old uobj & HW object is deleted. 859 + */ 860 + rdma_assign_uobject(uobj, new_uobj, attrs); 861 + rdma_alloc_commit_uobject(new_uobj, attrs); 862 + uobj_put_destroy(uobj); 863 + new_uobj = NULL; 864 + uobj = NULL; 865 + mr = new_mr; 866 + } else { 867 + if (cmd.flags & IB_MR_REREG_PD) { 868 + atomic_dec(&orig_pd->usecnt); 869 + mr->pd = new_pd; 870 + atomic_inc(&new_pd->usecnt); 871 + } 872 + if (cmd.flags & IB_MR_REREG_TRANS) 873 + mr->iova = cmd.hca_va; 874 + } 832 875 833 876 memset(&resp, 0, sizeof(resp)); 834 877 resp.lkey = mr->lkey; ··· 874 841 875 842 ret = uverbs_response(attrs, &resp, sizeof(resp)); 876 843 844 + put_new_uobj: 845 + if (new_uobj) 846 + uobj_alloc_abort(new_uobj, attrs); 877 847 put_uobj_pd: 878 848 if (cmd.flags & IB_MR_REREG_PD) 879 - uobj_put_obj_read(pd); 849 + uobj_put_obj_read(new_pd); 880 850 881 851 put_uobjs: 882 - uobj_put_write(uobj); 852 + if (uobj) 853 + uobj_put_write(uobj); 883 854 884 855 return ret; 885 856 } ··· 1438 1401 if (cmd->qp_type == IB_QPT_XRC_TGT) 1439 1402 qp = ib_create_qp(pd, &attr); 1440 1403 else 1441 - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, 1442 - obj); 1404 + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, 1405 + NULL); 1443 1406 1444 1407 if (IS_ERR(qp)) { 1445 1408 ret = PTR_ERR(qp); ··· 1943 1906 if (ret) 1944 1907 return ret; 1945 1908 1946 - if (cmd.base.attr_mask & 1947 - ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) 1909 + if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 1948 1910 return -EOPNOTSUPP; 1949 1911 1950 1912 return modify_qp(attrs, &cmd); ··· 1965 1929 * Last bit is reserved for extending the attr_mask by 1966 1930 * using another field. 
1967 1931 */ 1968 - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); 1969 - 1970 - if (cmd.base.attr_mask & 1971 - ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) 1932 + if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) 1972 1933 return -EOPNOTSUPP; 1973 1934 1974 1935 ret = modify_qp(attrs, &cmd); ··· 3726 3693 ib_uverbs_create_ah, 3727 3694 UAPI_DEF_WRITE_UDATA_IO( 3728 3695 struct ib_uverbs_create_ah, 3729 - struct ib_uverbs_create_ah_resp), 3730 - UAPI_DEF_METHOD_NEEDS_FN(create_ah)), 3696 + struct ib_uverbs_create_ah_resp)), 3731 3697 DECLARE_UVERBS_WRITE( 3732 3698 IB_USER_VERBS_CMD_DESTROY_AH, 3733 3699 ib_uverbs_destroy_ah, 3734 - UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), 3735 - UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), 3700 + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)), 3701 + UAPI_DEF_OBJ_NEEDS_FN(create_user_ah), 3702 + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), 3736 3703 3737 3704 DECLARE_UVERBS_OBJECT( 3738 3705 UVERBS_OBJECT_COMP_CHANNEL, ··· 3786 3753 IB_USER_VERBS_EX_CMD_MODIFY_CQ, 3787 3754 ib_uverbs_ex_modify_cq, 3788 3755 UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), 3789 - UAPI_DEF_METHOD_NEEDS_FN(create_cq))), 3756 + UAPI_DEF_METHOD_NEEDS_FN(modify_cq))), 3790 3757 3791 3758 DECLARE_UVERBS_OBJECT( 3792 3759 UVERBS_OBJECT_DEVICE, ··· 4032 3999 DECLARE_UVERBS_WRITE( 4033 4000 IB_USER_VERBS_CMD_CLOSE_XRCD, 4034 4001 ib_uverbs_close_xrcd, 4035 - UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), 4036 - UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), 4002 + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)), 4037 4003 DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, 4038 4004 ib_uverbs_open_qp, 4039 4005 UAPI_DEF_WRITE_UDATA_IO( ··· 4042 4010 ib_uverbs_open_xrcd, 4043 4011 UAPI_DEF_WRITE_UDATA_IO( 4044 4012 struct ib_uverbs_open_xrcd, 4045 - struct ib_uverbs_open_xrcd_resp), 4046 - UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), 4013 + struct ib_uverbs_open_xrcd_resp)), 4014 + UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd), 4015 + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), 4047 4016 4048 4017 {}, 4049 4018 };
+2 -2
drivers/infiniband/core/uverbs_main.c
··· 1046 1046 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1047 1047 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1048 1048 if (ib_dev) 1049 - ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); 1049 + ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); 1050 1050 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1051 1051 1052 1052 return ret; ··· 1065 1065 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1066 1066 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1067 1067 if (ib_dev) 1068 - ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); 1068 + ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); 1069 1069 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1070 1070 1071 1071 return ret;
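The sprintf()-to-sysfs_emit() conversion seen here repeats in several driver sysfs files further down (bnxt_re, cxgb4, hfi1). A minimal sketch of the pattern in a generic show() callback; the attribute name is made up for illustration:

static ssize_t example_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	/* sysfs_emit() bounds the write to the PAGE_SIZE buffer that
	 * sysfs provides and warns on misuse, which open-coded sprintf()
	 * into the same buffer never did.
	 */
	return sysfs_emit(buf, "%u\n", 42);
}
static DEVICE_ATTR_RO(example);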
+7 -11
drivers/infiniband/core/uverbs_std_types.c
··· 88 88 return -EBUSY; 89 89 90 90 ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); 91 - if (ib_is_destroy_retryable(ret, why, uobject)) 91 + if (ret) 92 92 return ret; 93 93 94 94 for (i = 0; i < table_size; i++) ··· 96 96 97 97 kfree(rwq_ind_tbl); 98 98 kfree(ind_tbl); 99 - return ret; 99 + return 0; 100 100 } 101 101 102 102 static int uverbs_free_xrcd(struct ib_uobject *uobject, ··· 108 108 container_of(uobject, struct ib_uxrcd_object, uobject); 109 109 int ret; 110 110 111 - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); 112 - if (ret) 113 - return ret; 111 + if (atomic_read(&uxrcd->refcnt)) 112 + return -EBUSY; 114 113 115 114 mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); 116 115 ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); ··· 123 124 struct uverbs_attr_bundle *attrs) 124 125 { 125 126 struct ib_pd *pd = uobject->object; 126 - int ret; 127 127 128 - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); 129 - if (ret) 130 - return ret; 128 + if (atomic_read(&pd->usecnt)) 129 + return -EBUSY; 131 130 132 131 return ib_dealloc_pd_user(pd, &attrs->driver_udata); 133 132 } ··· 154 157 spin_unlock_irq(&event_queue->lock); 155 158 } 156 159 157 - static int 160 + static void 158 161 uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, 159 162 enum rdma_remove_reason why) 160 163 { ··· 163 166 uobj); 164 167 165 168 ib_uverbs_free_event_queue(&file->ev_queue); 166 - return 0; 167 169 } 168 170 169 171 int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
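This file and the uverbs_std_types_* files below all drop the ib_is_destroy_retryable()/ib_destroy_usecnt() helpers: a destroy callback now either succeeds or returns a plain errno, and an object that is still referenced simply reports -EBUSY. A sketch of the resulting callback shape, using a hypothetical object type:

static int uverbs_free_foo(struct ib_uobject *uobject,
			   enum rdma_remove_reason why,
			   struct uverbs_attr_bundle *attrs)
{
	struct foo_obj *foo = uobject->object;

	/* Still referenced by other objects: refuse instead of retrying. */
	if (atomic_read(&foo->usecnt))
		return -EBUSY;

	return foo_destroy(foo, &attrs->driver_udata);
}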
+2 -3
drivers/infiniband/core/uverbs_std_types_async_fd.c
··· 19 19 return 0; 20 20 } 21 21 22 - static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, 23 - enum rdma_remove_reason why) 22 + static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, 23 + enum rdma_remove_reason why) 24 24 { 25 25 struct ib_uverbs_async_event_file *event_file = 26 26 container_of(uobj, struct ib_uverbs_async_event_file, uobj); ··· 30 30 if (why == RDMA_REMOVE_DRIVER_REMOVE) 31 31 ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, 32 32 NULL, NULL); 33 - return 0; 34 33 } 35 34 36 35 int uverbs_async_event_release(struct inode *inode, struct file *filp)
+2 -3
drivers/infiniband/core/uverbs_std_types_counters.c
··· 42 42 struct ib_counters *counters = uobject->object; 43 43 int ret; 44 44 45 - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); 46 - if (ret) 47 - return ret; 45 + if (atomic_read(&counters->usecnt)) 46 + return -EBUSY; 48 47 49 48 ret = counters->device->ops.destroy_counters(counters); 50 49 if (ret)
+2 -2
drivers/infiniband/core/uverbs_std_types_cq.c
··· 46 46 int ret; 47 47 48 48 ret = ib_destroy_cq_user(cq, &attrs->driver_udata); 49 - if (ib_is_destroy_retryable(ret, why, uobject)) 49 + if (ret) 50 50 return ret; 51 51 52 52 ib_uverbs_release_ucq( ··· 55 55 ev_queue) : 56 56 NULL, 57 57 ucq); 58 - return ret; 58 + return 0; 59 59 } 60 60 61 61 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+5 -9
drivers/infiniband/core/uverbs_std_types_device.c
··· 317 317 struct ib_device *ib_dev; 318 318 size_t user_entry_size; 319 319 ssize_t num_entries; 320 - size_t max_entries; 321 - size_t num_bytes; 320 + int max_entries; 322 321 u32 flags; 323 322 int ret; 324 323 ··· 335 336 attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, 336 337 user_entry_size); 337 338 if (max_entries <= 0) 338 - return -EINVAL; 339 + return max_entries ?: -EINVAL; 339 340 340 341 ucontext = ib_uverbs_get_ucontext(attrs); 341 342 if (IS_ERR(ucontext)) 342 343 return PTR_ERR(ucontext); 343 344 ib_dev = ucontext->device; 344 345 345 - if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) 346 - return -EINVAL; 347 - 348 - entries = uverbs_zalloc(attrs, num_bytes); 349 - if (!entries) 350 - return -ENOMEM; 346 + entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries)); 347 + if (IS_ERR(entries)) 348 + return PTR_ERR(entries); 351 349 352 350 num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); 353 351 if (num_entries < 0)
+2 -4
drivers/infiniband/core/uverbs_std_types_dm.c
··· 39 39 struct uverbs_attr_bundle *attrs) 40 40 { 41 41 struct ib_dm *dm = uobject->object; 42 - int ret; 43 42 44 - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); 45 - if (ret) 46 - return ret; 43 + if (atomic_read(&dm->usecnt)) 44 + return -EBUSY; 47 45 48 46 return dm->device->ops.dealloc_dm(dm, attrs); 49 47 }
+2 -4
drivers/infiniband/core/uverbs_std_types_flow_action.c
··· 39 39 struct uverbs_attr_bundle *attrs) 40 40 { 41 41 struct ib_flow_action *action = uobject->object; 42 - int ret; 43 42 44 - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); 45 - if (ret) 46 - return ret; 43 + if (atomic_read(&action->usecnt)) 44 + return -EBUSY; 47 45 48 46 return action->device->ops.destroy_flow_action(action); 49 47 }
+5 -1
drivers/infiniband/core/uverbs_std_types_mr.c
··· 33 33 #include "rdma_core.h" 34 34 #include "uverbs.h" 35 35 #include <rdma/uverbs_std_types.h> 36 + #include "restrack.h" 36 37 37 38 static int uverbs_free_mr(struct ib_uobject *uobject, 38 39 enum rdma_remove_reason why, ··· 115 114 if (!(attr.access_flags & IB_ZERO_BASED)) 116 115 return -EINVAL; 117 116 118 - ret = ib_check_mr_access(attr.access_flags); 117 + ret = ib_check_mr_access(ib_dev, attr.access_flags); 119 118 if (ret) 120 119 return ret; 121 120 ··· 135 134 atomic_inc(&pd->usecnt); 136 135 atomic_inc(&dm->usecnt); 137 136 137 + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); 138 + rdma_restrack_set_name(&mr->res, NULL); 139 + rdma_restrack_add(&mr->res); 138 140 uobj->object = mr; 139 141 140 142 uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+4 -4
drivers/infiniband/core/uverbs_std_types_qp.c
··· 32 32 } 33 33 34 34 ret = ib_destroy_qp_user(qp, &attrs->driver_udata); 35 - if (ib_is_destroy_retryable(ret, why, uobject)) 35 + if (ret) 36 36 return ret; 37 37 38 38 if (uqp->uxrcd) 39 39 atomic_dec(&uqp->uxrcd->refcnt); 40 40 41 41 ib_uverbs_release_uevent(&uqp->uevent); 42 - return ret; 42 + return 0; 43 43 } 44 44 45 45 static int check_creation_flags(enum ib_qp_type qp_type, ··· 251 251 if (attr.qp_type == IB_QPT_XRC_TGT) 252 252 qp = ib_create_qp(pd, &attr); 253 253 else 254 - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, 255 - obj); 254 + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, 255 + NULL); 256 256 257 257 if (IS_ERR(qp)) { 258 258 ret = PTR_ERR(qp);
+2 -2
drivers/infiniband/core/uverbs_std_types_srq.c
··· 18 18 int ret; 19 19 20 20 ret = ib_destroy_srq_user(srq, &attrs->driver_udata); 21 - if (ib_is_destroy_retryable(ret, why, uobject)) 21 + if (ret) 22 22 return ret; 23 23 24 24 if (srq_type == IB_SRQT_XRC) { ··· 30 30 } 31 31 32 32 ib_uverbs_release_uevent(uevent); 33 - return ret; 33 + return 0; 34 34 } 35 35 36 36 static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
+2 -2
drivers/infiniband/core/uverbs_std_types_wq.c
··· 17 17 int ret; 18 18 19 19 ret = ib_destroy_wq_user(wq, &attrs->driver_udata); 20 - if (ib_is_destroy_retryable(ret, why, uobject)) 20 + if (ret) 21 21 return ret; 22 22 23 23 ib_uverbs_release_uevent(&uwq->uevent); 24 - return ret; 24 + return 0; 25 25 } 26 26 27 27 static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
+1 -4
drivers/infiniband/core/uverbs_uapi.c
··· 79 79 80 80 method_elm->is_ex = def->write.is_ex; 81 81 method_elm->handler = def->func_write; 82 - if (def->write.is_ex) 83 - method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & 84 - BIT_ULL(def->write.command_num)); 85 - else 82 + if (!def->write.is_ex) 86 83 method_elm->disabled = !(ibdev->uverbs_cmd_mask & 87 84 BIT_ULL(def->write.command_num)); 88 85
+17 -10
drivers/infiniband/core/verbs.c
··· 244 244 /* Protection domains */ 245 245 246 246 /** 247 - * ib_alloc_pd - Allocates an unused protection domain. 247 + * __ib_alloc_pd - Allocates an unused protection domain. 248 248 * @device: The device on which to allocate the protection domain. 249 249 * @flags: protection domain flags 250 250 * @caller: caller's build-time module name ··· 516 516 517 517 might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); 518 518 519 - if (!device->ops.create_ah) 519 + if (!udata && !device->ops.create_ah) 520 520 return ERR_PTR(-EOPNOTSUPP); 521 521 522 522 ah = rdma_zalloc_drv_obj_gfp( ··· 533 533 init_attr.flags = flags; 534 534 init_attr.xmit_slave = xmit_slave; 535 535 536 - ret = device->ops.create_ah(ah, &init_attr, udata); 536 + if (udata) 537 + ret = device->ops.create_user_ah(ah, &init_attr, udata); 538 + else 539 + ret = device->ops.create_ah(ah, &init_attr, NULL); 537 540 if (ret) { 538 541 kfree(ah); 539 542 return ERR_PTR(ret); ··· 1191 1188 } 1192 1189 1193 1190 /** 1194 - * ib_create_qp - Creates a kernel QP associated with the specified protection 1191 + * ib_create_named_qp - Creates a kernel QP associated with the specified protection 1195 1192 * domain. 1196 1193 * @pd: The protection domain associated with the QP. 1197 1194 * @qp_init_attr: A list of initial attributes required to create the 1198 1195 * QP. If QP creation succeeds, then the attributes are updated to 1199 1196 * the actual capabilities of the created QP. 1197 + * @caller: caller's build-time module name 1200 1198 * 1201 1199 * NOTE: for user qp use ib_create_qp_user with valid udata! 1202 1200 */ 1203 - struct ib_qp *ib_create_qp(struct ib_pd *pd, 1204 - struct ib_qp_init_attr *qp_init_attr) 1201 + struct ib_qp *ib_create_named_qp(struct ib_pd *pd, 1202 + struct ib_qp_init_attr *qp_init_attr, 1203 + const char *caller) 1205 1204 { 1206 1205 struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; 1207 1206 struct ib_qp *qp; ··· 1228 1223 if (qp_init_attr->cap.max_rdma_ctxs) 1229 1224 rdma_rw_init_qp(device, qp_init_attr); 1230 1225 1231 - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); 1226 + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller); 1232 1227 if (IS_ERR(qp)) 1233 1228 return qp; 1234 1229 ··· 1294 1289 return ERR_PTR(ret); 1295 1290 1296 1291 } 1297 - EXPORT_SYMBOL(ib_create_qp); 1292 + EXPORT_SYMBOL(ib_create_named_qp); 1298 1293 1299 1294 static const struct { 1300 1295 int valid; ··· 1667 1662 qp->qp_type == IB_QPT_XRC_TGT); 1668 1663 } 1669 1664 1670 - /** 1665 + /* 1671 1666 * IB core internal function to perform QP attributes modification. 1672 1667 */ 1673 1668 static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, ··· 1703 1698 slave = rdma_lag_get_ah_roce_slave(qp->device, 1704 1699 &attr->ah_attr, 1705 1700 GFP_KERNEL); 1706 - if (IS_ERR(slave)) 1701 + if (IS_ERR(slave)) { 1702 + ret = PTR_ERR(slave); 1707 1703 goto out_av; 1704 + } 1708 1705 attr->xmit_slave = slave; 1709 1706 } 1710 1707 }
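rdma_create_ah() now dispatches to the dedicated create_user_ah op whenever udata is present, so a driver with no kernel-side AH support can leave .create_ah unset and kernel callers get -EOPNOTSUPP from the core. A sketch of the ops wiring; foo_* names are placeholders, while efa and bnxt_re below make the real equivalent change:

static const struct ib_device_ops foo_dev_ops = {
	/* Userspace AH creation only: with no .create_ah, kernel ULPs
	 * asking for an AH now get -EOPNOTSUPP from rdma_create_ah().
	 */
	.create_user_ah = foo_create_ah,
	.destroy_ah = foo_destroy_ah,
};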
+12 -3
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 1271 1271 } 1272 1272 qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); 1273 1273 qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ 1274 - if (init_attr->create_flags) 1274 + if (init_attr->create_flags) { 1275 1275 ibdev_dbg(&rdev->ibdev, 1276 1276 "QP create flags 0x%x not supported", 1277 1277 init_attr->create_flags); 1278 + return -EOPNOTSUPP; 1279 + } 1278 1280 1279 1281 /* Setup CQs */ 1280 1282 if (init_attr->send_cq) { ··· 1659 1657 srq->qplib_srq.max_wqe = entries; 1660 1658 1661 1659 srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; 1662 - srq->qplib_srq.wqe_size = 1663 - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); 1660 + /* 128 byte wqe size for SRQ . So use max sges */ 1661 + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); 1664 1662 srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; 1665 1663 srq->srq_limit = srq_init_attr->attr.srq_limit; 1666 1664 srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; ··· 1830 1828 int rc, entries; 1831 1829 unsigned int flags; 1832 1830 u8 nw_type; 1831 + 1832 + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 1833 + return -EOPNOTSUPP; 1833 1834 1834 1835 qp->qplib_qp.modify_flags = 0; 1835 1836 if (qp_attr_mask & IB_QP_STATE) { ··· 2083 2078 goto out; 2084 2079 } 2085 2080 qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); 2081 + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); 2086 2082 qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; 2087 2083 qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); 2088 2084 qp_attr->pkey_index = qplib_qp->pkey_index; ··· 2832 2826 int cqe = attr->cqe; 2833 2827 struct bnxt_qplib_nq *nq = NULL; 2834 2828 unsigned int nq_alloc_cnt; 2829 + 2830 + if (attr->flags) 2831 + return -EOPNOTSUPP; 2835 2832 2836 2833 /* Validate CQ fields */ 2837 2834 if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
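The attr_mask guard added to bnxt_re_modify_qp() here is the same one applied to cxgb4 and efa below: any mask bit outside the core-defined IB_QP_ATTR_STANDARD_BITS set is rejected up front instead of being silently ignored. The shape of the check in a hypothetical driver:

static int foo_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	/* Extended or unknown attribute bits are not supported by this HW. */
	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	/* ... translate the standard attributes into device commands ... */
	return 0;
}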
+3 -31
drivers/infiniband/hw/bnxt_re/main.c
··· 608 608 struct bnxt_re_dev *rdev = 609 609 rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); 610 610 611 - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); 611 + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); 612 612 } 613 613 static DEVICE_ATTR_RO(hw_rev); 614 614 ··· 618 618 struct bnxt_re_dev *rdev = 619 619 rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); 620 620 621 - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); 621 + return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); 622 622 } 623 623 static DEVICE_ATTR_RO(hca_type); 624 624 ··· 646 646 .create_cq = bnxt_re_create_cq, 647 647 .create_qp = bnxt_re_create_qp, 648 648 .create_srq = bnxt_re_create_srq, 649 + .create_user_ah = bnxt_re_create_ah, 649 650 .dealloc_driver = bnxt_re_dealloc_driver, 650 651 .dealloc_pd = bnxt_re_dealloc_pd, 651 652 .dealloc_ucontext = bnxt_re_dealloc_ucontext, ··· 701 700 ibdev->num_comp_vectors = rdev->num_msix - 1; 702 701 ibdev->dev.parent = &rdev->en_dev->pdev->dev; 703 702 ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; 704 - 705 - /* User space */ 706 - ibdev->uverbs_cmd_mask = 707 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 708 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 709 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 710 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 711 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 712 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 713 - (1ull << IB_USER_VERBS_CMD_REREG_MR) | 714 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 715 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 716 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 717 - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 718 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 719 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 720 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 721 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 722 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 723 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 724 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 725 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 726 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 727 - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 728 - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | 729 - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | 730 - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); 731 - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ 732 - 733 703 734 704 rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); 735 705 ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
+1 -1
drivers/infiniband/hw/bnxt_re/qplib_sp.c
··· 118 118 * 128 WQEs needs to be reserved for the HW (8916). Prevent 119 119 * reporting the max number 120 120 */ 121 - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS; 121 + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; 122 122 attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? 123 123 6 : sb->max_sge; 124 124 attr->max_cq = le32_to_cpu(sb->max_cq);
+3
drivers/infiniband/hw/cxgb4/cq.c
··· 1006 1006 1007 1007 pr_debug("ib_dev %p entries %d\n", ibdev, entries); 1008 1008 if (attr->flags) 1009 + return -EOPNOTSUPP; 1010 + 1011 + if (entries < 1 || entries > ibdev->attrs.max_cqe) 1009 1012 return -EINVAL; 1010 1013 1011 1014 if (vector >= rhp->rdev.lldi.nciq)
-2
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
··· 983 983 u32 max_num_sg); 984 984 int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 985 985 unsigned int *sg_offset); 986 - int c4iw_dealloc_mw(struct ib_mw *mw); 987 986 void c4iw_dealloc(struct uld_ctx *ctx); 988 - int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); 989 987 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, 990 988 u64 length, u64 virt, int acc, 991 989 struct ib_udata *udata);
-84
drivers/infiniband/hw/cxgb4/mem.c
··· 365 365 pbl_size, pbl_addr, skb, wr_waitp); 366 366 } 367 367 368 - static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, 369 - struct c4iw_wr_wait *wr_waitp) 370 - { 371 - *stag = T4_STAG_UNSET; 372 - return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, 373 - 0UL, 0, 0, 0, 0, NULL, wr_waitp); 374 - } 375 - 376 - static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, 377 - struct sk_buff *skb, 378 - struct c4iw_wr_wait *wr_waitp) 379 - { 380 - return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, 381 - 0, skb, wr_waitp); 382 - } 383 - 384 368 static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, 385 369 u32 pbl_size, u32 pbl_addr, 386 370 struct c4iw_wr_wait *wr_waitp) ··· 593 609 err_free_mhp: 594 610 kfree(mhp); 595 611 return ERR_PTR(err); 596 - } 597 - 598 - int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) 599 - { 600 - struct c4iw_mw *mhp = to_c4iw_mw(ibmw); 601 - struct c4iw_dev *rhp; 602 - struct c4iw_pd *php; 603 - u32 mmid; 604 - u32 stag = 0; 605 - int ret; 606 - 607 - if (ibmw->type != IB_MW_TYPE_1) 608 - return -EINVAL; 609 - 610 - php = to_c4iw_pd(ibmw->pd); 611 - rhp = php->rhp; 612 - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); 613 - if (!mhp->wr_waitp) 614 - return -ENOMEM; 615 - 616 - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); 617 - if (!mhp->dereg_skb) { 618 - ret = -ENOMEM; 619 - goto free_wr_wait; 620 - } 621 - 622 - ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); 623 - if (ret) 624 - goto free_skb; 625 - 626 - mhp->rhp = rhp; 627 - mhp->attr.pdid = php->pdid; 628 - mhp->attr.type = FW_RI_STAG_MW; 629 - mhp->attr.stag = stag; 630 - mmid = (stag) >> 8; 631 - ibmw->rkey = stag; 632 - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { 633 - ret = -ENOMEM; 634 - goto dealloc_win; 635 - } 636 - pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); 637 - return 0; 638 - 639 - dealloc_win: 640 - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, 641 - mhp->wr_waitp); 642 - free_skb: 643 - kfree_skb(mhp->dereg_skb); 644 - free_wr_wait: 645 - c4iw_put_wr_wait(mhp->wr_waitp); 646 - return ret; 647 - } 648 - 649 - int c4iw_dealloc_mw(struct ib_mw *mw) 650 - { 651 - struct c4iw_dev *rhp; 652 - struct c4iw_mw *mhp; 653 - u32 mmid; 654 - 655 - mhp = to_c4iw_mw(mw); 656 - rhp = mhp->rhp; 657 - mmid = (mw->rkey) >> 8; 658 - xa_erase_irq(&rhp->mrs, mmid); 659 - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, 660 - mhp->wr_waitp); 661 - kfree_skb(mhp->dereg_skb); 662 - c4iw_put_wr_wait(mhp->wr_waitp); 663 - return 0; 664 612 } 665 613 666 614 struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+6 -29
drivers/infiniband/hw/cxgb4/provider.c
··· 322 322 rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); 323 323 324 324 pr_debug("dev 0x%p\n", dev); 325 - return sprintf(buf, "%d\n", 326 - CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); 325 + return sysfs_emit( 326 + buf, "%d\n", 327 + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); 327 328 } 328 329 static DEVICE_ATTR_RO(hw_rev); 329 330 ··· 338 337 339 338 pr_debug("dev 0x%p\n", dev); 340 339 lldev->ethtool_ops->get_drvinfo(lldev, &info); 341 - return sprintf(buf, "%s\n", info.driver); 340 + return sysfs_emit(buf, "%s\n", info.driver); 342 341 } 343 342 static DEVICE_ATTR_RO(hca_type); 344 343 ··· 349 348 rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); 350 349 351 350 pr_debug("dev 0x%p\n", dev); 352 - return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, 353 - c4iw_dev->rdev.lldi.pdev->device); 351 + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, 352 + c4iw_dev->rdev.lldi.pdev->device); 354 353 } 355 354 static DEVICE_ATTR_RO(board_id); 356 355 ··· 457 456 458 457 .alloc_hw_stats = c4iw_alloc_stats, 459 458 .alloc_mr = c4iw_alloc_mr, 460 - .alloc_mw = c4iw_alloc_mw, 461 459 .alloc_pd = c4iw_allocate_pd, 462 460 .alloc_ucontext = c4iw_alloc_ucontext, 463 461 .create_cq = c4iw_create_cq, 464 462 .create_qp = c4iw_create_qp, 465 463 .create_srq = c4iw_create_srq, 466 - .dealloc_mw = c4iw_dealloc_mw, 467 464 .dealloc_pd = c4iw_deallocate_pd, 468 465 .dealloc_ucontext = c4iw_dealloc_ucontext, 469 466 .dereg_mr = c4iw_dereg_mr, ··· 532 533 if (fastreg_support) 533 534 dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 534 535 dev->ibdev.local_dma_lkey = 0; 535 - dev->ibdev.uverbs_cmd_mask = 536 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 537 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 538 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 539 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 540 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 541 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 542 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 543 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 544 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 545 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 546 - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 547 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 548 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 549 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 550 - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 551 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 552 - (1ull << IB_USER_VERBS_CMD_POST_SEND) | 553 - (1ull << IB_USER_VERBS_CMD_POST_RECV) | 554 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 555 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 556 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); 557 536 dev->ibdev.node_type = RDMA_NODE_RNIC; 558 537 BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); 559 538 memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
+7 -1
drivers/infiniband/hw/cxgb4/qp.c
··· 2126 2126 2127 2127 pr_debug("ib_pd %p\n", pd); 2128 2128 2129 - if (attrs->qp_type != IB_QPT_RC) 2129 + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) 2130 2130 return ERR_PTR(-EOPNOTSUPP); 2131 2131 2132 2132 php = to_c4iw_pd(pd); ··· 2373 2373 struct c4iw_qp_attributes attrs = {}; 2374 2374 2375 2375 pr_debug("ib_qp %p\n", ibqp); 2376 + 2377 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 2378 + return -EOPNOTSUPP; 2376 2379 2377 2380 /* iwarp does not support the RTR state */ 2378 2381 if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) ··· 2682 2679 int rqsize; 2683 2680 int ret; 2684 2681 int wr_len; 2682 + 2683 + if (attrs->srq_type != IB_SRQT_BASIC) 2684 + return -EOPNOTSUPP; 2685 2685 2686 2686 pr_debug("%s ib_pd %p\n", __func__, pd); 2687 2687
+3 -31
drivers/infiniband/hw/efa/efa_main.c
··· 245 245 .alloc_hw_stats = efa_alloc_hw_stats, 246 246 .alloc_pd = efa_alloc_pd, 247 247 .alloc_ucontext = efa_alloc_ucontext, 248 - .create_ah = efa_create_ah, 249 248 .create_cq = efa_create_cq, 250 249 .create_qp = efa_create_qp, 250 + .create_user_ah = efa_create_ah, 251 251 .dealloc_pd = efa_dealloc_pd, 252 252 .dealloc_ucontext = efa_dealloc_ucontext, 253 253 .dereg_mr = efa_dereg_mr, ··· 307 307 dev->ibdev.phys_port_cnt = 1; 308 308 dev->ibdev.num_comp_vectors = 1; 309 309 dev->ibdev.dev.parent = &pdev->dev; 310 - 311 - dev->ibdev.uverbs_cmd_mask = 312 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 313 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 314 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 315 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 316 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 317 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 318 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 319 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 320 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 321 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 322 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 323 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 324 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 325 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 326 - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 327 - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); 328 - 329 - dev->ibdev.uverbs_ex_cmd_mask = 330 - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); 331 310 332 311 ib_set_device_ops(&dev->ibdev, &efa_dev_ops); 333 312 ··· 384 405 return err; 385 406 } 386 407 387 - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); 408 + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width)); 388 409 if (err) { 389 - dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); 410 + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err); 390 411 return err; 391 412 } 392 413 393 - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); 394 - if (err) { 395 - dev_err(&pdev->dev, 396 - "err_pci_set_consistent_dma_mask failed %d\n", 397 - err); 398 - return err; 399 - } 400 414 dma_set_max_seg_size(&pdev->dev, UINT_MAX); 401 415 return 0; 402 416 }
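The efa probe path above also replaces the legacy pci_set_dma_mask()/pci_set_consistent_dma_mask() pair with a single generic DMA API call. For reference, the equivalent one-call pattern, with a 64-bit width chosen purely for illustration:

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
		return err;
	}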
+6
drivers/infiniband/hw/efa/efa_verbs.c
··· 917 917 enum ib_qp_state new_state; 918 918 int err; 919 919 920 + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 921 + return -EOPNOTSUPP; 922 + 920 923 if (udata->inlen && 921 924 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 922 925 ibdev_dbg(&dev->ibdev, ··· 1031 1028 int err; 1032 1029 1033 1030 ibdev_dbg(ibdev, "create_cq entries %d\n", entries); 1031 + 1032 + if (attr->flags) 1033 + return -EOPNOTSUPP; 1034 1034 1035 1035 if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { 1036 1036 ibdev_dbg(ibdev,
+1
drivers/infiniband/hw/hfi1/qp.c
··· 339 339 return -EINVAL; 340 340 if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) 341 341 return -EINVAL; 342 + break; 342 343 default: 343 344 break; 344 345 }
+28 -32
drivers/infiniband/hw/hfi1/sysfs.c
··· 151 151 152 152 static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) 153 153 { 154 - return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); 154 + return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); 155 155 } 156 156 157 157 static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, ··· 296 296 container_of(kobj, struct hfi1_pportdata, sc2vl_kobj); 297 297 struct hfi1_devdata *dd = ppd->dd; 298 298 299 - return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); 299 + return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); 300 300 } 301 301 302 302 static const struct sysfs_ops hfi1_sc2vl_ops = { ··· 401 401 container_of(kobj, struct hfi1_pportdata, sl2sc_kobj); 402 402 struct hfi1_ibport *ibp = &ppd->ibport_data; 403 403 404 - return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); 404 + return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); 405 405 } 406 406 407 407 static const struct sysfs_ops hfi1_sl2sc_ops = { ··· 475 475 container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj); 476 476 struct hfi1_devdata *dd = ppd->dd; 477 477 478 - return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu); 478 + return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu); 479 479 } 480 480 481 481 static const struct sysfs_ops hfi1_vl2mtu_ops = { ··· 500 500 struct hfi1_ibdev *dev = 501 501 rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); 502 502 503 - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); 503 + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); 504 504 } 505 505 static DEVICE_ATTR_RO(hw_rev); 506 506 ··· 510 510 struct hfi1_ibdev *dev = 511 511 rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); 512 512 struct hfi1_devdata *dd = dd_from_dev(dev); 513 - int ret; 514 513 515 514 if (!dd->boardname) 516 - ret = -EINVAL; 517 - else 518 - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); 519 - return ret; 515 + return -EINVAL; 516 + 517 + return sysfs_emit(buf, "%s\n", dd->boardname); 520 518 } 521 519 static DEVICE_ATTR_RO(board_id); 522 520 ··· 526 528 struct hfi1_devdata *dd = dd_from_dev(dev); 527 529 528 530 /* The string printed here is already newline-terminated. */ 529 - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); 531 + return sysfs_emit(buf, "%s", dd->boardversion); 530 532 } 531 533 static DEVICE_ATTR_RO(boardversion); 532 534 ··· 543 545 * and a receive context, so returning the smaller of the two counts 544 546 * give a more accurate picture of total contexts available. 545 547 */ 546 - return scnprintf(buf, PAGE_SIZE, "%u\n", 547 - min(dd->num_user_contexts, 548 - (u32)dd->sc_sizes[SC_USER].count)); 548 + return sysfs_emit(buf, "%u\n", 549 + min(dd->num_user_contexts, 550 + (u32)dd->sc_sizes[SC_USER].count)); 549 551 } 550 552 static DEVICE_ATTR_RO(nctxts); 551 553 ··· 557 559 struct hfi1_devdata *dd = dd_from_dev(dev); 558 560 559 561 /* Return the number of free user ports (contexts) available. 
*/ 560 - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 562 + return sysfs_emit(buf, "%u\n", dd->freectxts); 561 563 } 562 564 static DEVICE_ATTR_RO(nfreectxts); 563 565 ··· 568 570 rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); 569 571 struct hfi1_devdata *dd = dd_from_dev(dev); 570 572 571 - return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); 573 + /* dd->serial is already newline terminated in chip.c */ 574 + return sysfs_emit(buf, "%s", dd->serial); 572 575 } 573 576 static DEVICE_ATTR_RO(serial); 574 577 ··· 597 598 * Convert the reported temperature from an integer (reported in 598 599 * units of 0.25C) to a floating point number. 599 600 */ 600 - #define temp2str(temp, buf, size, idx) \ 601 - scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \ 602 - ((temp) >> 2), ((temp) & 0x3) * 25) 601 + #define temp_d(t) ((t) >> 2) 602 + #define temp_f(t) (((t)&0x3) * 25u) 603 603 604 604 /* 605 605 * Dump tempsense values, in decimal, to ease shell-scripts. ··· 613 615 int ret; 614 616 615 617 ret = hfi1_tempsense_rd(dd, &temp); 616 - if (!ret) { 617 - int idx = 0; 618 + if (ret) 619 + return ret; 618 620 619 - idx += temp2str(temp.curr, buf, PAGE_SIZE, idx); 620 - idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx); 621 - idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx); 622 - idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx); 623 - idx += scnprintf(buf + idx, PAGE_SIZE - idx, 624 - "%u %u %u\n", temp.triggers & 0x1, 625 - temp.triggers & 0x2, temp.triggers & 0x4); 626 - ret = idx; 627 - } 628 - return ret; 621 + return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n", 622 + temp_d(temp.curr), temp_f(temp.curr), 623 + temp_d(temp.lo_lim), temp_f(temp.lo_lim), 624 + temp_d(temp.hi_lim), temp_f(temp.hi_lim), 625 + temp_d(temp.crit_lim), temp_f(temp.crit_lim), 626 + temp.triggers & 0x1, 627 + temp.triggers & 0x2, 628 + temp.triggers & 0x4); 629 629 } 630 630 static DEVICE_ATTR_RO(tempsense); 631 631 ··· 813 817 if (vl < 0) 814 818 return vl; 815 819 816 - return snprintf(buf, PAGE_SIZE, "%d\n", vl); 820 + return sysfs_emit(buf, "%d\n", vl); 817 821 } 818 822 819 823 static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+5
drivers/infiniband/hw/hfi1/tid_rdma.c
··· 2826 2826 default: 2827 2827 break; 2828 2828 } 2829 + break; 2829 2830 default: 2830 2831 break; 2831 2832 } ··· 3006 3005 default: 3007 3006 break; 3008 3007 } 3008 + break; 3009 3009 default: 3010 3010 break; 3011 3011 } ··· 3223 3221 req = wqe_to_tid_req(prev); 3224 3222 if (req->ack_seg != req->total_segs) 3225 3223 goto interlock; 3224 + break; 3226 3225 default: 3227 3226 break; 3228 3227 } ··· 3242 3239 req = wqe_to_tid_req(prev); 3243 3240 if (req->ack_seg != req->total_segs) 3244 3241 goto interlock; 3242 + break; 3245 3243 default: 3246 3244 break; 3247 3245 } 3246 + break; 3248 3247 default: 3249 3248 break; 3250 3249 }
+23 -32
drivers/infiniband/hw/hns/hns_roce_ah.c
··· 31 31 */ 32 32 33 33 #include <linux/platform_device.h> 34 + #include <linux/pci.h> 34 35 #include <rdma/ib_addr.h> 35 36 #include <rdma/ib_cache.h> 36 37 #include "hns_roce_device.h" 37 - 38 - #define HNS_ROCE_PORT_NUM_SHIFT 24 39 - #define HNS_ROCE_VLAN_SL_BIT_MASK 7 40 - #define HNS_ROCE_VLAN_SL_SHIFT 13 41 38 42 39 static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) 43 40 { ··· 55 58 int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, 56 59 struct ib_udata *udata) 57 60 { 58 - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); 59 - const struct ib_gid_attr *gid_attr; 60 - struct device *dev = hr_dev->dev; 61 - struct hns_roce_ah *ah = to_hr_ah(ibah); 62 61 struct rdma_ah_attr *ah_attr = init_attr->ah_attr; 63 62 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 64 - u16 vlan_id = 0xffff; 65 - bool vlan_en = false; 66 - int ret; 63 + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); 64 + struct hns_roce_ah *ah = to_hr_ah(ibah); 65 + int ret = 0; 67 66 68 - gid_attr = ah_attr->grh.sgid_attr; 69 - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); 70 - if (ret) 71 - return ret; 72 - 73 - /* Get mac address */ 74 - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); 75 - 76 - if (vlan_id < VLAN_N_VID) { 77 - vlan_en = true; 78 - vlan_id |= (rdma_ah_get_sl(ah_attr) & 79 - HNS_ROCE_VLAN_SL_BIT_MASK) << 80 - HNS_ROCE_VLAN_SL_SHIFT; 81 - } 67 + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) 68 + return -EOPNOTSUPP; 82 69 83 70 ah->av.port = rdma_ah_get_port_num(ah_attr); 84 71 ah->av.gid_index = grh->sgid_index; 85 - ah->av.vlan_id = vlan_id; 86 - ah->av.vlan_en = vlan_en; 87 - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, 88 - ah->av.vlan_id); 89 72 90 73 if (rdma_ah_get_static_rate(ah_attr)) 91 74 ah->av.stat_rate = IB_RATE_10_GBPS; 92 75 93 - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); 94 - ah->av.sl = rdma_ah_get_sl(ah_attr); 76 + ah->av.hop_limit = grh->hop_limit; 95 77 ah->av.flowlabel = grh->flow_label; 96 78 ah->av.udp_sport = get_ah_udp_sport(ah_attr); 79 + ah->av.sl = rdma_ah_get_sl(ah_attr); 80 + ah->av.tclass = get_tclass(grh); 97 81 98 - return 0; 82 + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); 83 + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); 84 + 85 + /* HIP08 needs to record vlan info in Address Vector */ 86 + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { 87 + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, 88 + &ah->av.vlan_id, NULL); 89 + if (ret) 90 + return ret; 91 + 92 + ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; 93 + } 94 + 95 + return ret; 99 96 } 100 97 101 98 int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+76 -56
drivers/infiniband/hw/hns/hns_roce_alloc.c
··· 159 159 160 160 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) 161 161 { 162 - struct device *dev = hr_dev->dev; 163 - u32 size = buf->size; 164 - int i; 162 + struct hns_roce_buf_list *trunks; 163 + u32 i; 165 164 166 - if (size == 0) 165 + if (!buf) 167 166 return; 168 167 169 - buf->size = 0; 168 + trunks = buf->trunk_list; 169 + if (trunks) { 170 + buf->trunk_list = NULL; 171 + for (i = 0; i < buf->ntrunks; i++) 172 + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, 173 + trunks[i].buf, trunks[i].map); 170 174 171 - if (hns_roce_buf_is_direct(buf)) { 172 - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); 173 - } else { 174 - for (i = 0; i < buf->npages; ++i) 175 - if (buf->page_list[i].buf) 176 - dma_free_coherent(dev, 1 << buf->page_shift, 177 - buf->page_list[i].buf, 178 - buf->page_list[i].map); 179 - kfree(buf->page_list); 180 - buf->page_list = NULL; 175 + kfree(trunks); 181 176 } 177 + 178 + kfree(buf); 182 179 } 183 180 184 - int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, 185 - struct hns_roce_buf *buf, u32 page_shift) 181 + /* 182 + * Allocate the dma buffer for storing ROCEE table entries 183 + * 184 + * @size: required size 185 + * @page_shift: the unit size in a continuous dma address range 186 + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. 187 + */ 188 + struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, 189 + u32 page_shift, u32 flags) 186 190 { 187 - struct hns_roce_buf_list *buf_list; 188 - struct device *dev = hr_dev->dev; 189 - u32 page_size; 190 - int i; 191 + u32 trunk_size, page_size, alloced_size; 192 + struct hns_roce_buf_list *trunks; 193 + struct hns_roce_buf *buf; 194 + gfp_t gfp_flags; 195 + u32 ntrunk, i; 191 196 192 197 /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ 193 - buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); 198 + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) 199 + return ERR_PTR(-EINVAL); 194 200 201 + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? 
GFP_ATOMIC : GFP_KERNEL; 202 + buf = kzalloc(sizeof(*buf), gfp_flags); 203 + if (!buf) 204 + return ERR_PTR(-ENOMEM); 205 + 206 + buf->page_shift = page_shift; 195 207 page_size = 1 << buf->page_shift; 196 - buf->npages = DIV_ROUND_UP(size, page_size); 197 208 198 - /* required size is not bigger than one trunk size */ 199 - if (size <= max_direct) { 200 - buf->page_list = NULL; 201 - buf->direct.buf = dma_alloc_coherent(dev, size, 202 - &buf->direct.map, 203 - GFP_KERNEL); 204 - if (!buf->direct.buf) 205 - return -ENOMEM; 209 + /* Calc the trunk size and num by required size and page_shift */ 210 + if (flags & HNS_ROCE_BUF_DIRECT) { 211 + buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); 212 + ntrunk = 1; 206 213 } else { 207 - buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); 208 - if (!buf_list) 209 - return -ENOMEM; 210 - 211 - for (i = 0; i < buf->npages; i++) { 212 - buf_list[i].buf = dma_alloc_coherent(dev, page_size, 213 - &buf_list[i].map, 214 - GFP_KERNEL); 215 - if (!buf_list[i].buf) 216 - break; 217 - } 218 - 219 - if (i != buf->npages && i > 0) { 220 - while (i-- > 0) 221 - dma_free_coherent(dev, page_size, 222 - buf_list[i].buf, 223 - buf_list[i].map); 224 - kfree(buf_list); 225 - return -ENOMEM; 226 - } 227 - buf->page_list = buf_list; 214 + buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); 215 + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); 228 216 } 229 - buf->size = size; 230 217 231 - return 0; 218 + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); 219 + if (!trunks) { 220 + kfree(buf); 221 + return ERR_PTR(-ENOMEM); 222 + } 223 + 224 + trunk_size = 1 << buf->trunk_shift; 225 + alloced_size = 0; 226 + for (i = 0; i < ntrunk; i++) { 227 + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, 228 + &trunks[i].map, gfp_flags); 229 + if (!trunks[i].buf) 230 + break; 231 + 232 + alloced_size += trunk_size; 233 + } 234 + 235 + buf->ntrunks = i; 236 + 237 + /* In nofail mode, it's only failed when the alloced size is 0 */ 238 + if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) { 239 + for (i = 0; i < buf->ntrunks; i++) 240 + dma_free_coherent(hr_dev->dev, trunk_size, 241 + trunks[i].buf, trunks[i].map); 242 + 243 + kfree(trunks); 244 + kfree(buf); 245 + return ERR_PTR(-ENOMEM); 246 + } 247 + 248 + buf->npages = DIV_ROUND_UP(alloced_size, page_size); 249 + buf->trunk_list = trunks; 250 + 251 + return buf; 232 252 } 233 253 234 254 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, ··· 260 240 end = start + buf_cnt; 261 241 if (end > buf->npages) { 262 242 dev_err(hr_dev->dev, 263 - "Failed to check kmem bufs, end %d + %d total %d!\n", 243 + "failed to check kmem bufs, end %d + %d total %u!\n", 264 244 start, buf_cnt, buf->npages); 265 245 return -EINVAL; 266 246 } ··· 282 262 u64 addr; 283 263 284 264 if (page_shift < HNS_HW_PAGE_SHIFT) { 285 - dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", 265 + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", 286 266 page_shift); 287 267 return -EINVAL; 288 268 }
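hns_roce_buf_alloc() now returns the buffer object itself and internally splits the request into DMA-coherent trunks. A sketch of how a caller uses the reworked API; the size and surrounding function are illustrative, the real callers being the QP/CQ/SRQ buffer paths:

static int foo_alloc_queue_buf(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_buf *buf;
	void *first_wqe;

	/* 256 KB of queue memory in hardware-page-sized trunks */
	buf = hns_roce_buf_alloc(hr_dev, 256 * 1024, HNS_HW_PAGE_SHIFT, 0);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* offsets are now resolved per trunk instead of per page list entry */
	first_wqe = hns_roce_buf_offset(buf, 0);

	hns_roce_buf_free(hr_dev, buf);
	return 0;
}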
+18 -19
drivers/infiniband/hw/hns/hns_roce_cmd.c
··· 36 36 #include "hns_roce_device.h" 37 37 #include "hns_roce_cmd.h" 38 38 39 - #define CMD_POLL_TOKEN 0xffff 40 - #define CMD_MAX_NUM 32 41 - #define CMD_TOKEN_MASK 0x1f 39 + #define CMD_POLL_TOKEN 0xffff 40 + #define CMD_MAX_NUM 32 41 + #define CMD_TOKEN_MASK 0x1f 42 42 43 43 static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, 44 44 u64 out_param, u32 in_modifier, ··· 60 60 static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, 61 61 u64 out_param, unsigned long in_modifier, 62 62 u8 op_modifier, u16 op, 63 - unsigned long timeout) 63 + unsigned int timeout) 64 64 { 65 65 struct device *dev = hr_dev->dev; 66 66 int ret; ··· 78 78 79 79 static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, 80 80 u64 out_param, unsigned long in_modifier, 81 - u8 op_modifier, u16 op, unsigned long timeout) 81 + u8 op_modifier, u16 op, unsigned int timeout) 82 82 { 83 83 int ret; 84 84 ··· 93 93 void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, 94 94 u64 out_param) 95 95 { 96 - struct hns_roce_cmd_context 97 - *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; 96 + struct hns_roce_cmd_context *context = 97 + &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds]; 98 98 99 99 if (token != context->token) 100 100 return; ··· 108 108 static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, 109 109 u64 out_param, unsigned long in_modifier, 110 110 u8 op_modifier, u16 op, 111 - unsigned long timeout) 111 + unsigned int timeout) 112 112 { 113 113 struct hns_roce_cmdq *cmd = &hr_dev->cmd; 114 114 struct hns_roce_cmd_context *context; ··· 159 159 160 160 static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, 161 161 u64 out_param, unsigned long in_modifier, 162 - u8 op_modifier, u16 op, unsigned long timeout) 162 + u8 op_modifier, u16 op, unsigned int timeout) 163 163 { 164 164 int ret; 165 165 166 166 down(&hr_dev->cmd.event_sem); 167 - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, 168 - in_modifier, op_modifier, op, timeout); 167 + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, 168 + op_modifier, op, timeout); 169 169 up(&hr_dev->cmd.event_sem); 170 170 171 171 return ret; ··· 173 173 174 174 int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, 175 175 unsigned long in_modifier, u8 op_modifier, u16 op, 176 - unsigned long timeout) 176 + unsigned int timeout) 177 177 { 178 178 int ret; 179 179 ··· 231 231 struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; 232 232 int i; 233 233 234 - hr_cmd->context = kmalloc_array(hr_cmd->max_cmds, 235 - sizeof(*hr_cmd->context), 236 - GFP_KERNEL); 234 + hr_cmd->context = 235 + kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); 237 236 if (!hr_cmd->context) 238 237 return -ENOMEM; 239 238 ··· 261 262 hr_cmd->use_events = 0; 262 263 } 263 264 264 - struct hns_roce_cmd_mailbox 265 - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) 265 + struct hns_roce_cmd_mailbox * 266 + hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) 266 267 { 267 268 struct hns_roce_cmd_mailbox *mailbox; 268 269 ··· 270 271 if (!mailbox) 271 272 return ERR_PTR(-ENOMEM); 272 273 273 - mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, 274 - &mailbox->dma); 274 + mailbox->buf = 275 + dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma); 275 276 if (!mailbox->buf) { 276 277 kfree(mailbox); 277 278 return ERR_PTR(-ENOMEM);
+3 -3
drivers/infiniband/hw/hns/hns_roce_cmd.h
··· 141 141 142 142 int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, 143 143 unsigned long in_modifier, u8 op_modifier, u16 op, 144 - unsigned long timeout); 144 + unsigned int timeout); 145 145 146 - struct hns_roce_cmd_mailbox 147 - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); 146 + struct hns_roce_cmd_mailbox * 147 + hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); 148 148 void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, 149 149 struct hns_roce_cmd_mailbox *mailbox); 150 150
+19 -7
drivers/infiniband/hw/hns/hns_roce_common.h
··· 38 38 #define roce_raw_write(value, addr) \ 39 39 __raw_writel((__force u32)cpu_to_le32(value), (addr)) 40 40 41 - #define roce_get_field(origin, mask, shift) \ 42 - (((le32_to_cpu(origin)) & (mask)) >> (shift)) 41 + #define roce_get_field(origin, mask, shift) \ 42 + ((le32_to_cpu(origin) & (mask)) >> (u32)(shift)) 43 43 44 44 #define roce_get_bit(origin, shift) \ 45 45 roce_get_field((origin), (1ul << (shift)), (shift)) 46 46 47 - #define roce_set_field(origin, mask, shift, val) \ 48 - do { \ 49 - (origin) &= ~cpu_to_le32(mask); \ 50 - (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \ 47 + #define roce_set_field(origin, mask, shift, val) \ 48 + do { \ 49 + (origin) &= ~cpu_to_le32(mask); \ 50 + (origin) |= cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \ 51 51 } while (0) 52 52 53 - #define roce_set_bit(origin, shift, val) \ 53 + #define roce_set_bit(origin, shift, val) \ 54 54 roce_set_field((origin), (1ul << (shift)), (shift), (val)) 55 + 56 + #define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l 57 + 58 + #define _hr_reg_enable(ptr, field_type, field_h, field_l) \ 59 + ({ \ 60 + const field_type *_ptr = ptr; \ 61 + *((__le32 *)_ptr + (field_h) / 32) |= \ 62 + cpu_to_le32(BIT((field_l) % 32)) + \ 63 + BUILD_BUG_ON_ZERO((field_h) != (field_l)); \ 64 + }) 65 + 66 + #define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) 55 67 56 68 #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 57 69 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
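The new FIELD_LOC()/hr_reg_enable() helpers let hns describe a single-bit context field once and set it by name instead of open-coding roce_set_bit() with raw shifts. A sketch of the intended usage with a made-up context structure; note that hr_reg_enable() only accepts one-bit fields, hence field_h == field_l:

struct foo_context {
	__le32 data[4];
};

/* bit 65 of the context, i.e. bit 1 of data[2] */
#define FOO_CTX_VALID FIELD_LOC(struct foo_context, 65, 65)

static void foo_ctx_mark_valid(struct foo_context *ctx)
{
	hr_reg_enable(ctx, FOO_CTX_VALID);
}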
+25 -21
drivers/infiniband/hw/hns/hns_roce_cq.c
··· 36 36 #include "hns_roce_device.h" 37 37 #include "hns_roce_cmd.h" 38 38 #include "hns_roce_hem.h" 39 - #include <rdma/hns-abi.h> 40 39 #include "hns_roce_common.h" 41 40 42 41 static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) 43 42 { 43 + struct ib_device *ibdev = &hr_dev->ib_dev; 44 44 struct hns_roce_cmd_mailbox *mailbox; 45 45 struct hns_roce_cq_table *cq_table; 46 - struct ib_device *ibdev = &hr_dev->ib_dev; 47 46 u64 mtts[MTT_MIN_COUNT] = { 0 }; 48 47 dma_addr_t dma_handle; 49 48 int ret; 50 49 51 50 ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), 52 51 &dma_handle); 53 - if (ret < 1) { 54 - ibdev_err(ibdev, "Failed to find CQ mtr\n"); 52 + if (!ret) { 53 + ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); 55 54 return -EINVAL; 56 55 } 57 56 58 57 cq_table = &hr_dev->cq_table; 59 58 ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); 60 59 if (ret) { 61 - ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); 60 + ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); 62 61 return ret; 63 62 } 64 63 65 64 /* Get CQC memory HEM(Hardware Entry Memory) table */ 66 65 ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); 67 66 if (ret) { 68 - ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", 67 + ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", 69 68 hr_cq->cqn, ret); 70 69 goto err_out; 71 70 } 72 71 73 72 ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); 74 73 if (ret) { 75 - ibdev_err(ibdev, "Failed to xa_store CQ\n"); 74 + ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); 76 75 goto err_put; 77 76 } 78 77 ··· 90 91 hns_roce_free_cmd_mailbox(hr_dev, mailbox); 91 92 if (ret) { 92 93 ibdev_err(ibdev, 93 - "Failed to send create cmd for CQ(0x%lx), err %d\n", 94 + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", 94 95 hr_cq->cqn, ret); 95 96 goto err_xa; 96 97 } ··· 146 147 { 147 148 struct ib_device *ibdev = &hr_dev->ib_dev; 148 149 struct hns_roce_buf_attr buf_attr = {}; 149 - int err; 150 + int ret; 150 151 151 152 buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; 152 153 buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; ··· 154 155 buf_attr.region_count = 1; 155 156 buf_attr.fixed_page = true; 156 157 157 - err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, 158 + ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, 158 159 hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, 159 160 udata, addr); 160 - if (err) 161 - ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); 161 + if (ret) 162 + ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); 162 163 163 - return err; 164 + return ret; 164 165 } 165 166 166 167 static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ··· 250 251 u32 cq_entries = attr->cqe; 251 252 int ret; 252 253 254 + if (attr->flags) 255 + return -EOPNOTSUPP; 256 + 253 257 if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { 254 - ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", 258 + ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n", 255 259 cq_entries, hr_dev->caps.max_cqes); 256 260 return -EINVAL; 257 261 } 258 262 259 263 if (vector >= hr_dev->caps.num_comp_vectors) { 260 - ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", 264 + ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n", 261 265 vector, hr_dev->caps.num_comp_vectors); 262 266 return -EINVAL; 263 267 } ··· 276 274 277 
275 if (udata) { 278 276 ret = ib_copy_from_udata(&ucmd, udata, 279 - min(sizeof(ucmd), udata->inlen)); 277 + min(udata->inlen, sizeof(ucmd))); 280 278 if (ret) { 281 - ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", 279 + ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", 282 280 ret); 283 281 return ret; 284 282 } ··· 288 286 289 287 ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); 290 288 if (ret) { 291 - ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); 289 + ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); 292 290 return ret; 293 291 } 294 292 295 293 ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); 296 294 if (ret) { 297 - ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); 295 + ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret); 298 296 goto err_cq_buf; 299 297 } 300 298 301 299 ret = alloc_cqc(hr_dev, hr_cq); 302 300 if (ret) { 303 - ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); 301 + ibdev_err(ibdev, 302 + "failed to alloc CQ context, ret = %d.\n", ret); 304 303 goto err_cq_db; 305 304 } 306 305 ··· 316 313 317 314 if (udata) { 318 315 resp.cqn = hr_cq->cqn; 319 - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); 316 + ret = ib_copy_to_udata(udata, &resp, 317 + min(udata->outlen, sizeof(resp))); 320 318 if (ret) 321 319 goto err_cqc; 322 320 }
+4 -4
drivers/infiniband/hw/hns/hns_roce_db.c
··· 95 95 static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, 96 96 struct hns_roce_db *db, int order) 97 97 { 98 - int o; 99 - int i; 98 + unsigned long o; 99 + unsigned long i; 100 100 101 101 for (o = order; o <= 1; ++o) { 102 102 i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o); ··· 154 154 155 155 void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) 156 156 { 157 - int o; 158 - int i; 157 + unsigned long o; 158 + unsigned long i; 159 159 160 160 mutex_lock(&hr_dev->pgdir_mutex); 161 161
+92 -86
drivers/infiniband/hw/hns/hns_roce_device.h
··· 34 34 #define _HNS_ROCE_DEVICE_H 35 35 36 36 #include <rdma/ib_verbs.h> 37 + #include <rdma/hns-abi.h> 37 38 38 39 #define DRV_NAME "hns_roce" 39 40 ··· 118 117 #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 119 118 #define SRQ_DB_REG 0x230 120 119 120 + #define HNS_ROCE_QP_BANK_NUM 8 121 + 121 122 /* The chip implementation of the consumer index is calculated 122 123 * according to twice the actual EQ depth 123 124 */ ··· 130 127 SERV_TYPE_UC, 131 128 SERV_TYPE_RD, 132 129 SERV_TYPE_UD, 133 - }; 134 - 135 - enum { 136 - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), 137 - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), 138 - }; 139 - 140 - enum hns_roce_cq_flags { 141 - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), 142 130 }; 143 131 144 132 enum hns_roce_qp_state { ··· 160 166 /* 0x10 and 0x11 is unused in currently application case */ 161 167 HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, 162 168 HNS_ROCE_EVENT_TYPE_MB = 0x13, 163 - HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, 164 169 HNS_ROCE_EVENT_TYPE_FLR = 0x15, 165 170 }; 166 171 ··· 214 221 HNS_ROCE_CAP_FLAG_FRMR = BIT(8), 215 222 HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), 216 223 HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), 224 + HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), 225 + HNS_ROCE_CAP_FLAG_STASH = BIT(17), 217 226 }; 218 227 219 228 #define HNS_ROCE_DB_TYPE_COUNT 2 ··· 259 264 /* The minimum page size is 4K for hardware */ 260 265 #define HNS_HW_PAGE_SHIFT 12 261 266 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) 262 - 263 - /* The minimum page count for hardware access page directly. */ 264 - #define HNS_HW_DIRECT_PAGE_COUNT 2 265 267 266 268 struct hns_roce_uar { 267 269 u64 pfn; ··· 310 318 }; 311 319 312 320 struct hns_roce_buf_region { 313 - int offset; /* page offset */ 321 + u32 offset; /* page offset */ 314 322 u32 count; /* page count */ 315 323 int hopnum; /* addressing hop num */ 316 324 }; ··· 330 338 size_t size; /* region size */ 331 339 int hopnum; /* multi-hop addressing hop num */ 332 340 } region[HNS_ROCE_MAX_BT_REGION]; 333 - int region_count; /* valid region count */ 341 + unsigned int region_count; /* valid region count */ 334 342 unsigned int page_shift; /* buffer page shift */ 335 343 bool fixed_page; /* decide page shift is fixed-size or maximum size */ 336 - int user_access; /* umem access flag */ 344 + unsigned int user_access; /* umem access flag */ 337 345 bool mtt_only; /* only alloc buffer-required MTT memory */ 338 346 }; 339 347 ··· 344 352 unsigned int buf_pg_shift; /* buffer page shift */ 345 353 unsigned int buf_pg_count; /* buffer page count */ 346 354 struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; 347 - int region_count; 355 + unsigned int region_count; 348 356 }; 349 357 350 358 /* memory translate region */ ··· 392 400 u64 *wrid; /* Work request ID */ 393 401 spinlock_t lock; 394 402 u32 wqe_cnt; /* WQE num */ 395 - int max_gs; 403 + u32 max_gs; 396 404 int offset; 397 405 int wqe_shift; /* WQE size */ 398 406 u32 head; ··· 411 419 dma_addr_t map; 412 420 }; 413 421 422 + /* 423 + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous 424 + * dma address range. 425 + * 426 + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. 427 + * 428 + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even 429 + * the allocated size is smaller than the required size. 
430 + */ 431 + enum { 432 + HNS_ROCE_BUF_DIRECT = BIT(0), 433 + HNS_ROCE_BUF_NOSLEEP = BIT(1), 434 + HNS_ROCE_BUF_NOFAIL = BIT(2), 435 + }; 436 + 414 437 struct hns_roce_buf { 415 - struct hns_roce_buf_list direct; 416 - struct hns_roce_buf_list *page_list; 438 + struct hns_roce_buf_list *trunk_list; 439 + u32 ntrunks; 417 440 u32 npages; 418 - u32 size; 441 + unsigned int trunk_shift; 419 442 unsigned int page_shift; 420 443 }; 421 444 ··· 458 451 } u; 459 452 dma_addr_t dma; 460 453 void *virt_addr; 461 - int index; 462 - int order; 454 + unsigned long index; 455 + unsigned long order; 463 456 }; 464 457 465 458 struct hns_roce_cq { ··· 507 500 u64 *wrid; 508 501 struct hns_roce_idx_que idx_que; 509 502 spinlock_t lock; 510 - int head; 511 - int tail; 503 + u16 head; 504 + u16 tail; 512 505 struct mutex mutex; 513 506 void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); 514 507 }; ··· 517 510 struct hns_roce_bitmap bitmap; 518 511 }; 519 512 513 + struct hns_roce_bank { 514 + struct ida ida; 515 + u32 inuse; /* Number of IDs allocated */ 516 + u32 min; /* Lowest ID to allocate. */ 517 + u32 max; /* Highest ID to allocate. */ 518 + u32 next; /* Next ID to allocate. */ 519 + }; 520 + 520 521 struct hns_roce_qp_table { 521 - struct hns_roce_bitmap bitmap; 522 522 struct hns_roce_hem_table qp_table; 523 523 struct hns_roce_hem_table irrl_table; 524 524 struct hns_roce_hem_table trrl_table; 525 525 struct hns_roce_hem_table sccc_table; 526 526 struct mutex scc_mutex; 527 + struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; 528 + spinlock_t bank_lock; 527 529 }; 528 530 529 531 struct hns_roce_cq_table { ··· 563 547 u8 dgid[HNS_ROCE_GID_SIZE]; 564 548 u8 mac[ETH_ALEN]; 565 549 u16 vlan_id; 566 - bool vlan_en; 550 + u8 vlan_en; 567 551 }; 568 552 569 553 struct hns_roce_ah { ··· 635 619 struct hns_roce_work { 636 620 struct hns_roce_dev *hr_dev; 637 621 struct work_struct work; 638 - u32 qpn; 639 - u32 cqn; 640 622 int event_type; 641 623 int sub_type; 624 + u32 queue_num; 642 625 }; 643 626 644 627 struct hns_roce_qp { ··· 705 690 __le32 asyn; 706 691 union { 707 692 struct { 708 - __le32 qp; 693 + __le32 num; 709 694 u32 rsv0; 710 695 u32 rsv1; 711 - } qp_event; 712 - 713 - struct { 714 - __le32 srq; 715 - u32 rsv0; 716 - u32 rsv1; 717 - } srq_event; 718 - 719 - struct { 720 - __le32 cq; 721 - u32 rsv0; 722 - u32 rsv1; 723 - } cq_event; 724 - 725 - struct { 726 - __le32 ceqe; 727 - u32 rsv0; 728 - u32 rsv1; 729 - } ce_event; 696 + } queue_event; 730 697 731 698 struct { 732 699 __le64 out_param; ··· 727 730 int type_flag; /* Aeq:1 ceq:0 */ 728 731 int eqn; 729 732 u32 entries; 730 - int log_entries; 733 + u32 log_entries; 731 734 int eqe_size; 732 735 int irq; 733 736 int log_page_size; 734 - int cons_index; 737 + u32 cons_index; 735 738 struct hns_roce_buf_list *buf_list; 736 739 int over_ignore; 737 740 int coalesce; ··· 739 742 int hop_num; 740 743 struct hns_roce_mtr mtr; 741 744 u16 eq_max_cnt; 742 - int eq_period; 745 + u32 eq_period; 743 746 int shift; 744 747 int event_type; 745 748 int sub_type; ··· 762 765 u32 max_sq_inline; 763 766 u32 max_rq_sg; 764 767 u32 max_extend_sg; 765 - int num_qps; 766 - int reserved_qps; 768 + u32 num_qps; 769 + u32 reserved_qps; 767 770 int num_qpc_timer; 768 771 int num_cqc_timer; 769 772 int num_srqs; ··· 775 778 u32 max_srq_desc_sz; 776 779 int max_qp_init_rdma; 777 780 int max_qp_dest_rdma; 778 - int num_cqs; 781 + u32 num_cqs; 779 782 u32 max_cqes; 780 783 u32 min_cqes; 781 784 u32 min_wqes; ··· 784 787 int num_aeq_vectors; 785 788 int 
num_comp_vectors; 786 789 int num_other_vectors; 787 - int num_mtpts; 790 + u32 num_mtpts; 788 791 u32 num_mtt_segs; 789 792 u32 num_cqe_segs; 790 793 u32 num_srqwqe_segs; ··· 822 825 u32 cqc_timer_bt_num; 823 826 u32 mpt_bt_num; 824 827 u32 sccc_bt_num; 828 + u32 gmv_bt_num; 825 829 u32 qpc_ba_pg_sz; 826 830 u32 qpc_buf_pg_sz; 827 831 u32 qpc_hop_num; ··· 862 864 u32 eqe_ba_pg_sz; 863 865 u32 eqe_buf_pg_sz; 864 866 u32 eqe_hop_num; 867 + u32 gmv_entry_num; 868 + u32 gmv_entry_sz; 869 + u32 gmv_ba_pg_sz; 870 + u32 gmv_buf_pg_sz; 871 + u32 gmv_hop_num; 865 872 u32 sl_num; 866 873 u32 tsq_buf_pg_sz; 867 874 u32 tpq_buf_pg_sz; ··· 901 898 int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, 902 899 u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, 903 900 u16 token, int event); 904 - int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); 901 + int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned int timeout); 905 902 int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev); 906 903 int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, 907 904 const union ib_gid *gid, const struct ib_gid_attr *attr); ··· 1002 999 struct hns_roce_eq_table eq_table; 1003 1000 struct hns_roce_hem_table qpc_timer_table; 1004 1001 struct hns_roce_hem_table cqc_timer_table; 1002 + /* GMV is the memory area that the driver allocates for the hardware 1003 + * to store SGID, SMAC and VLAN information. 1004 + */ 1005 + struct hns_roce_hem_table gmv_table; 1005 1006 1006 1007 int cmd_mod; 1007 1008 int loop_idc; ··· 1076 1069 return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); 1077 1070 } 1078 1071 1079 - static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) 1072 + static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, 1073 + unsigned int offset) 1080 1074 { 1081 - if (buf->page_list) 1082 - return false; 1083 - 1084 - return true; 1075 + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + 1076 + (offset & ((1 << buf->trunk_shift) - 1)); 1085 1077 } 1086 1078 1087 - static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) 1079 + static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) 1088 1080 { 1089 - if (hns_roce_buf_is_direct(buf)) 1090 - return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); 1081 + unsigned int offset = idx << buf->page_shift; 1091 1082 1092 - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + 1093 - (offset & ((1 << buf->page_shift) - 1)); 1094 - } 1095 - 1096 - static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) 1097 - { 1098 - if (hns_roce_buf_is_direct(buf)) 1099 - return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); 1100 - else 1101 - return buf->page_list[idx].map; 1083 + return buf->trunk_list[offset >> buf->trunk_shift].map + 1084 + (offset & ((1 << buf->trunk_shift) - 1)); 1102 1085 } 1103 1086 1104 1087 #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) ··· 1129 1132 return ilog2(to_hr_hem_entries_count(count, buf_shift)); 1130 1133 } 1131 1134 1135 + #define DSCP_SHIFT 2 1136 + 1137 + static inline u8 get_tclass(const struct ib_global_route *grh) 1138 + { 1139 + return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ? 
1140 + grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; 1141 + } 1142 + 1132 1143 int hns_roce_init_uar_table(struct hns_roce_dev *dev); 1133 1144 int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); 1134 1145 void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); ··· 1160 1155 void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, 1161 1156 struct hns_roce_mtr *mtr); 1162 1157 int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, 1163 - dma_addr_t *pages, int page_cnt); 1158 + dma_addr_t *pages, unsigned int page_cnt); 1164 1159 1165 1160 int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); 1166 1161 int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); ··· 1203 1198 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1204 1199 u64 virt_addr, int access_flags, 1205 1200 struct ib_udata *udata); 1206 - int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, 1207 - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, 1208 - struct ib_udata *udata); 1201 + struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, 1202 + u64 length, u64 virt_addr, 1203 + int mr_access_flags, struct ib_pd *pd, 1204 + struct ib_udata *udata); 1209 1205 struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1210 1206 u32 max_num_sg); 1211 1207 int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ··· 1221 1215 int hns_roce_dealloc_mw(struct ib_mw *ibmw); 1222 1216 1223 1217 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); 1224 - int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, 1225 - struct hns_roce_buf *buf, u32 page_shift); 1218 + struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, 1219 + u32 page_shift, u32 flags); 1226 1220 1227 1221 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, 1228 1222 int buf_cnt, int start, struct hns_roce_buf *buf); ··· 1244 1238 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1245 1239 int attr_mask, struct ib_udata *udata); 1246 1240 void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); 1247 - void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n); 1248 - void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n); 1249 - void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n); 1250 - bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, 1241 + void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n); 1242 + void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); 1243 + void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); 1244 + bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, 1251 1245 struct ib_cq *ib_cq); 1252 1246 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); 1253 1247 void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, ··· 1277 1271 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); 1278 1272 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); 1279 1273 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); 1280 - int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); 1274 + u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); 1281 1275 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); 1282 1276 
int hns_roce_init(struct hns_roce_dev *hr_dev); 1283 1277 void hns_roce_exit(struct hns_roce_dev *hr_dev);
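The reworked hns_roce_buf drops the direct/page_list split: the buffer is now always a list of DMA trunks, and the new hns_roce_buf_offset() resolves an offset in two steps: the high bits (offset >> trunk_shift) select a trunk and the low bits index inside it. A minimal userspace sketch of that arithmetic, assuming a 64KB trunk (trunk_shift = 16) purely for illustration:

#include <stdio.h>

/* Illustrative stand-in for struct hns_roce_buf_list: one trunk. */
struct trunk {
	char *buf;			/* kernel virtual address of the trunk */
};

/* Same split as the new hns_roce_buf_offset(): high bits pick the
 * trunk, low bits are the byte offset inside that trunk. */
static char *buf_offset(struct trunk *trunks, unsigned int trunk_shift,
			unsigned int offset)
{
	return trunks[offset >> trunk_shift].buf +
	       (offset & ((1u << trunk_shift) - 1));
}

int main(void)
{
	static char t0[1 << 16], t1[1 << 16];	/* two assumed 64KB trunks */
	struct trunk trunks[2] = { { t0 }, { t1 } };

	/* Offset 0x12345 with trunk_shift = 16 lands in trunk 1 at 0x2345. */
	printf("%td\n", buf_offset(trunks, 16, 0x12345) - t1);	/* 9029 == 0x2345 */
	return 0;
}

hns_roce_buf_page() follows the same pattern after converting the page index into a byte offset with idx << page_shift, so a page never straddles two trunks as long as page_shift does not exceed trunk_shift.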
+37 -22
drivers/infiniband/hw/hns/hns_roce_hem.c
··· 75 75 case HEM_TYPE_CQC_TIMER: 76 76 hop_num = hr_dev->caps.cqc_timer_hop_num; 77 77 break; 78 + case HEM_TYPE_GMV: 79 + hop_num = hr_dev->caps.gmv_hop_num; 80 + break; 78 81 default: 79 82 return false; 80 83 } ··· 186 183 mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; 187 184 mhop->hop_num = hr_dev->caps.srqc_hop_num; 188 185 break; 186 + case HEM_TYPE_GMV: 187 + mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz + 188 + PAGE_SHIFT); 189 + mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz + 190 + PAGE_SHIFT); 191 + mhop->ba_l0_num = hr_dev->caps.gmv_bt_num; 192 + mhop->hop_num = hr_dev->caps.gmv_hop_num; 193 + break; 189 194 default: 190 - dev_err(dev, "Table %d not support multi-hop addressing!\n", 195 + dev_err(dev, "table %u not support multi-hop addressing!\n", 191 196 type); 192 197 return -EINVAL; 193 198 } ··· 209 198 { 210 199 struct device *dev = hr_dev->dev; 211 200 u32 chunk_ba_num; 201 + u32 chunk_size; 212 202 u32 table_idx; 213 203 u32 bt_num; 214 - u32 chunk_size; 215 204 216 205 if (get_hem_table_config(hr_dev, mhop, table->type)) 217 206 return -EINVAL; ··· 243 232 mhop->l0_idx = table_idx; 244 233 break; 245 234 default: 246 - dev_err(dev, "Table %d not support hop_num = %d!\n", 247 - table->type, mhop->hop_num); 235 + dev_err(dev, "table %u not support hop_num = %u!\n", 236 + table->type, mhop->hop_num); 248 237 return -EINVAL; 249 238 } 250 239 if (mhop->l0_idx >= mhop->ba_l0_num) ··· 343 332 { 344 333 spinlock_t *lock = &hr_dev->bt_cmd_lock; 345 334 struct device *dev = hr_dev->dev; 346 - long end; 347 - unsigned long flags; 348 335 struct hns_roce_hem_iter iter; 349 336 void __iomem *bt_cmd; 350 337 __le32 bt_cmd_val[2]; 351 338 __le32 bt_cmd_h = 0; 339 + unsigned long flags; 352 340 __le32 bt_cmd_l; 353 - u64 bt_ba; 354 341 int ret = 0; 342 + u64 bt_ba; 343 + long end; 355 344 356 345 /* Find the HEM(Hardware Entry Memory) entry */ 357 346 unsigned long i = (obj & (table->num_obj - 1)) / ··· 449 438 index->buf = l0_idx; 450 439 break; 451 440 default: 452 - ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", 441 + ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", 453 442 table->type, mhop->hop_num); 454 443 return -EINVAL; 455 444 } 456 445 457 446 if (unlikely(index->buf >= table->num_hem)) { 458 - ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", 447 + ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", 459 448 table->type, index->buf, table->num_hem); 460 449 return -EINVAL; 461 450 } ··· 651 640 struct hns_roce_hem_table *table, unsigned long obj) 652 641 { 653 642 struct device *dev = hr_dev->dev; 654 - int ret = 0; 655 643 unsigned long i; 644 + int ret = 0; 656 645 657 646 if (hns_roce_check_whether_mhop(hr_dev, table->type)) 658 647 return hns_roce_table_mhop_get(hr_dev, table, obj); ··· 725 714 step_idx = hop_num; 726 715 727 716 if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) 728 - ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); 717 + ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); 729 718 730 719 if (index->inited & HEM_INDEX_L1) 731 720 if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) 732 - ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); 721 + ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); 733 722 734 723 if (index->inited & HEM_INDEX_L0) 735 724 if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) 736 - ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); 725 + ibdev_warn(ibdev, "failed to clear HEM step 0.\n"); 737 726 } 738 727 } 739 728 ··· 800 789 struct 
hns_roce_hem_chunk *chunk; 801 790 struct hns_roce_hem_mhop mhop; 802 791 struct hns_roce_hem *hem; 803 - void *addr = NULL; 804 792 unsigned long mhop_obj = obj; 805 793 unsigned long obj_per_chunk; 806 794 unsigned long idx_offset; 807 795 int offset, dma_offset; 796 + void *addr = NULL; 797 + u32 hem_idx = 0; 808 798 int length; 809 799 int i, j; 810 - u32 hem_idx = 0; 811 800 812 801 if (!table->lowmem) 813 802 return NULL; ··· 887 876 unsigned long buf_chunk_size; 888 877 unsigned long bt_chunk_size; 889 878 unsigned long bt_chunk_num; 890 - unsigned long num_bt_l0 = 0; 879 + unsigned long num_bt_l0; 891 880 u32 hop_num; 892 881 893 882 if (get_hem_table_config(hr_dev, &mhop, type)) ··· 977 966 { 978 967 struct hns_roce_hem_mhop mhop; 979 968 u32 buf_chunk_size; 980 - int i; 981 969 u64 obj; 970 + int i; 982 971 983 972 if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) 984 973 return; ··· 1028 1017 1029 1018 void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) 1030 1019 { 1031 - if (hr_dev->caps.srqc_entry_sz) 1020 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) 1032 1021 hns_roce_cleanup_hem_table(hr_dev, 1033 1022 &hr_dev->srq_table.table); 1034 1023 hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); ··· 1038 1027 if (hr_dev->caps.cqc_timer_entry_sz) 1039 1028 hns_roce_cleanup_hem_table(hr_dev, 1040 1029 &hr_dev->cqc_timer_table); 1041 - if (hr_dev->caps.sccc_sz) 1030 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) 1042 1031 hns_roce_cleanup_hem_table(hr_dev, 1043 1032 &hr_dev->qp_table.sccc_table); 1044 1033 if (hr_dev->caps.trrl_entry_sz) 1045 1034 hns_roce_cleanup_hem_table(hr_dev, 1046 1035 &hr_dev->qp_table.trrl_table); 1036 + 1037 + if (hr_dev->caps.gmv_entry_sz) 1038 + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table); 1039 + 1047 1040 hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); 1048 1041 hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); 1049 1042 hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); ··· 1249 1234 } 1250 1235 1251 1236 if (offset < r->offset) { 1252 - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", 1237 + dev_err(hr_dev->dev, "invalid offset %d, min %u!\n", 1253 1238 offset, r->offset); 1254 1239 return -EINVAL; 1255 1240 } ··· 1313 1298 const struct hns_roce_buf_region *regions, 1314 1299 int region_cnt) 1315 1300 { 1316 - struct roce_hem_item *hem, *temp_hem, *root_hem; 1317 1301 struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; 1302 + struct roce_hem_item *hem, *temp_hem, *root_hem; 1318 1303 const struct hns_roce_buf_region *r; 1319 1304 struct list_head temp_root; 1320 1305 struct list_head temp_btm; ··· 1419 1404 { 1420 1405 const struct hns_roce_buf_region *r; 1421 1406 int ofs, end; 1422 - int ret; 1423 1407 int unit; 1408 + int ret; 1424 1409 int i; 1425 1410 1426 1411 if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
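For the new HEM_TYPE_GMV case both page-size exponents are set to 0 by the hw_v2 profile, so get_hem_table_config() ends up with one system page per buffer chunk and per base-address chunk, and the hw_v2 caps setup derives the entry count from gmv_bt_num. A rough sizing check, treating gmv_bt_num = 256 (the HNS_ROCE_VF_GMV_BT_NUM define) and a 4KB PAGE_SIZE as assumptions for illustration; the real bt_num is read back from the PF resource query:

#include <stdio.h>

#define PAGE_SHIFT	12			/* assumed 4KB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define GMV_ENTRY_SZ	32			/* HNS_ROCE_V3_GMV_ENTRY_SZ */

int main(void)
{
	unsigned long gmv_buf_pg_sz = 0, gmv_ba_pg_sz = 0, gmv_bt_num = 256;

	unsigned long buf_chunk = 1UL << (gmv_buf_pg_sz + PAGE_SHIFT);
	unsigned long bt_chunk = 1UL << (gmv_ba_pg_sz + PAGE_SHIFT);
	unsigned long entries = gmv_bt_num * (PAGE_SIZE / GMV_ENTRY_SZ);

	/* 4096-byte chunks, 256 * 128 = 32768 GID/MAC/VLAN entries */
	printf("%lu %lu %lu\n", buf_chunk, bt_chunk, entries);
	return 0;
}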
+2 -1
drivers/infiniband/hw/hns/hns_roce_hem.h
··· 47 47 HEM_TYPE_SCCC, 48 48 HEM_TYPE_QPC_TIMER, 49 49 HEM_TYPE_CQC_TIMER, 50 + HEM_TYPE_GMV, 50 51 51 52 /* UNMAP HEM */ 52 53 HEM_TYPE_MTT, ··· 175 174 return sg_dma_address(&iter->chunk->mem[iter->page_idx]); 176 175 } 177 176 178 - #endif /*_HNS_ROCE_HEM_H*/ 177 + #endif /* _HNS_ROCE_HEM_H */
+18 -32
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
··· 239 239 break; 240 240 } 241 241 242 - /*Ctrl field, ctrl set type: sig, solic, imm, fence */ 242 + /* Ctrl field, ctrl set type: sig, solic, imm, fence */ 243 243 /* SO wait for conforming application scenarios */ 244 244 ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? 245 245 cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | ··· 288 288 ret = -EINVAL; 289 289 *bad_wr = wr; 290 290 dev_err(dev, "inline len(1-%d)=%d, illegal", 291 - ctrl->msg_length, 291 + le32_to_cpu(ctrl->msg_length), 292 292 hr_dev->caps.max_sq_inline); 293 293 goto out; 294 294 } ··· 300 300 } 301 301 ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); 302 302 } else { 303 - /*sqe num is two */ 303 + /* sqe num is two */ 304 304 for (i = 0; i < wr->num_sge; i++) 305 305 set_data_seg(dseg + i, wr->sg_list + i); 306 306 ··· 353 353 unsigned long flags = 0; 354 354 unsigned int wqe_idx; 355 355 int ret = 0; 356 - int nreq = 0; 357 - int i = 0; 356 + int nreq; 357 + int i; 358 358 u32 reg_val; 359 359 360 360 spin_lock_irqsave(&hr_qp->rq.lock, flags); ··· 1165 1165 } 1166 1166 raq->e_raq_buf->map = addr; 1167 1167 1168 - /* Configure raq extended address. 48bit 4K align*/ 1168 + /* Configure raq extended address. 48bit 4K align */ 1169 1169 roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); 1170 1170 1171 1171 /* Configure raq_shift */ ··· 1639 1639 } 1640 1640 1641 1641 static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, 1642 - unsigned long timeout) 1642 + unsigned int timeout) 1643 1643 { 1644 1644 u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; 1645 1645 unsigned long end; ··· 2062 2062 CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); 2063 2063 } 2064 2064 2065 - static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) 2066 - { 2067 - return -EOPNOTSUPP; 2068 - } 2069 - 2070 2065 static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, 2071 2066 enum ib_cq_notify_flags flags) 2072 2067 { ··· 2300 2305 struct hns_roce_qp *cur_qp = NULL; 2301 2306 unsigned long flags; 2302 2307 int npolled; 2303 - int ret = 0; 2308 + int ret; 2304 2309 2305 2310 spin_lock_irqsave(&hr_cq->lock, flags); 2306 2311 ··· 2760 2765 roce_set_field(context->qpc_bytes_16, 2761 2766 QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, 2762 2767 QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); 2763 - 2764 2768 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { 2765 2769 roce_set_field(context->qpc_bytes_4, 2766 2770 QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, ··· 3255 3261 enum ib_qp_state cur_state, 3256 3262 enum ib_qp_state new_state) 3257 3263 { 3264 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 3265 + return -EOPNOTSUPP; 3258 3266 3259 3267 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) 3260 3268 return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, ··· 3600 3604 return 0; 3601 3605 } 3602 3606 3603 - static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) 3607 + static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not) 3604 3608 { 3605 3609 roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) | 3606 - (req_not << eq->log_entries), eq->doorbell); 3610 + (req_not << eq->log_entries), eq->doorbell); 3607 3611 } 3608 3612 3609 3613 static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev, ··· 3683 3687 int phy_port; 3684 3688 int qpn; 3685 3689 3686 - qpn = roce_get_field(aeqe->event.qp_event.qp, 3690 + qpn = roce_get_field(aeqe->event.queue_event.num, 3687 3691 HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, 3688 3692 
HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); 3689 - phy_port = roce_get_field(aeqe->event.qp_event.qp, 3693 + phy_port = roce_get_field(aeqe->event.queue_event.num, 3690 3694 HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, 3691 3695 HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); 3692 3696 if (qpn <= 1) ··· 3717 3721 struct device *dev = &hr_dev->pdev->dev; 3718 3722 u32 cqn; 3719 3723 3720 - cqn = roce_get_field(aeqe->event.cq_event.cq, 3721 - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, 3722 - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); 3724 + cqn = roce_get_field(aeqe->event.queue_event.num, 3725 + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, 3726 + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); 3723 3727 3724 3728 switch (event_type) { 3725 3729 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: ··· 3794 3798 int event_type; 3795 3799 3796 3800 while ((aeqe = next_aeqe_sw_v1(eq))) { 3797 - 3798 3801 /* Make sure we read the AEQ entry after we have checked the 3799 3802 * ownership bit 3800 3803 */ ··· 3848 3853 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 3849 3854 hns_roce_v1_db_overflow_handle(hr_dev, aeqe); 3850 3855 break; 3851 - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: 3852 - dev_warn(dev, "CEQ 0x%lx overflow.\n", 3853 - roce_get_field(aeqe->event.ce_event.ceqe, 3854 - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, 3855 - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); 3856 - break; 3857 3856 default: 3858 3857 dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n", 3859 3858 event_type, eq->eqn, eq->cons_index); ··· 3892 3903 u32 cqn; 3893 3904 3894 3905 while ((ceqe = next_ceqe_sw_v1(eq))) { 3895 - 3896 3906 /* Make sure we read CEQ entry after we have checked the 3897 3907 * ownership bit 3898 3908 */ ··· 4117 4129 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; 4118 4130 struct device *dev = &hr_dev->pdev->dev; 4119 4131 dma_addr_t tmp_dma_addr; 4120 - u32 eqcuridx_val = 0; 4132 + u32 eqcuridx_val; 4121 4133 u32 eqconsindx_val; 4122 4134 u32 eqshift_val; 4123 4135 __le32 tmp2 = 0; ··· 4335 4347 4336 4348 static const struct ib_device_ops hns_roce_v1_dev_ops = { 4337 4349 .destroy_qp = hns_roce_v1_destroy_qp, 4338 - .modify_cq = hns_roce_v1_modify_cq, 4339 4350 .poll_cq = hns_roce_v1_poll_cq, 4340 4351 .post_recv = hns_roce_v1_post_recv, 4341 4352 .post_send = hns_roce_v1_post_send, ··· 4354 4367 .set_mtu = hns_roce_v1_set_mtu, 4355 4368 .write_mtpt = hns_roce_v1_write_mtpt, 4356 4369 .write_cqc = hns_roce_v1_write_cqc, 4357 - .modify_cq = hns_roce_v1_modify_cq, 4358 4370 .clear_hem = hns_roce_v1_clear_hem, 4359 4371 .modify_qp = hns_roce_v1_modify_qp, 4360 4372 .query_qp = hns_roce_v1_query_qp,
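Both the v1 and v2 AEQE handlers now pull the queue number out of the single queue_event.num word; what distinguishes a QP, CQ or SRQ event is only the event type plus the mask/shift pair applied to that word. A self-contained sketch of the mask-and-shift idea, assuming a hypothetical 24-bit queue-number field; the helper below is a stand-in for illustration, not the driver's roce_get_field() macro:

#include <stdio.h>
#include <stdint.h>

/* Mask out the field, then shift it down to bit 0. */
static uint32_t get_field(uint32_t word, uint32_t mask, unsigned int shift)
{
	return (word & mask) >> shift;
}

int main(void)
{
	uint32_t aeqe_num = 0xab001234;	/* hypothetical AEQE word */
	uint32_t qpn = get_field(aeqe_num, 0x00ffffff, 0);

	printf("queue number = 0x%x\n", qpn);	/* 0x1234 */
	return 0;
}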
+1 -1
drivers/infiniband/hw/hns/hns_roce_hw_v1.h
··· 419 419 420 420 struct hns_roce_wqe_raddr_seg { 421 421 __le32 rkey; 422 - __le32 len;/* reserved */ 422 + __le32 len; /* reserved */ 423 423 __le64 raddr; 424 424 }; 425 425
+315 -243
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 214 214 return 0; 215 215 } 216 216 217 - static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, 218 - unsigned int *sge_ind, unsigned int valid_num_sge) 217 + static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge, 218 + unsigned int *sge_ind, unsigned int cnt) 219 219 { 220 220 struct hns_roce_v2_wqe_data_seg *dseg; 221 - unsigned int cnt = valid_num_sge; 222 - struct ib_sge *sge = wr->sg_list; 223 221 unsigned int idx = *sge_ind; 224 - 225 - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { 226 - cnt -= HNS_ROCE_SGE_IN_WQE; 227 - sge += HNS_ROCE_SGE_IN_WQE; 228 - } 229 222 230 223 while (cnt > 0) { 231 224 dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); 232 - set_data_seg_v2(dseg, sge); 233 - idx++; 225 + if (likely(sge->length)) { 226 + set_data_seg_v2(dseg, sge); 227 + idx++; 228 + cnt--; 229 + } 234 230 sge++; 235 - cnt--; 236 231 } 237 232 238 233 *sge_ind = idx; ··· 335 340 } 336 341 } 337 342 338 - set_extend_sge(qp, wr, sge_ind, valid_num_sge); 343 + set_extend_sge(qp, wr->sg_list + i, sge_ind, 344 + valid_num_sge - HNS_ROCE_SGE_IN_WQE); 339 345 } 340 346 341 347 roce_set_field(rc_sq_wqe->byte_16, ··· 361 365 } else if (unlikely(hr_qp->state == IB_QPS_RESET || 362 366 hr_qp->state == IB_QPS_INIT || 363 367 hr_qp->state == IB_QPS_RTR)) { 364 - ibdev_err(ibdev, "failed to post WQE, QP state %d!\n", 368 + ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", 365 369 hr_qp->state); 366 370 return -EINVAL; 367 371 } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { ··· 418 422 return 0; 419 423 } 420 424 425 + static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, 426 + struct hns_roce_ah *ah) 427 + { 428 + struct ib_device *ib_dev = ah->ibah.device; 429 + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); 430 + 431 + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, 432 + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); 433 + 434 + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, 435 + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); 436 + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, 437 + V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); 438 + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, 439 + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); 440 + 441 + if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) 442 + return -EINVAL; 443 + 444 + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, 445 + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); 446 + 447 + ud_sq_wqe->sgid_index = ah->av.gid_index; 448 + 449 + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); 450 + memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2); 451 + 452 + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) 453 + return 0; 454 + 455 + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, 456 + ah->av.vlan_en); 457 + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, 458 + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); 459 + 460 + return 0; 461 + } 462 + 421 463 static inline int set_ud_wqe(struct hns_roce_qp *qp, 422 464 const struct ib_send_wr *wr, 423 465 void *wqe, unsigned int *sge_idx, 424 466 unsigned int owner_bit) 425 467 { 426 - struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); 427 468 struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); 428 469 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; 429 470 unsigned int curr_idx = *sge_idx; 430 - int valid_num_sge; 471 
+ unsigned int valid_num_sge; 431 472 u32 msg_len = 0; 432 - bool loopback; 433 - u8 *smac; 434 473 int ret; 435 474 436 475 valid_num_sge = calc_wr_sge_num(wr, &msg_len); ··· 475 444 if (WARN_ON(ret)) 476 445 return ret; 477 446 478 - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, 479 - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); 480 - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, 481 - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); 482 - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, 483 - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); 484 - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, 485 - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); 486 - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, 487 - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); 488 - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, 489 - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); 490 - 491 - /* MAC loopback */ 492 - smac = (u8 *)hr_dev->dev_addr[qp->port]; 493 - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; 494 - 495 - roce_set_bit(ud_sq_wqe->byte_40, 496 - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); 497 - 498 447 ud_sq_wqe->msg_len = cpu_to_le32(msg_len); 499 448 500 - /* Set sig attr */ 501 449 roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, 502 - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); 450 + !!(wr->send_flags & IB_SEND_SIGNALED)); 503 451 504 - /* Set se attr */ 505 452 roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, 506 - (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); 507 - 508 - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, 509 - owner_bit); 453 + !!(wr->send_flags & IB_SEND_SOLICITED)); 510 454 511 455 roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, 512 456 V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); ··· 494 488 V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, 495 489 curr_idx & (qp->sge.sge_cnt - 1)); 496 490 497 - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, 498 - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); 499 491 ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? 500 492 qp->qkey : ud_wr(wr)->remote_qkey); 501 493 roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, 502 494 V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); 503 495 504 - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, 505 - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); 506 - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, 507 - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); 508 - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, 509 - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); 510 - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, 511 - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); 512 - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, 513 - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); 514 - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, 515 - V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); 496 + ret = fill_ud_av(ud_sq_wqe, ah); 497 + if (ret) 498 + return ret; 516 499 517 - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, 518 - ah->av.vlan_en ? 
1 : 0); 519 - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, 520 - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); 500 + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); 521 501 522 - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); 523 - 524 - set_extend_sge(qp, wr, &curr_idx, valid_num_sge); 502 + /* 503 + * The pipeline can sequentially post all valid WQEs into WQ buffer, 504 + * including new WQEs waiting for the doorbell to update the PI again. 505 + * Therefore, the owner bit of WQE MUST be updated after all fields 506 + * and extSGEs have been written into DDR instead of cache. 507 + */ 508 + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) 509 + dma_wmb(); 525 510 526 511 *sge_idx = curr_idx; 512 + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, 513 + owner_bit); 527 514 528 515 return 0; 529 516 } ··· 590 591 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, 591 592 (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); 592 593 593 - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, 594 - owner_bit); 595 - 596 594 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || 597 595 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 598 596 set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); ··· 597 601 ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, 598 602 &curr_idx, valid_num_sge); 599 603 604 + /* 605 + * The pipeline can sequentially post all valid WQEs into WQ buffer, 606 + * including new WQEs waiting for the doorbell to update the PI again. 607 + * Therefore, the owner bit of WQE MUST be updated after all fields 608 + * and extSGEs have been written into DDR instead of cache. 609 + */ 610 + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) 611 + dma_wmb(); 612 + 600 613 *sge_idx = curr_idx; 614 + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, 615 + owner_bit); 601 616 602 617 return ret; 603 618 } ··· 656 649 unsigned int sge_idx; 657 650 unsigned int wqe_idx; 658 651 void *wqe = NULL; 659 - int nreq; 652 + u32 nreq; 660 653 int ret; 661 654 662 655 spin_lock_irqsave(&qp->sq.lock, flags); ··· 680 673 wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); 681 674 682 675 if (unlikely(wr->num_sge > qp->sq.max_gs)) { 683 - ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", 676 + ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", 684 677 wr->num_sge, qp->sq.max_gs); 685 678 ret = -EINVAL; 686 679 *bad_wr = wr; ··· 693 686 ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); 694 687 695 688 /* Corresponding to the QP type, wqe process separately */ 696 - if (ibqp->qp_type == IB_QPT_GSI) 689 + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) 697 690 ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); 698 691 else if (ibqp->qp_type == IB_QPT_RC) 699 692 ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); ··· 765 758 wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); 766 759 767 760 if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { 768 - ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", 761 + ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", 769 762 wr->num_sge, hr_qp->rq.max_gs); 770 763 ret = -EINVAL; 771 764 *bad_wr = wr; ··· 834 827 return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); 835 828 } 836 829 837 - static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) 830 + static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) 838 831 { 839 832 return hns_roce_buf_offset(idx_que->mtr.kmem, 840 833 n << idx_que->entry_shift); ··· 875 868 
struct hns_roce_v2_wqe_data_seg *dseg; 876 869 struct hns_roce_v2_db srq_db; 877 870 unsigned long flags; 871 + unsigned int ind; 878 872 __le32 *srq_idx; 879 873 int ret = 0; 880 874 int wqe_idx; 881 875 void *wqe; 882 876 int nreq; 883 - int ind; 884 877 int i; 885 878 886 879 spin_lock_irqsave(&srq->lock, flags); ··· 1025 1018 struct hns_roce_v2_priv *priv = hr_dev->priv; 1026 1019 struct hnae3_handle *handle = priv->handle; 1027 1020 const struct hnae3_ae_ops *ops = handle->ae_algo->ops; 1028 - unsigned long instance_stage; /* the current instance stage */ 1029 - unsigned long reset_stage; /* the current reset stage */ 1021 + unsigned long instance_stage; /* the current instance stage */ 1022 + unsigned long reset_stage; /* the current reset stage */ 1030 1023 unsigned long reset_cnt; 1031 1024 bool sw_resetting; 1032 1025 bool hw_resetting; ··· 1125 1118 roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, 1126 1119 upper_32_bits(dma)); 1127 1120 roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, 1128 - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); 1121 + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); 1129 1122 roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); 1130 1123 roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); 1131 1124 } else { ··· 1133 1126 roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, 1134 1127 upper_32_bits(dma)); 1135 1128 roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, 1136 - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); 1129 + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); 1137 1130 roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); 1138 1131 roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); 1139 1132 } ··· 1580 1573 PF_RES_DATA_4_PF_SCCC_BT_NUM_M, 1581 1574 PF_RES_DATA_4_PF_SCCC_BT_NUM_S); 1582 1575 1576 + hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num, 1577 + PF_RES_DATA_5_PF_GMV_BT_NUM_M, 1578 + PF_RES_DATA_5_PF_GMV_BT_NUM_S); 1579 + 1583 1580 return 0; 1584 1581 } 1585 1582 ··· 1907 1896 caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; 1908 1897 caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; 1909 1898 caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; 1899 + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; 1900 + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; 1901 + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / 1902 + caps->gmv_entry_sz); 1903 + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; 1904 + caps->gmv_ba_pg_sz = 0; 1905 + caps->gmv_buf_pg_sz = 0; 1906 + caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / 1907 + caps->gmv_entry_sz); 1910 1908 } 1911 1909 } 1912 1910 1913 - static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, 1914 - int *buf_page_size, int *bt_page_size, u32 hem_type) 1911 + static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num, 1912 + u32 *buf_page_size, u32 *bt_page_size, u32 hem_type) 1915 1913 { 1916 1914 u64 obj_per_chunk; 1917 1915 u64 bt_chunk_size = PAGE_SIZE; ··· 1950 1930 obj_per_chunk = ctx_bt_num * obj_per_chunk_default; 1951 1931 break; 1952 1932 default: 1953 - pr_err("Table %d not support hop_num = %d!\n", hem_type, 1954 - hop_num); 1933 + pr_err("table %u not support hop_num = %u!\n", hem_type, 1934 + hop_num); 1955 1935 return; 1956 1936 } 1957 1937 ··· 2142 2122 caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; 2143 2123 caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; 2144 2124 caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; 2125 + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; 2126 + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / 2127 + caps->gmv_entry_sz); 2128 + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; 2129 + caps->gmv_ba_pg_sz = 0; 2130 + 
caps->gmv_buf_pg_sz = 0; 2131 + caps->gid_table_len[0] = caps->gmv_bt_num * 2132 + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); 2145 2133 } 2146 2134 2147 2135 calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, ··· 2399 2371 u32 buf_chk_sz; 2400 2372 dma_addr_t t; 2401 2373 int func_num = 1; 2402 - int pg_num_a; 2403 - int pg_num_b; 2404 - int pg_num; 2405 - int size; 2374 + u32 pg_num_a; 2375 + u32 pg_num_b; 2376 + u32 pg_num; 2377 + u32 size; 2406 2378 int i; 2407 2379 2408 2380 switch (type) { ··· 2451 2423 if (i < (pg_num - 1)) 2452 2424 entry[i].blk_ba1_nxt_ptr |= 2453 2425 (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; 2454 - 2455 2426 } 2456 2427 link_tbl->npages = pg_num; 2457 2428 link_tbl->pg_sz = buf_chk_sz; ··· 2492 2465 link_tbl->table.map); 2493 2466 } 2494 2467 2495 - static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) 2468 + static int get_hem_table(struct hns_roce_dev *hr_dev) 2496 2469 { 2497 - struct hns_roce_v2_priv *priv = hr_dev->priv; 2498 - int qpc_count, cqc_count; 2499 - int ret, i; 2500 - 2501 - /* TSQ includes SQ doorbell and ack doorbell */ 2502 - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); 2503 - if (ret) { 2504 - dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); 2505 - return ret; 2506 - } 2507 - 2508 - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); 2509 - if (ret) { 2510 - dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); 2511 - goto err_tpq_init_failed; 2512 - } 2470 + unsigned int qpc_count; 2471 + unsigned int cqc_count; 2472 + unsigned int gmv_count; 2473 + int ret; 2474 + int i; 2513 2475 2514 2476 /* Alloc memory for QPC Timer buffer space chunk */ 2515 2477 for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; ··· 2522 2506 } 2523 2507 } 2524 2508 2509 + /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */ 2510 + for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num; 2511 + gmv_count++) { 2512 + ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count); 2513 + if (ret) { 2514 + dev_err(hr_dev->dev, 2515 + "failed to get gmv table, ret = %d.\n", ret); 2516 + goto err_gmv_failed; 2517 + } 2518 + } 2519 + 2525 2520 return 0; 2521 + 2522 + err_gmv_failed: 2523 + for (i = 0; i < gmv_count; i++) 2524 + hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i); 2526 2525 2527 2526 err_cqc_timer_failed: 2528 2527 for (i = 0; i < cqc_count; i++) ··· 2547 2516 for (i = 0; i < qpc_count; i++) 2548 2517 hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); 2549 2518 2519 + return ret; 2520 + } 2521 + 2522 + static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) 2523 + { 2524 + struct hns_roce_v2_priv *priv = hr_dev->priv; 2525 + int ret; 2526 + 2527 + /* TSQ includes SQ doorbell and ack doorbell */ 2528 + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); 2529 + if (ret) { 2530 + dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); 2531 + return ret; 2532 + } 2533 + 2534 + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); 2535 + if (ret) { 2536 + dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); 2537 + goto err_tpq_init_failed; 2538 + } 2539 + 2540 + ret = get_hem_table(hr_dev); 2541 + if (ret) 2542 + goto err_get_hem_table_failed; 2543 + 2544 + return 0; 2545 + 2546 + err_get_hem_table_failed: 2550 2547 hns_roce_free_link_table(hr_dev, &priv->tpq); 2551 2548 2552 2549 err_tpq_init_failed: ··· 2598 2539 struct hns_roce_cmq_desc desc; 2599 2540 struct hns_roce_mbox_status *mb_st = 2600 2541 (struct hns_roce_mbox_status *)desc.data; 2601 - enum 
hns_roce_cmd_return_status status; 2542 + int status; 2602 2543 2603 2544 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); 2604 2545 ··· 2669 2610 } 2670 2611 2671 2612 static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, 2672 - unsigned long timeout) 2613 + unsigned int timeout) 2673 2614 { 2674 2615 struct device *dev = hr_dev->dev; 2675 2616 unsigned long end; ··· 2696 2637 return 0; 2697 2638 } 2698 2639 2699 - static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, 2700 - int gid_index, const union ib_gid *gid, 2701 - enum hns_roce_sgid_type sgid_type) 2640 + static void copy_gid(void *dest, const union ib_gid *gid) 2641 + { 2642 + #define GID_SIZE 4 2643 + const union ib_gid *src = gid; 2644 + __le32 (*p)[GID_SIZE] = dest; 2645 + int i; 2646 + 2647 + if (!gid) 2648 + src = &zgid; 2649 + 2650 + for (i = 0; i < GID_SIZE; i++) 2651 + (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]); 2652 + } 2653 + 2654 + static int config_sgid_table(struct hns_roce_dev *hr_dev, 2655 + int gid_index, const union ib_gid *gid, 2656 + enum hns_roce_sgid_type sgid_type) 2702 2657 { 2703 2658 struct hns_roce_cmq_desc desc; 2704 2659 struct hns_roce_cfg_sgid_tb *sgid_tb = 2705 2660 (struct hns_roce_cfg_sgid_tb *)desc.data; 2706 - u32 *p; 2707 2661 2708 2662 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); 2709 2663 ··· 2725 2653 roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, 2726 2654 CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); 2727 2655 2728 - p = (u32 *)&gid->raw[0]; 2729 - sgid_tb->vf_sgid_l = cpu_to_le32(*p); 2730 - 2731 - p = (u32 *)&gid->raw[4]; 2732 - sgid_tb->vf_sgid_ml = cpu_to_le32(*p); 2733 - 2734 - p = (u32 *)&gid->raw[8]; 2735 - sgid_tb->vf_sgid_mh = cpu_to_le32(*p); 2736 - 2737 - p = (u32 *)&gid->raw[0xc]; 2738 - sgid_tb->vf_sgid_h = cpu_to_le32(*p); 2656 + copy_gid(&sgid_tb->vf_sgid_l, gid); 2739 2657 2740 2658 return hns_roce_cmq_send(hr_dev, &desc, 1); 2659 + } 2660 + 2661 + static int config_gmv_table(struct hns_roce_dev *hr_dev, 2662 + int gid_index, const union ib_gid *gid, 2663 + enum hns_roce_sgid_type sgid_type, 2664 + const struct ib_gid_attr *attr) 2665 + { 2666 + struct hns_roce_cmq_desc desc[2]; 2667 + struct hns_roce_cfg_gmv_tb_a *tb_a = 2668 + (struct hns_roce_cfg_gmv_tb_a *)desc[0].data; 2669 + struct hns_roce_cfg_gmv_tb_b *tb_b = 2670 + (struct hns_roce_cfg_gmv_tb_b *)desc[1].data; 2671 + 2672 + u16 vlan_id = VLAN_CFI_MASK; 2673 + u8 mac[ETH_ALEN] = {}; 2674 + int ret; 2675 + 2676 + if (gid) { 2677 + ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac); 2678 + if (ret) 2679 + return ret; 2680 + } 2681 + 2682 + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false); 2683 + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); 2684 + 2685 + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false); 2686 + 2687 + copy_gid(&tb_a->vf_sgid_l, gid); 2688 + 2689 + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, 2690 + CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); 2691 + roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, 2692 + vlan_id < VLAN_CFI_MASK); 2693 + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, 2694 + CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); 2695 + 2696 + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); 2697 + roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, 2698 + CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); 2699 + 2700 + roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, 2701 + CFG_GMV_TB_SGID_IDX_S, gid_index); 2702 + 
2703 + return hns_roce_cmq_send(hr_dev, desc, 2); 2741 2704 } 2742 2705 2743 2706 static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, ··· 2782 2675 enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; 2783 2676 int ret; 2784 2677 2785 - if (!gid || !attr) 2786 - return -EINVAL; 2787 - 2788 - if (attr->gid_type == IB_GID_TYPE_ROCE) 2789 - sgid_type = GID_TYPE_FLAG_ROCE_V1; 2790 - 2791 - if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 2792 - if (ipv6_addr_v4mapped((void *)gid)) 2793 - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; 2794 - else 2795 - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; 2678 + if (gid) { 2679 + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 2680 + if (ipv6_addr_v4mapped((void *)gid)) 2681 + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; 2682 + else 2683 + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; 2684 + } else if (attr->gid_type == IB_GID_TYPE_ROCE) { 2685 + sgid_type = GID_TYPE_FLAG_ROCE_V1; 2686 + } 2796 2687 } 2797 2688 2798 - ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); 2689 + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) 2690 + ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr); 2691 + else 2692 + ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type); 2693 + 2799 2694 if (ret) 2800 - ibdev_err(&hr_dev->ib_dev, 2801 - "failed to configure sgid table, ret = %d!\n", 2695 + ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n", 2802 2696 ret); 2803 2697 2804 2698 return ret; ··· 3067 2959 return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); 3068 2960 } 3069 2961 3070 - static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) 2962 + static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n) 3071 2963 { 3072 2964 struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe); 3073 2965 ··· 3167 3059 roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M, 3168 3060 V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == 3169 3061 HNS_ROCE_V3_CQE_SIZE ? 
1 : 0); 3062 + 3063 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) 3064 + hr_reg_enable(cq_context, CQC_STASH); 3170 3065 3171 3066 cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); 3172 3067 ··· 3414 3303 int is_send; 3415 3304 u16 wqe_ctr; 3416 3305 u32 opcode; 3417 - int qpn; 3306 + u32 qpn; 3418 3307 int ret; 3419 3308 3420 3309 /* Find cqe according to consumer index */ ··· 3683 3572 break; 3684 3573 default: 3685 3574 dev_warn(hr_dev->dev, 3686 - "Table %d not to be written by mailbox!\n", type); 3575 + "table %u not to be written by mailbox!\n", type); 3687 3576 return -EINVAL; 3688 3577 } 3689 3578 ··· 3694 3583 u32 hem_type, int step_idx) 3695 3584 { 3696 3585 struct hns_roce_cmd_mailbox *mailbox; 3586 + struct hns_roce_cmq_desc desc; 3587 + struct hns_roce_cfg_gmv_bt *gmv_bt = 3588 + (struct hns_roce_cfg_gmv_bt *)desc.data; 3697 3589 int ret; 3698 3590 int op; 3591 + 3592 + if (hem_type == HEM_TYPE_GMV) { 3593 + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, 3594 + false); 3595 + 3596 + gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT); 3597 + gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT + 3598 + 32)); 3599 + gmv_bt->gmv_bt_idx = cpu_to_le32(obj / 3600 + (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz)); 3601 + 3602 + return hns_roce_cmq_send(hr_dev, &desc, 1); 3603 + } 3699 3604 3700 3605 op = get_op_for_set_hem(hr_dev, hem_type, step_idx); 3701 3606 if (op < 0) ··· 3810 3683 case HEM_TYPE_CQC: 3811 3684 op = HNS_ROCE_CMD_DESTROY_CQC_BT0; 3812 3685 break; 3813 - case HEM_TYPE_SCCC: 3814 - case HEM_TYPE_QPC_TIMER: 3815 - case HEM_TYPE_CQC_TIMER: 3816 - break; 3817 3686 case HEM_TYPE_SRQC: 3818 3687 op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; 3819 3688 break; 3689 + case HEM_TYPE_SCCC: 3690 + case HEM_TYPE_QPC_TIMER: 3691 + case HEM_TYPE_CQC_TIMER: 3692 + case HEM_TYPE_GMV: 3693 + return 0; 3820 3694 default: 3821 - dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", 3695 + dev_warn(dev, "table %u not to be destroyed by mailbox!\n", 3822 3696 table->type); 3823 3697 return 0; 3824 3698 } 3825 - 3826 - if (table->type == HEM_TYPE_SCCC || 3827 - table->type == HEM_TYPE_QPC_TIMER || 3828 - table->type == HEM_TYPE_CQC_TIMER) 3829 - return 0; 3830 3699 3831 3700 op += step_idx; 3832 3701 ··· 3974 3851 3975 3852 roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); 3976 3853 3977 - hr_qp->access_flags = attr->qp_access_flags; 3978 3854 roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, 3979 3855 V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); 3856 + 3857 + if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) 3858 + return; 3859 + 3860 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) 3861 + hr_reg_enable(&context->ext, QPCEX_STASH); 3980 3862 } 3981 3863 3982 3864 static void modify_qp_init_to_init(struct ib_qp *ibqp, ··· 4001 3873 V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type)); 4002 3874 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, 4003 3875 V2_QPC_BYTE_4_TST_S, 0); 4004 - 4005 - if (attr_mask & IB_QP_ACCESS_FLAGS) { 4006 - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 4007 - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); 4008 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 4009 - 0); 4010 - 4011 - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 4012 - !!(attr->qp_access_flags & 4013 - IB_ACCESS_REMOTE_WRITE)); 4014 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 4015 - 0); 4016 - 4017 
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 4018 - !!(attr->qp_access_flags & 4019 - IB_ACCESS_REMOTE_ATOMIC)); 4020 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 4021 - 0); 4022 - roce_set_bit(context->byte_76_srqn_op_en, 4023 - V2_QPC_BYTE_76_EXT_ATE_S, 4024 - !!(attr->qp_access_flags & 4025 - IB_ACCESS_REMOTE_ATOMIC)); 4026 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, 4027 - V2_QPC_BYTE_76_EXT_ATE_S, 0); 4028 - } else { 4029 - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 4030 - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); 4031 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 4032 - 0); 4033 - 4034 - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 4035 - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE)); 4036 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 4037 - 0); 4038 - 4039 - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 4040 - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); 4041 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 4042 - 0); 4043 - roce_set_bit(context->byte_76_srqn_op_en, 4044 - V2_QPC_BYTE_76_EXT_ATE_S, 4045 - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); 4046 - roce_set_bit(qpc_mask->byte_76_srqn_op_en, 4047 - V2_QPC_BYTE_76_EXT_ATE_S, 0); 4048 - } 4049 3876 4050 3877 roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, 4051 3878 V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); ··· 4411 4328 4412 4329 ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); 4413 4330 if (ret) { 4414 - ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); 4331 + ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); 4415 4332 return ret; 4416 4333 } 4417 4334 ··· 4504 4421 IB_GID_TYPE_ROCE_UDP_ENCAP); 4505 4422 } 4506 4423 4507 - if (vlan_id < VLAN_N_VID) { 4424 + /* Only HIP08 needs to set the vlan_en bits in QPC */ 4425 + if (vlan_id < VLAN_N_VID && 4426 + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { 4508 4427 roce_set_bit(context->byte_76_srqn_op_en, 4509 4428 V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); 4510 4429 roce_set_bit(qpc_mask->byte_76_srqn_op_en, ··· 4553 4468 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, 4554 4469 V2_QPC_BYTE_24_HOP_LIMIT_S, 0); 4555 4470 4556 - if (is_udp) 4557 - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4558 - V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); 4559 - else 4560 - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4561 - V2_QPC_BYTE_24_TC_S, grh->traffic_class); 4562 - 4471 + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4472 + V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh)); 4563 4473 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 4564 4474 V2_QPC_BYTE_24_TC_S, 0); 4475 + 4565 4476 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, 4566 4477 V2_QPC_BYTE_28_FL_S, grh->flow_label); 4567 4478 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, ··· 4839 4758 unsigned long rq_flag = 0; 4840 4759 int ret; 4841 4760 4761 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 4762 + return -EOPNOTSUPP; 4763 + 4842 4764 /* 4843 4765 * In v2 engine, software pass context and context mask to hardware 4844 4766 * when modifying qp. 
If software need modify some fields in context, ··· 4902 4818 /* SW pass context to HW */ 4903 4819 ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); 4904 4820 if (ret) { 4905 - ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); 4821 + ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); 4906 4822 goto out; 4907 4823 } 4908 4824 ··· 4995 4911 4996 4912 ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); 4997 4913 if (ret) { 4998 - ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); 4914 + ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); 4999 4915 ret = -EINVAL; 5000 4916 goto out; 5001 4917 } ··· 5110 5026 unsigned long flags; 5111 5027 int ret = 0; 5112 5028 5113 - if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { 5029 + if ((hr_qp->ibqp.qp_type == IB_QPT_RC || 5030 + hr_qp->ibqp.qp_type == IB_QPT_UD) && 5031 + hr_qp->state != IB_QPS_RESET) { 5114 5032 /* Modify qp to reset before destroying qp */ 5115 5033 ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, 5116 5034 hr_qp->state, IB_QPS_RESET); 5117 5035 if (ret) 5118 5036 ibdev_err(ibdev, 5119 - "failed to modify QP to RST, ret = %d\n", 5037 + "failed to modify QP to RST, ret = %d.\n", 5120 5038 ret); 5121 5039 } 5122 5040 ··· 5157 5071 ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); 5158 5072 if (ret) 5159 5073 ibdev_err(&hr_dev->ib_dev, 5160 - "failed to destroy QP 0x%06lx, ret = %d\n", 5074 + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", 5161 5075 hr_qp->qpn, ret); 5162 5076 5163 5077 hns_roce_qp_destroy(hr_dev, hr_qp, udata); ··· 5180 5094 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); 5181 5095 ret = hns_roce_cmq_send(hr_dev, &desc, 1); 5182 5096 if (ret) { 5183 - ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); 5097 + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret); 5184 5098 goto out; 5185 5099 } 5186 5100 ··· 5190 5104 clr->qpn = cpu_to_le32(hr_qp->qpn); 5191 5105 ret = hns_roce_cmq_send(hr_dev, &desc, 1); 5192 5106 if (ret) { 5193 - ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); 5107 + ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret); 5194 5108 goto out; 5195 5109 } 5196 5110 ··· 5439 5353 hns_roce_free_cmd_mailbox(hr_dev, mailbox); 5440 5354 if (ret) 5441 5355 ibdev_err(&hr_dev->ib_dev, 5442 - "failed to process cmd when modifying CQ, ret = %d\n", 5356 + "failed to process cmd when modifying CQ, ret = %d.\n", 5443 5357 ret); 5444 5358 5445 5359 return ret; ··· 5450 5364 struct hns_roce_work *irq_work = 5451 5365 container_of(work, struct hns_roce_work, work); 5452 5366 struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; 5453 - u32 qpn = irq_work->qpn; 5454 - u32 cqn = irq_work->cqn; 5455 5367 5456 5368 switch (irq_work->event_type) { 5457 5369 case HNS_ROCE_EVENT_TYPE_PATH_MIG: ··· 5465 5381 break; 5466 5382 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 5467 5383 ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", 5468 - qpn, irq_work->sub_type); 5384 + irq_work->queue_num, irq_work->sub_type); 5469 5385 break; 5470 5386 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 5471 5387 ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", 5472 - qpn); 5388 + irq_work->queue_num); 5473 5389 break; 5474 5390 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 5475 5391 ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", 5476 - qpn, irq_work->sub_type); 5392 + irq_work->queue_num, irq_work->sub_type); 5477 
5393 break; 5478 5394 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 5479 5395 ibdev_warn(ibdev, "SRQ limit reach.\n"); ··· 5485 5401 ibdev_err(ibdev, "SRQ catas error.\n"); 5486 5402 break; 5487 5403 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 5488 - ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn); 5404 + ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num); 5489 5405 break; 5490 5406 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 5491 - ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn); 5407 + ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num); 5492 5408 break; 5493 5409 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 5494 5410 ibdev_warn(ibdev, "DB overflow.\n"); ··· 5504 5420 } 5505 5421 5506 5422 static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, 5507 - struct hns_roce_eq *eq, 5508 - u32 qpn, u32 cqn) 5423 + struct hns_roce_eq *eq, u32 queue_num) 5509 5424 { 5510 5425 struct hns_roce_work *irq_work; 5511 5426 ··· 5514 5431 5515 5432 INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); 5516 5433 irq_work->hr_dev = hr_dev; 5517 - irq_work->qpn = qpn; 5518 - irq_work->cqn = cqn; 5519 5434 irq_work->event_type = eq->event_type; 5520 5435 irq_work->sub_type = eq->sub_type; 5436 + irq_work->queue_num = queue_num; 5521 5437 queue_work(hr_dev->irq_workq, &(irq_work->work)); 5522 5438 } 5523 5439 ··· 5568 5486 struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); 5569 5487 int aeqe_found = 0; 5570 5488 int event_type; 5489 + u32 queue_num; 5571 5490 int sub_type; 5572 - u32 srqn; 5573 - u32 qpn; 5574 - u32 cqn; 5575 5491 5576 5492 while (aeqe) { 5577 5493 /* Make sure we read AEQ entry after we have checked the ··· 5583 5503 sub_type = roce_get_field(aeqe->asyn, 5584 5504 HNS_ROCE_V2_AEQE_SUB_TYPE_M, 5585 5505 HNS_ROCE_V2_AEQE_SUB_TYPE_S); 5586 - qpn = roce_get_field(aeqe->event.qp_event.qp, 5587 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 5588 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 5589 - cqn = roce_get_field(aeqe->event.cq_event.cq, 5590 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 5591 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 5592 - srqn = roce_get_field(aeqe->event.srq_event.srq, 5593 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 5594 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 5506 + queue_num = roce_get_field(aeqe->event.queue_event.num, 5507 + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 5508 + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 5595 5509 5596 5510 switch (event_type) { 5597 5511 case HNS_ROCE_EVENT_TYPE_PATH_MIG: ··· 5596 5522 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 5597 5523 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 5598 5524 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 5599 - hns_roce_qp_event(hr_dev, qpn, event_type); 5525 + hns_roce_qp_event(hr_dev, queue_num, event_type); 5600 5526 break; 5601 5527 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 5602 5528 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: 5603 - hns_roce_srq_event(hr_dev, srqn, event_type); 5529 + hns_roce_srq_event(hr_dev, queue_num, event_type); 5604 5530 break; 5605 5531 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 5606 5532 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 5607 - hns_roce_cq_event(hr_dev, cqn, event_type); 5608 - break; 5609 - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 5533 + hns_roce_cq_event(hr_dev, queue_num, event_type); 5610 5534 break; 5611 5535 case HNS_ROCE_EVENT_TYPE_MB: 5612 5536 hns_roce_cmd_event(hr_dev, ··· 5612 5540 aeqe->event.cmd.status, 5613 5541 le64_to_cpu(aeqe->event.cmd.out_param)); 5614 5542 break; 5615 - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: 5616 - break; 5543 + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 5617 5544 case 
HNS_ROCE_EVENT_TYPE_FLR: 5618 5545 break; 5619 5546 default: ··· 5629 5558 if (eq->cons_index > (2 * eq->entries - 1)) 5630 5559 eq->cons_index = 0; 5631 5560 5632 - hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); 5561 + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); 5633 5562 5634 5563 aeqe = next_aeqe_sw_v2(eq); 5635 5564 } ··· 6264 6193 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, 6265 6194 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, 6266 6195 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, 6196 + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, 6267 6197 /* required last entry */ 6268 6198 {0, } 6269 6199 };
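The hns_roce_hw_v2.c hunks above replace the three separate qpn/cqn/srqn reads from the AEQE with a single queue_num, because every async event class reports its queue number in the same AEQE field; the deferred irq work then carries that one number instead of both a qpn and a cqn. A minimal, compilable sketch of the same dispatch idea follows — the enum values, struct layout and handler names here are simplified stand-ins for illustration, not the driver's real definitions.

/* Simplified sketch of dispatching async events on a single queue number,
 * in the spirit of the hns AEQ rework above. All names are illustrative. */
#include <stdio.h>

enum aeq_event_type {
	EVT_QP_ERROR,    /* work-queue class events: number is a QPN  */
	EVT_SRQ_LIMIT,   /* SRQ class events: number is an SRQN       */
	EVT_CQ_OVERFLOW, /* CQ class events: number is a CQN          */
};

struct aeq_event {
	enum aeq_event_type type;
	unsigned int queue_num; /* one field, interpreted per event class */
};

static void qp_event(unsigned int qpn)   { printf("QP 0x%x event\n", qpn); }
static void srq_event(unsigned int srqn) { printf("SRQ 0x%x event\n", srqn); }
static void cq_event(unsigned int cqn)   { printf("CQ 0x%x event\n", cqn); }

/* One switch decides how the single queue_num is interpreted. */
static void handle_aeqe(const struct aeq_event *e)
{
	switch (e->type) {
	case EVT_QP_ERROR:
		qp_event(e->queue_num);
		break;
	case EVT_SRQ_LIMIT:
		srq_event(e->queue_num);
		break;
	case EVT_CQ_OVERFLOW:
		cq_event(e->queue_num);
		break;
	}
}

int main(void)
{
	struct aeq_event e = { .type = EVT_CQ_OVERFLOW, .queue_num = 0x12 };

	handle_aeqe(&e);
	return 0;
}

The payoff is that a new event class only touches the switch, and the work item no longer carries a field that is meaningless for half of the event types.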
+154 -111
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
··· 44 44 #define HNS_ROCE_VF_SMAC_NUM 32 45 45 #define HNS_ROCE_VF_SGID_NUM 32 46 46 #define HNS_ROCE_VF_SL_NUM 8 47 + #define HNS_ROCE_VF_GMV_BT_NUM 256 47 48 48 49 #define HNS_ROCE_V2_MAX_QP_NUM 0x100000 49 50 #define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 ··· 90 89 91 90 #define HNS_ROCE_V2_SCCC_SZ 32 92 91 #define HNS_ROCE_V3_SCCC_SZ 64 92 + #define HNS_ROCE_V3_GMV_ENTRY_SZ 32 93 93 94 94 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE 95 95 #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE ··· 243 241 HNS_ROCE_OPC_CLR_SCCC = 0x8509, 244 242 HNS_ROCE_OPC_QUERY_SCCC = 0x850a, 245 243 HNS_ROCE_OPC_RESET_SCCC = 0x850b, 244 + HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, 245 + HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, 246 246 HNS_SWITCH_PARAMETER_CFG = 0x1033, 247 247 }; 248 248 ··· 267 263 }; 268 264 269 265 struct hns_roce_v2_cq_context { 270 - __le32 byte_4_pg_ceqn; 271 - __le32 byte_8_cqn; 272 - __le32 cqe_cur_blk_addr; 273 - __le32 byte_16_hop_addr; 274 - __le32 cqe_nxt_blk_addr; 275 - __le32 byte_24_pgsz_addr; 276 - __le32 byte_28_cq_pi; 277 - __le32 byte_32_cq_ci; 278 - __le32 cqe_ba; 279 - __le32 byte_40_cqe_ba; 280 - __le32 byte_44_db_record; 281 - __le32 db_record_addr; 282 - __le32 byte_52_cqe_cnt; 283 - __le32 byte_56_cqe_period_maxcnt; 284 - __le32 cqe_report_timer; 285 - __le32 byte_64_se_cqe_idx; 266 + __le32 byte_4_pg_ceqn; 267 + __le32 byte_8_cqn; 268 + __le32 cqe_cur_blk_addr; 269 + __le32 byte_16_hop_addr; 270 + __le32 cqe_nxt_blk_addr; 271 + __le32 byte_24_pgsz_addr; 272 + __le32 byte_28_cq_pi; 273 + __le32 byte_32_cq_ci; 274 + __le32 cqe_ba; 275 + __le32 byte_40_cqe_ba; 276 + __le32 byte_44_db_record; 277 + __le32 db_record_addr; 278 + __le32 byte_52_cqe_cnt; 279 + __le32 byte_56_cqe_period_maxcnt; 280 + __le32 cqe_report_timer; 281 + __le32 byte_64_se_cqe_idx; 286 282 }; 283 + 287 284 #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 288 285 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 289 286 ··· 360 355 361 356 #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 362 357 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) 358 + 359 + #define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) 360 + 361 + #define CQC_STASH CQC_FIELD_LOC(63, 63) 363 362 364 363 struct hns_roce_srq_context { 365 364 __le32 byte_4_srqn_srqst; ··· 449 440 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 450 441 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) 451 442 452 - enum{ 443 + enum { 453 444 V2_MPT_ST_VALID = 0x1, 454 445 V2_MPT_ST_FREE = 0x2, 455 446 }; ··· 466 457 HNS_ROCE_QP_NUM_ST 467 458 }; 468 459 460 + struct hns_roce_v2_qp_context_ex { 461 + __le32 data[64]; 462 + }; 469 463 struct hns_roce_v2_qp_context { 470 - __le32 byte_4_sqpn_tst; 471 - __le32 wqe_sge_ba; 472 - __le32 byte_12_sq_hop; 473 - __le32 byte_16_buf_ba_pg_sz; 474 - __le32 byte_20_smac_sgid_idx; 475 - __le32 byte_24_mtu_tc; 476 - __le32 byte_28_at_fl; 477 - u8 dgid[GID_LEN_V2]; 478 - __le32 dmac; 479 - __le32 byte_52_udpspn_dmac; 480 - __le32 byte_56_dqpn_err; 481 - __le32 byte_60_qpst_tempid; 482 - __le32 qkey_xrcd; 483 - __le32 byte_68_rq_db; 484 - __le32 rq_db_record_addr; 485 - __le32 byte_76_srqn_op_en; 486 - __le32 byte_80_rnr_rx_cqn; 487 - __le32 byte_84_rq_ci_pi; 488 - __le32 rq_cur_blk_addr; 489 - __le32 byte_92_srq_info; 490 - __le32 byte_96_rx_reqmsn; 491 - __le32 rq_nxt_blk_addr; 492 - __le32 byte_104_rq_sge; 493 - __le32 byte_108_rx_reqepsn; 494 - __le32 rq_rnr_timer; 495 - __le32 rx_msg_len; 496 - __le32 rx_rkey_pkt_info; 497 - __le64 rx_va; 498 - __le32 byte_132_trrl; 499 - __le32 trrl_ba; 500 - __le32 byte_140_raq; 
501 - __le32 byte_144_raq; 502 - __le32 byte_148_raq; 503 - __le32 byte_152_raq; 504 - __le32 byte_156_raq; 505 - __le32 byte_160_sq_ci_pi; 506 - __le32 sq_cur_blk_addr; 507 - __le32 byte_168_irrl_idx; 508 - __le32 byte_172_sq_psn; 509 - __le32 byte_176_msg_pktn; 510 - __le32 sq_cur_sge_blk_addr; 511 - __le32 byte_184_irrl_idx; 512 - __le32 cur_sge_offset; 513 - __le32 byte_192_ext_sge; 514 - __le32 byte_196_sq_psn; 515 - __le32 byte_200_sq_max; 516 - __le32 irrl_ba; 517 - __le32 byte_208_irrl; 518 - __le32 byte_212_lsn; 519 - __le32 sq_timer; 520 - __le32 byte_220_retry_psn_msn; 521 - __le32 byte_224_retry_msg; 522 - __le32 rx_sq_cur_blk_addr; 523 - __le32 byte_232_irrl_sge; 524 - __le32 irrl_cur_sge_offset; 525 - __le32 byte_240_irrl_tail; 526 - __le32 byte_244_rnr_rxack; 527 - __le32 byte_248_ack_psn; 528 - __le32 byte_252_err_txcqn; 529 - __le32 byte_256_sqflush_rqcqe; 530 - __le32 ext[64]; 464 + __le32 byte_4_sqpn_tst; 465 + __le32 wqe_sge_ba; 466 + __le32 byte_12_sq_hop; 467 + __le32 byte_16_buf_ba_pg_sz; 468 + __le32 byte_20_smac_sgid_idx; 469 + __le32 byte_24_mtu_tc; 470 + __le32 byte_28_at_fl; 471 + u8 dgid[GID_LEN_V2]; 472 + __le32 dmac; 473 + __le32 byte_52_udpspn_dmac; 474 + __le32 byte_56_dqpn_err; 475 + __le32 byte_60_qpst_tempid; 476 + __le32 qkey_xrcd; 477 + __le32 byte_68_rq_db; 478 + __le32 rq_db_record_addr; 479 + __le32 byte_76_srqn_op_en; 480 + __le32 byte_80_rnr_rx_cqn; 481 + __le32 byte_84_rq_ci_pi; 482 + __le32 rq_cur_blk_addr; 483 + __le32 byte_92_srq_info; 484 + __le32 byte_96_rx_reqmsn; 485 + __le32 rq_nxt_blk_addr; 486 + __le32 byte_104_rq_sge; 487 + __le32 byte_108_rx_reqepsn; 488 + __le32 rq_rnr_timer; 489 + __le32 rx_msg_len; 490 + __le32 rx_rkey_pkt_info; 491 + __le64 rx_va; 492 + __le32 byte_132_trrl; 493 + __le32 trrl_ba; 494 + __le32 byte_140_raq; 495 + __le32 byte_144_raq; 496 + __le32 byte_148_raq; 497 + __le32 byte_152_raq; 498 + __le32 byte_156_raq; 499 + __le32 byte_160_sq_ci_pi; 500 + __le32 sq_cur_blk_addr; 501 + __le32 byte_168_irrl_idx; 502 + __le32 byte_172_sq_psn; 503 + __le32 byte_176_msg_pktn; 504 + __le32 sq_cur_sge_blk_addr; 505 + __le32 byte_184_irrl_idx; 506 + __le32 cur_sge_offset; 507 + __le32 byte_192_ext_sge; 508 + __le32 byte_196_sq_psn; 509 + __le32 byte_200_sq_max; 510 + __le32 irrl_ba; 511 + __le32 byte_208_irrl; 512 + __le32 byte_212_lsn; 513 + __le32 sq_timer; 514 + __le32 byte_220_retry_psn_msn; 515 + __le32 byte_224_retry_msg; 516 + __le32 rx_sq_cur_blk_addr; 517 + __le32 byte_232_irrl_sge; 518 + __le32 irrl_cur_sge_offset; 519 + __le32 byte_240_irrl_tail; 520 + __le32 byte_244_rnr_rxack; 521 + __le32 byte_248_ack_psn; 522 + __le32 byte_252_err_txcqn; 523 + __le32 byte_256_sqflush_rqcqe; 524 + 525 + struct hns_roce_v2_qp_context_ex ext; 531 526 }; 532 527 533 528 #define V2_QPC_BYTE_4_TST_S 0 ··· 900 887 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 901 888 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) 902 889 890 + #define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l) 891 + 892 + #define QPCEX_STASH QPCEX_FIELD_LOC(82, 82) 893 + 903 894 #define V2_QP_RWE_S 1 /* rdma write enable */ 904 895 #define V2_QP_RRE_S 2 /* rdma read enable */ 905 896 #define V2_QP_ATE_S 3 /* rdma atomic enable */ ··· 1090 1073 __le32 byte_32; 1091 1074 __le32 byte_36; 1092 1075 __le32 byte_40; 1093 - __le32 dmac; 1094 - __le32 byte_48; 1076 + u8 dmac[ETH_ALEN]; 1077 + u8 sgid_index; 1078 + u8 smac_index; 1095 1079 u8 dgid[GID_LEN_V2]; 1096 - 1097 1080 }; 1098 - #define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 1081 + 1082 + 
#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 1099 1083 #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) 1100 1084 1101 1085 #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 ··· 1135 1117 #define V2_UD_SEND_WQE_BYTE_40_SL_S 20 1136 1118 #define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) 1137 1119 1138 - #define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 1139 - #define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) 1140 - 1141 1120 #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 1142 1121 1143 1122 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 1144 - 1145 - #define V2_UD_SEND_WQE_DMAC_0_S 0 1146 - #define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0) 1147 - 1148 - #define V2_UD_SEND_WQE_DMAC_1_S 8 1149 - #define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8) 1150 - 1151 - #define V2_UD_SEND_WQE_DMAC_2_S 16 1152 - #define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16) 1153 - 1154 - #define V2_UD_SEND_WQE_DMAC_3_S 24 1155 - #define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24) 1156 - 1157 - #define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0 1158 - #define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0) 1159 - 1160 - #define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8 1161 - #define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8) 1162 - 1163 - #define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16 1164 - #define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16) 1165 - 1166 - #define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24 1167 - #define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24) 1168 1123 1169 1124 struct hns_roce_v2_rc_send_wqe { 1170 1125 __le32 byte_4; ··· 1325 1334 __le32 sgid_idx_num; 1326 1335 __le32 qid_idx_sl_num; 1327 1336 __le32 sccc_bt_idx_num; 1328 - __le32 rsv; 1337 + __le32 gmv_idx_num; 1329 1338 }; 1330 1339 1331 1340 #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 ··· 1351 1360 1352 1361 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 1353 1362 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) 1363 + 1364 + #define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0 1365 + #define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0) 1366 + 1367 + #define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8 1368 + #define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8) 1354 1369 1355 1370 struct hns_roce_pf_timer_res_a { 1356 1371 __le32 rsv0; ··· 1422 1425 __le32 vf_sgid_idx_num; 1423 1426 __le32 vf_qid_idx_sl_num; 1424 1427 __le32 vf_sccc_idx_num; 1425 - __le32 rsv1; 1428 + __le32 vf_gmv_idx_num; 1426 1429 }; 1427 1430 1428 1431 #define VF_RES_B_DATA_0_VF_ID_S 0 ··· 1451 1454 1452 1455 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 1453 1456 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) 1457 + 1458 + #define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0 1459 + #define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0) 1460 + 1461 + #define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16 1462 + #define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16) 1454 1463 1455 1464 struct hns_roce_vf_switch { 1456 1465 __le32 rocee_sel; ··· 1579 1576 1580 1577 #define CFG_SMAC_TB_VF_SMAC_H_S 0 1581 1578 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) 1579 + 1580 + struct hns_roce_cfg_gmv_bt { 1581 + __le32 gmv_ba_l; 1582 + __le32 gmv_ba_h; 1583 + __le32 gmv_bt_idx; 1584 + __le32 rsv[3]; 1585 + }; 1586 + 1587 + #define CFG_GMV_BA_H_S 0 1588 + #define CFG_GMV_BA_H_M GENMASK(19, 0) 1589 + 1590 + struct hns_roce_cfg_gmv_tb_a { 1591 + __le32 vf_sgid_l; 1592 + __le32 vf_sgid_ml; 1593 + __le32 vf_sgid_mh; 1594 + __le32 vf_sgid_h; 1595 + __le32 vf_sgid_type_vlan; 1596 + __le32 resv; 1597 + }; 1598 + 1599 + #define CFG_GMV_TB_SGID_IDX_S 0 1600 + #define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) 1601 + 1602 + #define CFG_GMV_TB_VF_SGID_TYPE_S 0 1603 + #define 
CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) 1604 + 1605 + #define CFG_GMV_TB_VF_VLAN_EN_S 2 1606 + 1607 + #define CFG_GMV_TB_VF_VLAN_ID_S 16 1608 + #define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) 1609 + 1610 + struct hns_roce_cfg_gmv_tb_b { 1611 + __le32 vf_smac_l; 1612 + __le32 vf_smac_h; 1613 + __le32 table_idx_rsv; 1614 + __le32 resv[3]; 1615 + }; 1616 + 1617 + #define CFG_GMV_TB_SMAC_H_S 0 1618 + #define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) 1582 1619 1583 1620 #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 1584 1621 struct hns_roce_query_pf_caps_a {
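The new *_FIELD_LOC macros in hns_roce_hw_v2.h (for example QPCEX_STASH at bit 82 of the extended QP context) name a field by its absolute (high, low) bit positions inside an array of 32-bit words, rather than by a per-word mask and shift. Below is a hedged userspace sketch of only the index arithmetic behind that convention; the helper name ctx_set_field is made up, and endian conversion plus the driver's real field helpers are deliberately omitted.

/* Addressing a field by absolute (high, low) bit positions within an array
 * of 32-bit words. Simplified: fields may not cross a word boundary here. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void ctx_set_field(uint32_t *ctx, unsigned int hi, unsigned int lo,
			  uint32_t val)
{
	unsigned int word = lo / 32;      /* which 32-bit word        */
	unsigned int shift = lo % 32;     /* offset inside that word  */
	unsigned int width = hi - lo + 1; /* field width in bits      */
	uint32_t mask = width >= 32 ? ~0U : (1U << width) - 1;

	assert(hi >= lo && hi / 32 == word);

	ctx[word] &= ~(mask << shift);
	ctx[word] |= (val & mask) << shift;
}

int main(void)
{
	uint32_t qpc_ex[64] = { 0 };

	/* a single-bit flag at absolute bit 82 lands in word 2, bit 18 */
	ctx_set_field(qpc_ex, 82, 82, 1);
	printf("word2 = 0x%08x\n", (unsigned int)qpc_ex[2]); /* 0x00040000 */
	return 0;
}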
+36 -44
drivers/infiniband/hw/hns/hns_roce_main.c
··· 33 33 #include <linux/acpi.h> 34 34 #include <linux/of_platform.h> 35 35 #include <linux/module.h> 36 + #include <linux/pci.h> 36 37 #include <rdma/ib_addr.h> 37 38 #include <rdma/ib_smi.h> 38 39 #include <rdma/ib_user_verbs.h> 39 40 #include <rdma/ib_cache.h> 40 41 #include "hns_roce_common.h" 41 42 #include "hns_roce_device.h" 42 - #include <rdma/hns-abi.h> 43 43 #include "hns_roce_hem.h" 44 44 45 45 /** ··· 53 53 * GID[0][0], GID[1][0],.....GID[N - 1][0], 54 54 * And so on 55 55 */ 56 - int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) 56 + u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) 57 57 { 58 58 return gid_index * hr_dev->caps.num_ports + port; 59 59 } ··· 61 61 static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) 62 62 { 63 63 u8 phy_port; 64 - u32 i = 0; 64 + u32 i; 65 + 66 + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) 67 + return 0; 65 68 66 69 if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) 67 70 return 0; ··· 93 90 static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) 94 91 { 95 92 struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); 96 - struct ib_gid_attr zattr = {}; 97 93 u8 port = attr->port_num - 1; 98 94 int ret; 99 95 100 96 if (port >= hr_dev->caps.num_ports) 101 97 return -EINVAL; 102 98 103 - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); 99 + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); 104 100 105 101 return ret; 106 102 } ··· 327 325 328 326 resp.cqe_size = hr_dev->caps.cqe_sz; 329 327 330 - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); 328 + ret = ib_copy_to_udata(udata, &resp, 329 + min(udata->outlen, sizeof(resp))); 331 330 if (ret) 332 331 goto error_fail_copy_to_udata; 333 332 ··· 424 421 .alloc_pd = hns_roce_alloc_pd, 425 422 .alloc_ucontext = hns_roce_alloc_ucontext, 426 423 .create_ah = hns_roce_create_ah, 424 + .create_user_ah = hns_roce_create_ah, 427 425 .create_cq = hns_roce_create_cq, 428 426 .create_qp = hns_roce_create_qp, 429 427 .dealloc_pd = hns_roce_dealloc_pd, ··· 495 491 ib_dev->phys_port_cnt = hr_dev->caps.num_ports; 496 492 ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; 497 493 ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; 498 - ib_dev->uverbs_cmd_mask = 499 - (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | 500 - (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | 501 - (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | 502 - (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | 503 - (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | 504 - (1ULL << IB_USER_VERBS_CMD_REG_MR) | 505 - (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | 506 - (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 507 - (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | 508 - (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | 509 - (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | 510 - (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | 511 - (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | 512 - (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); 513 494 514 - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); 515 - 516 - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { 517 - ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); 495 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) 518 496 ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); 519 - } 520 497 521 498 /* MW */ 522 - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { 523 - ib_dev->uverbs_cmd_mask |= 524 - (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | 525 - (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); 499 + if 
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) 526 500 ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); 527 - } 528 501 529 502 /* FRMR */ 530 503 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) ··· 509 528 510 529 /* SRQ */ 511 530 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { 512 - ib_dev->uverbs_cmd_mask |= 513 - (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | 514 - (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | 515 - (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | 516 - (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | 517 - (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); 518 531 ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); 519 532 ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); 520 533 } ··· 555 580 556 581 static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) 557 582 { 558 - int ret; 559 583 struct device *dev = hr_dev->dev; 584 + int ret; 560 585 561 586 ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, 562 587 HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, ··· 606 631 goto err_unmap_trrl; 607 632 } 608 633 609 - if (hr_dev->caps.srqc_entry_sz) { 634 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { 610 635 ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, 611 636 HEM_TYPE_SRQC, 612 637 hr_dev->caps.srqc_entry_sz, ··· 618 643 } 619 644 } 620 645 621 - if (hr_dev->caps.sccc_sz) { 646 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { 622 647 ret = hns_roce_init_hem_table(hr_dev, 623 648 &hr_dev->qp_table.sccc_table, 624 649 HEM_TYPE_SCCC, ··· 655 680 } 656 681 } 657 682 683 + if (hr_dev->caps.gmv_entry_sz) { 684 + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, 685 + HEM_TYPE_GMV, 686 + hr_dev->caps.gmv_entry_sz, 687 + hr_dev->caps.gmv_entry_num, 1); 688 + if (ret) { 689 + dev_err(dev, 690 + "failed to init gmv table memory, ret = %d\n", 691 + ret); 692 + goto err_unmap_cqc_timer; 693 + } 694 + } 695 + 658 696 return 0; 697 + 698 + err_unmap_cqc_timer: 699 + if (hr_dev->caps.cqc_timer_entry_sz) 700 + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); 659 701 660 702 err_unmap_qpc_timer: 661 703 if (hr_dev->caps.qpc_timer_entry_sz) 662 704 hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); 663 705 664 706 err_unmap_ctx: 665 - if (hr_dev->caps.sccc_sz) 707 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) 666 708 hns_roce_cleanup_hem_table(hr_dev, 667 709 &hr_dev->qp_table.sccc_table); 668 710 err_unmap_srq: 669 - if (hr_dev->caps.srqc_entry_sz) 711 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) 670 712 hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); 671 713 672 714 err_unmap_cq: ··· 713 721 */ 714 722 static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) 715 723 { 716 - int ret; 717 724 struct device *dev = hr_dev->dev; 725 + int ret; 718 726 719 727 spin_lock_init(&hr_dev->sm_lock); 720 728 spin_lock_init(&hr_dev->bt_cmd_lock); ··· 838 846 839 847 int hns_roce_init(struct hns_roce_dev *hr_dev) 840 848 { 841 - int ret; 842 849 struct device *dev = hr_dev->dev; 850 + int ret; 843 851 844 852 if (hr_dev->hw->reset) { 845 853 ret = hr_dev->hw->reset(hr_dev, true);
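One detail of the hns_roce_main.c hunk worth calling out: the ucontext response is now copied with min(udata->outlen, sizeof(resp)), so a userspace built against an older, shorter response layout only receives the bytes it asked for. The snippet below is a standalone illustration of that clamp-before-copy pattern; the struct and function names are invented for the example and are not the uapi definitions.

/* Copy at most what the consumer asked for, in the spirit of
 * ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct alloc_resp_v2 {        /* "new" kernel-side response            */
	uint32_t qp_tab_size;
	uint32_t cqe_size;    /* field an older consumer does not know */
};

static size_t copy_resp(void *dst, size_t dst_len, const void *src,
			size_t src_len)
{
	size_t n = dst_len < src_len ? dst_len : src_len; /* min() */

	memcpy(dst, src, n);
	return n;
}

int main(void)
{
	struct alloc_resp_v2 resp = { .qp_tab_size = 256, .cqe_size = 64 };
	uint32_t old_user_buf; /* old ABI only knows the first field */
	size_t n = copy_resp(&old_user_buf, sizeof(old_user_buf),
			     &resp, sizeof(resp));

	printf("copied %zu bytes, qp_tab_size=%u\n", n,
	       (unsigned int)old_user_buf);
	return 0;
}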
+33 -46
drivers/infiniband/hw/hns/hns_roce_mr.c
··· 167 167 static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, 168 168 struct hns_roce_mr *mr) 169 169 { 170 - int ret; 171 170 unsigned long mtpt_idx = key_to_hw_index(mr->key); 172 - struct device *dev = hr_dev->dev; 173 171 struct hns_roce_cmd_mailbox *mailbox; 172 + struct device *dev = hr_dev->dev; 173 + int ret; 174 174 175 175 /* Allocate mailbox memory */ 176 176 mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); ··· 185 185 else 186 186 ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); 187 187 if (ret) { 188 - dev_err(dev, "Write mtpt fail!\n"); 188 + dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); 189 189 goto err_page; 190 190 } 191 191 192 192 ret = hns_roce_hw_create_mpt(hr_dev, mailbox, 193 193 mtpt_idx & (hr_dev->caps.num_mtpts - 1)); 194 194 if (ret) { 195 - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); 195 + dev_err(dev, "failed to create mpt, ret = %d.\n", ret); 196 196 goto err_page; 197 197 } 198 198 ··· 328 328 return ret; 329 329 } 330 330 331 - int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, 332 - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, 333 - struct ib_udata *udata) 331 + struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, 332 + u64 length, u64 virt_addr, 333 + int mr_access_flags, struct ib_pd *pd, 334 + struct ib_udata *udata) 334 335 { 335 336 struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); 336 337 struct ib_device *ib_dev = &hr_dev->ib_dev; ··· 342 341 int ret; 343 342 344 343 if (!mr->enabled) 345 - return -EINVAL; 344 + return ERR_PTR(-EINVAL); 346 345 347 346 mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); 348 347 if (IS_ERR(mailbox)) 349 - return PTR_ERR(mailbox); 348 + return ERR_CAST(mailbox); 350 349 351 350 mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); 352 351 ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, ··· 391 390 392 391 hns_roce_free_cmd_mailbox(hr_dev, mailbox); 393 392 394 - return 0; 393 + return NULL; 395 394 396 395 free_cmd_mbox: 397 396 hns_roce_free_cmd_mailbox(hr_dev, mailbox); 398 397 399 - return ret; 398 + return ERR_PTR(ret); 400 399 } 401 400 402 401 int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ··· 496 495 497 496 ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); 498 497 if (ret < 1) { 499 - ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", 498 + ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", 500 499 mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); 501 500 goto err_page_list; 502 501 } ··· 510 509 ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); 511 510 ret = 0; 512 511 } else { 513 - mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); 512 + mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); 514 513 ret = mr->npages; 515 514 } 516 515 ··· 696 695 return size; 697 696 } 698 697 699 - static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, 700 - unsigned int page_shift) 701 - { 702 - if (is_direct) 703 - return ALIGN(alloc_size, 1 << page_shift); 704 - else 705 - return HNS_HW_DIRECT_PAGE_COUNT << page_shift; 706 - } 707 - 708 698 /* 709 699 * check the given pages in continuous address space 710 700 * Returns 0 on success, or the error page num. 
··· 724 732 /* release kernel buffers */ 725 733 if (mtr->kmem) { 726 734 hns_roce_buf_free(hr_dev, mtr->kmem); 727 - kfree(mtr->kmem); 728 735 mtr->kmem = NULL; 729 736 } 730 737 } ··· 735 744 struct ib_device *ibdev = &hr_dev->ib_dev; 736 745 unsigned int best_pg_shift; 737 746 int all_pg_count = 0; 738 - size_t direct_size; 739 747 size_t total_size; 740 748 int ret; 741 749 742 750 total_size = mtr_bufs_size(buf_attr); 743 751 if (total_size < 1) { 744 - ibdev_err(ibdev, "Failed to check mtr size\n"); 752 + ibdev_err(ibdev, "failed to check mtr size\n."); 745 753 return -EINVAL; 746 754 } 747 755 ··· 752 762 mtr->umem = ib_umem_get(ibdev, user_addr, total_size, 753 763 buf_attr->user_access); 754 764 if (IS_ERR_OR_NULL(mtr->umem)) { 755 - ibdev_err(ibdev, "Failed to get umem, ret %ld\n", 765 + ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", 756 766 PTR_ERR(mtr->umem)); 757 767 return -ENOMEM; 758 768 } ··· 770 780 ret = 0; 771 781 } else { 772 782 mtr->umem = NULL; 773 - mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); 774 - if (!mtr->kmem) { 775 - ibdev_err(ibdev, "Failed to alloc kmem\n"); 776 - return -ENOMEM; 783 + mtr->kmem = 784 + hns_roce_buf_alloc(hr_dev, total_size, 785 + buf_attr->page_shift, 786 + is_direct ? HNS_ROCE_BUF_DIRECT : 0); 787 + if (IS_ERR(mtr->kmem)) { 788 + ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", 789 + PTR_ERR(mtr->kmem)); 790 + return PTR_ERR(mtr->kmem); 777 791 } 778 - direct_size = mtr_kmem_direct_size(is_direct, total_size, 779 - buf_attr->page_shift); 780 - ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, 781 - mtr->kmem, buf_attr->page_shift); 782 - if (ret) { 783 - ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); 784 - goto err_alloc_mem; 785 - } 792 + 786 793 best_pg_shift = buf_attr->page_shift; 787 794 all_pg_count = mtr->kmem->npages; 788 795 } ··· 787 800 /* must bigger than minimum hardware page shift */ 788 801 if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { 789 802 ret = -EINVAL; 790 - ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", 803 + ibdev_err(ibdev, 804 + "failed to check mtr, page shift = %u count = %d.\n", 791 805 best_pg_shift, all_pg_count); 792 806 goto err_alloc_mem; 793 807 } ··· 829 841 } 830 842 831 843 int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, 832 - dma_addr_t *pages, int page_cnt) 844 + dma_addr_t *pages, unsigned int page_cnt) 833 845 { 834 846 struct ib_device *ibdev = &hr_dev->ib_dev; 835 847 struct hns_roce_buf_region *r; 848 + unsigned int i; 836 849 int err; 837 - int i; 838 850 839 851 /* 840 852 * Only use the first page address as root ba when hopnum is 0, this ··· 850 862 if (r->offset + r->count > page_cnt) { 851 863 err = -EINVAL; 852 864 ibdev_err(ibdev, 853 - "Failed to check mtr%d end %d + %d, max %d\n", 865 + "failed to check mtr%u end %u + %u, max %u.\n", 854 866 i, r->offset, r->count, page_cnt); 855 867 return err; 856 868 } ··· 858 870 err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); 859 871 if (err) { 860 872 ibdev_err(ibdev, 861 - "Failed to map mtr%d offset %d, err %d\n", 873 + "failed to map mtr%u offset %u, ret = %d.\n", 862 874 i, r->offset, err); 863 875 return err; 864 876 } ··· 871 883 int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) 872 884 { 873 885 struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; 886 + int mtt_count, left; 874 887 int start_index; 875 - int mtt_count; 876 888 int total = 0; 877 889 __le64 *mtts; 878 - int npage; 890 + u32 npage; 879 891 u64 addr; 880 - int left; 881 
892 882 893 if (!mtt_buf || mtt_max < 1) 883 894 goto done;
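In the hns_roce_mr.c hunk, hns_roce_rereg_user_mr() now returns a struct ib_mr * instead of an int: NULL on the success path (the caller keeps using the existing MR) and an errno encoded with ERR_PTR()/ERR_CAST() on failure. For readers less familiar with that idiom, here is a small userspace re-creation of ERR_PTR/IS_ERR/PTR_ERR; the rereg_object() helper and its flags argument are purely illustrative.

/* Userspace re-creation of the kernel's pointer-encoded error idiom. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)(intptr_t)error; }
static inline long PTR_ERR(const void *p) { return (long)(intptr_t)p; }
static inline int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

/* Hypothetical rereg helper: flags == 0 means nothing needed changing. */
static void *rereg_object(int flags)
{
	static int replacement = 42;

	if (flags < 0)
		return ERR_PTR(-EINVAL); /* failure: errno in the pointer */
	if (flags == 0)
		return NULL;             /* success: keep the old object  */
	return &replacement;             /* success: switch to a new one  */
}

int main(void)
{
	void *p = rereg_object(-1);

	if (IS_ERR(p))
		printf("failed: %ld\n", PTR_ERR(p));
	else if (!p)
		printf("kept existing object\n");
	else
		printf("got replacement object\n");
	return 0;
}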
+7 -7
drivers/infiniband/hw/hns/hns_roce_pd.c
··· 32 32 33 33 #include <linux/platform_device.h> 34 34 #include <linux/pci.h> 35 - #include <uapi/rdma/hns-abi.h> 36 35 #include "hns_roce_device.h" 37 36 38 37 static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) ··· 64 65 65 66 ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); 66 67 if (ret) { 67 - ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); 68 + ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret); 68 69 return ret; 69 70 } 70 71 71 72 if (udata) { 72 - struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; 73 + struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn}; 73 74 74 - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { 75 + ret = ib_copy_to_udata(udata, &resp, 76 + min(udata->outlen, sizeof(resp))); 77 + if (ret) { 75 78 hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); 76 - ibdev_err(ib_dev, "failed to copy to udata\n"); 77 - return -EFAULT; 79 + ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret); 78 80 } 79 81 } 80 82 81 - return 0; 83 + return ret; 82 84 } 83 85 84 86 int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+200 -108
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 39 39 #include "hns_roce_common.h" 40 40 #include "hns_roce_device.h" 41 41 #include "hns_roce_hem.h" 42 - #include <rdma/hns-abi.h> 43 42 44 43 static void flush_work_handle(struct work_struct *work) 45 44 { ··· 113 114 static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, 114 115 enum hns_roce_event type) 115 116 { 116 - struct ib_event event; 117 117 struct ib_qp *ibqp = &hr_qp->ibqp; 118 + struct ib_event event; 118 119 119 120 if (ibqp->event_handler) { 120 121 event.device = ibqp->device; ··· 153 154 } 154 155 } 155 156 157 + static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) 158 + { 159 + u32 least_load = bank[0].inuse; 160 + u8 bankid = 0; 161 + u32 bankcnt; 162 + u8 i; 163 + 164 + for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { 165 + bankcnt = bank[i].inuse; 166 + if (bankcnt < least_load) { 167 + least_load = bankcnt; 168 + bankid = i; 169 + } 170 + } 171 + 172 + return bankid; 173 + } 174 + 175 + static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, 176 + unsigned long *qpn) 177 + { 178 + int id; 179 + 180 + id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL); 181 + if (id < 0) { 182 + id = ida_alloc_range(&bank->ida, bank->min, bank->max, 183 + GFP_KERNEL); 184 + if (id < 0) 185 + return id; 186 + } 187 + 188 + /* the QPN should keep increasing until the max value is reached. */ 189 + bank->next = (id + 1) > bank->max ? bank->min : id + 1; 190 + 191 + /* the lower 3 bits is bankid */ 192 + *qpn = (id << 3) | bankid; 193 + 194 + return 0; 195 + } 156 196 static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) 157 197 { 198 + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; 158 199 unsigned long num = 0; 200 + u8 bankid; 159 201 int ret; 160 202 161 203 if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { ··· 209 169 210 170 hr_qp->doorbell_qpn = 1; 211 171 } else { 212 - ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, 213 - 1, 1, &num); 172 + spin_lock(&qp_table->bank_lock); 173 + bankid = get_least_load_bankid_for_qp(qp_table->bank); 174 + 175 + ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, 176 + &num); 214 177 if (ret) { 215 - ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); 216 - return -ENOMEM; 178 + ibdev_err(&hr_dev->ib_dev, 179 + "failed to alloc QPN, ret = %d\n", ret); 180 + spin_unlock(&qp_table->bank_lock); 181 + return ret; 217 182 } 183 + 184 + qp_table->bank[bankid].inuse++; 185 + spin_unlock(&qp_table->bank_lock); 218 186 219 187 hr_qp->doorbell_qpn = (u32)num; 220 188 } ··· 334 286 } 335 287 } 336 288 337 - if (hr_dev->caps.sccc_sz) { 289 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { 338 290 /* Alloc memory for SCC CTX */ 339 291 ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, 340 292 hr_qp->qpn); ··· 388 340 hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); 389 341 } 390 342 343 + static inline u8 get_qp_bankid(unsigned long qpn) 344 + { 345 + /* The lower 3 bits of QPN are used to hash to different banks */ 346 + return (u8)(qpn & GENMASK(2, 0)); 347 + } 348 + 391 349 static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) 392 350 { 393 - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; 351 + u8 bankid; 394 352 395 353 if (hr_qp->ibqp.qp_type == IB_QPT_GSI) 396 354 return; ··· 404 350 if (hr_qp->qpn < hr_dev->caps.reserved_qps) 405 351 return; 406 352 407 - hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); 353 + bankid = get_qp_bankid(hr_qp->qpn); 354 + 355 + 
ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); 356 + 357 + spin_lock(&hr_dev->qp_table.bank_lock); 358 + hr_dev->qp_table.bank[bankid].inuse--; 359 + spin_unlock(&hr_dev->qp_table.bank_lock); 408 360 } 409 361 410 362 static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, ··· 464 404 return 0; 465 405 } 466 406 467 - static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, 468 - struct hns_roce_qp *hr_qp, 469 - struct ib_qp_cap *cap) 407 + static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) 470 408 { 471 - u32 cnt; 409 + /* GSI/UD QP only has extended sge */ 410 + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) 411 + return qp->sq.max_gs; 472 412 473 - cnt = max(1U, cap->max_send_sge); 474 - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { 475 - hr_qp->sq.max_gs = roundup_pow_of_two(cnt); 476 - hr_qp->sge.sge_cnt = 0; 477 - 478 - return 0; 479 - } 480 - 481 - hr_qp->sq.max_gs = cnt; 482 - 483 - /* UD sqwqe's sge use extend sge */ 484 - if (hr_qp->ibqp.qp_type == IB_QPT_GSI || 485 - hr_qp->ibqp.qp_type == IB_QPT_UD) { 486 - cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); 487 - } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { 488 - cnt = roundup_pow_of_two(sq_wqe_cnt * 489 - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); 490 - } else { 491 - cnt = 0; 492 - } 493 - 494 - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; 495 - hr_qp->sge.sge_cnt = cnt; 413 + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) 414 + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; 496 415 497 416 return 0; 417 + } 418 + 419 + static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, 420 + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) 421 + { 422 + u32 total_sge_cnt; 423 + u32 wqe_sge_cnt; 424 + 425 + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; 426 + 427 + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { 428 + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; 429 + return; 430 + } 431 + 432 + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); 433 + 434 + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); 435 + 436 + /* If the number of extended sge is not zero, they MUST use the 437 + * space of HNS_HW_PAGE_SIZE at least. 
438 + */ 439 + if (wqe_sge_cnt) { 440 + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); 441 + hr_qp->sge.sge_cnt = max(total_sge_cnt, 442 + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); 443 + } 498 444 } 499 445 500 446 static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, ··· 513 447 /* Sanity check SQ size before proceeding */ 514 448 if (ucmd->log_sq_stride > max_sq_stride || 515 449 ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { 516 - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); 450 + ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n"); 517 451 return -EINVAL; 518 452 } 519 453 520 454 if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { 521 - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", 455 + ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n", 522 456 cap->max_send_sge); 523 457 return -EINVAL; 524 458 } ··· 545 479 return ret; 546 480 } 547 481 548 - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); 549 - if (ret) 550 - return ret; 482 + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); 551 483 552 484 hr_qp->sq.wqe_shift = ucmd->log_sq_stride; 553 485 hr_qp->sq.wqe_cnt = cnt; ··· 610 546 { 611 547 struct ib_device *ibdev = &hr_dev->ib_dev; 612 548 u32 cnt; 613 - int ret; 614 549 615 550 if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || 616 551 cap->max_send_sge > hr_dev->caps.max_sq_sg) { ··· 621 558 622 559 cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); 623 560 if (cnt > hr_dev->caps.max_wqes) { 624 - ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", 561 + ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n", 625 562 cnt); 626 563 return -EINVAL; 627 564 } ··· 629 566 hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); 630 567 hr_qp->sq.wqe_cnt = cnt; 631 568 632 - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); 633 - if (ret) 634 - return ret; 569 + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); 635 570 636 571 /* sync the parameters of kernel QP to user's configuration */ 637 572 cap->max_send_wr = cnt; ··· 786 725 struct ib_device *ibdev = &hr_dev->ib_dev; 787 726 int ret; 788 727 728 + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE) 729 + hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; 730 + 789 731 if (udata) { 790 732 if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { 791 733 ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, 792 734 &hr_qp->sdb); 793 735 if (ret) { 794 736 ibdev_err(ibdev, 795 - "Failed to map user SQ doorbell\n"); 737 + "failed to map user SQ doorbell, ret = %d.\n", 738 + ret); 796 739 goto err_out; 797 740 } 798 741 hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; ··· 808 743 &hr_qp->rdb); 809 744 if (ret) { 810 745 ibdev_err(ibdev, 811 - "Failed to map user RQ doorbell\n"); 746 + "failed to map user RQ doorbell, ret = %d.\n", 747 + ret); 812 748 goto err_sdb; 813 749 } 814 750 hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; ··· 826 760 ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); 827 761 if (ret) { 828 762 ibdev_err(ibdev, 829 - "Failed to alloc kernel RQ doorbell\n"); 763 + "failed to alloc kernel RQ doorbell, ret = %d.\n", 764 + ret); 830 765 goto err_out; 831 766 } 832 767 *hr_qp->rdb.db_record = 0; ··· 870 803 871 804 sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); 872 805 if (ZERO_OR_NULL_PTR(sq_wrid)) { 873 - ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); 806 + ibdev_err(ibdev, "failed to alloc SQ wrid.\n"); 874 807 return -ENOMEM; 875 808 } 876 809 877 810 if 
(hr_qp->rq.wqe_cnt) { 878 811 rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); 879 812 if (ZERO_OR_NULL_PTR(rq_wrid)) { 880 - ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); 813 + ibdev_err(ibdev, "failed to alloc RQ wrid.\n"); 881 814 ret = -ENOMEM; 882 815 goto err_sq; 883 816 } ··· 927 860 } 928 861 929 862 if (udata) { 930 - if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { 931 - ibdev_err(ibdev, "Failed to copy QP ucmd\n"); 932 - return -EFAULT; 863 + ret = ib_copy_from_udata(ucmd, udata, 864 + min(udata->inlen, sizeof(*ucmd))); 865 + if (ret) { 866 + ibdev_err(ibdev, 867 + "failed to copy QP ucmd, ret = %d\n", ret); 868 + return ret; 933 869 } 934 870 935 871 ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); 936 872 if (ret) 937 - ibdev_err(ibdev, "Failed to set user SQ size\n"); 873 + ibdev_err(ibdev, 874 + "failed to set user SQ size, ret = %d.\n", 875 + ret); 938 876 } else { 939 - if (init_attr->create_flags & 940 - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { 941 - ibdev_err(ibdev, "Failed to check multicast loopback\n"); 942 - return -EINVAL; 943 - } 944 - 945 - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { 946 - ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); 947 - return -EINVAL; 948 - } 949 - 950 877 ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); 951 878 if (ret) 952 - ibdev_err(ibdev, "Failed to set kernel SQ size\n"); 879 + ibdev_err(ibdev, 880 + "failed to set kernel SQ size, ret = %d.\n", 881 + ret); 953 882 } 954 883 955 884 return ret; ··· 969 906 hr_qp->state = IB_QPS_RESET; 970 907 hr_qp->flush_flag = 0; 971 908 909 + if (init_attr->create_flags) 910 + return -EOPNOTSUPP; 911 + 972 912 ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); 973 913 if (ret) { 974 - ibdev_err(ibdev, "Failed to set QP param\n"); 914 + ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); 975 915 return ret; 976 916 } 977 917 978 918 if (!udata) { 979 919 ret = alloc_kernel_wrid(hr_dev, hr_qp); 980 920 if (ret) { 981 - ibdev_err(ibdev, "Failed to alloc wrid\n"); 921 + ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", 922 + ret); 982 923 return ret; 983 924 } 984 925 } 985 926 986 927 ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); 987 928 if (ret) { 988 - ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); 929 + ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", 930 + ret); 989 931 goto err_wrid; 990 932 } 991 933 992 934 ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); 993 935 if (ret) { 994 - ibdev_err(ibdev, "Failed to alloc QP buffer\n"); 936 + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); 995 937 goto err_db; 996 938 } 997 939 998 940 ret = alloc_qpn(hr_dev, hr_qp); 999 941 if (ret) { 1000 - ibdev_err(ibdev, "Failed to alloc QPN\n"); 942 + ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); 1001 943 goto err_buf; 1002 944 } 1003 945 1004 946 ret = alloc_qpc(hr_dev, hr_qp); 1005 947 if (ret) { 1006 - ibdev_err(ibdev, "Failed to alloc QP context\n"); 948 + ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n", 949 + ret); 1007 950 goto err_qpn; 1008 951 } 1009 952 1010 953 ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); 1011 954 if (ret) { 1012 - ibdev_err(ibdev, "Failed to store QP\n"); 955 + ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret); 1013 956 goto err_qpc; 1014 957 } 1015 958 ··· 1072 1003 kfree(hr_qp); 1073 1004 } 1074 1005 1006 + static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, 1007 + bool 
is_user) 1008 + { 1009 + switch (type) { 1010 + case IB_QPT_UD: 1011 + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && 1012 + is_user) 1013 + goto out; 1014 + fallthrough; 1015 + case IB_QPT_RC: 1016 + case IB_QPT_GSI: 1017 + break; 1018 + default: 1019 + goto out; 1020 + } 1021 + 1022 + return 0; 1023 + 1024 + out: 1025 + ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type); 1026 + 1027 + return -EOPNOTSUPP; 1028 + } 1029 + 1075 1030 struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, 1076 1031 struct ib_qp_init_attr *init_attr, 1077 1032 struct ib_udata *udata) ··· 1105 1012 struct hns_roce_qp *hr_qp; 1106 1013 int ret; 1107 1014 1108 - switch (init_attr->qp_type) { 1109 - case IB_QPT_RC: 1110 - case IB_QPT_GSI: 1111 - break; 1112 - default: 1113 - ibdev_err(ibdev, "not support QP type %d\n", 1114 - init_attr->qp_type); 1115 - return ERR_PTR(-EOPNOTSUPP); 1116 - } 1015 + ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); 1016 + if (ret) 1017 + return ERR_PTR(ret); 1117 1018 1118 1019 hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); 1119 1020 if (!hr_qp) ··· 1122 1035 if (ret) { 1123 1036 ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", 1124 1037 init_attr->qp_type, ret); 1125 - ibdev_err(ibdev, "Create GSI QP failed!\n"); 1038 + 1126 1039 kfree(hr_qp); 1127 1040 return ERR_PTR(ret); 1128 1041 } 1042 + 1129 1043 return &hr_qp->ibqp; 1130 1044 } 1131 1045 ··· 1179 1091 1180 1092 if ((attr_mask & IB_QP_PORT) && 1181 1093 (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { 1182 - ibdev_err(&hr_dev->ib_dev, 1183 - "attr port_num invalid.attr->port_num=%d\n", 1184 - attr->port_num); 1094 + ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n", 1095 + attr->port_num); 1185 1096 return -EINVAL; 1186 1097 } 1187 1098 ··· 1188 1101 p = attr_mask & IB_QP_PORT ? 
(attr->port_num - 1) : hr_qp->port; 1189 1102 if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { 1190 1103 ibdev_err(&hr_dev->ib_dev, 1191 - "attr pkey_index invalid.attr->pkey_index=%d\n", 1192 - attr->pkey_index); 1104 + "invalid attr, pkey_index = %u.\n", 1105 + attr->pkey_index); 1193 1106 return -EINVAL; 1194 1107 } 1195 1108 } ··· 1197 1110 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && 1198 1111 attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { 1199 1112 ibdev_err(&hr_dev->ib_dev, 1200 - "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", 1201 - attr->max_rd_atomic); 1113 + "invalid attr, max_rd_atomic = %u.\n", 1114 + attr->max_rd_atomic); 1202 1115 return -EINVAL; 1203 1116 } 1204 1117 1205 1118 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && 1206 1119 attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { 1207 1120 ibdev_err(&hr_dev->ib_dev, 1208 - "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", 1209 - attr->max_dest_rd_atomic); 1121 + "invalid attr, max_dest_rd_atomic = %u.\n", 1122 + attr->max_dest_rd_atomic); 1210 1123 return -EINVAL; 1211 1124 } 1212 1125 ··· 1331 1244 return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); 1332 1245 } 1333 1246 1334 - void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) 1247 + void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) 1335 1248 { 1336 1249 return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); 1337 1250 } 1338 1251 1339 - void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) 1252 + void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n) 1340 1253 { 1341 1254 return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); 1342 1255 } 1343 1256 1344 - void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) 1257 + void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n) 1345 1258 { 1346 1259 return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); 1347 1260 } 1348 1261 1349 - bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, 1262 + bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, 1350 1263 struct ib_cq *ib_cq) 1351 1264 { 1352 1265 struct hns_roce_cq *hr_cq; ··· 1367 1280 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) 1368 1281 { 1369 1282 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; 1370 - int reserved_from_top = 0; 1371 - int reserved_from_bot; 1372 - int ret; 1283 + unsigned int reserved_from_bot; 1284 + unsigned int i; 1373 1285 1374 1286 mutex_init(&qp_table->scc_mutex); 1375 1287 xa_init(&hr_dev->qp_table_xa); 1376 1288 1377 1289 reserved_from_bot = hr_dev->caps.reserved_qps; 1378 1290 1379 - ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, 1380 - hr_dev->caps.num_qps - 1, reserved_from_bot, 1381 - reserved_from_top); 1382 - if (ret) { 1383 - dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", 1384 - ret); 1385 - return ret; 1291 + for (i = 0; i < reserved_from_bot; i++) { 1292 + hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++; 1293 + hr_dev->qp_table.bank[get_qp_bankid(i)].min++; 1294 + } 1295 + 1296 + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { 1297 + ida_init(&hr_dev->qp_table.bank[i].ida); 1298 + hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps / 1299 + HNS_ROCE_QP_BANK_NUM - 1; 1300 + hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min; 1386 1301 } 1387 1302 1388 1303 return 0; ··· 1392 1303 1393 1304 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) 1394 1305 { 1395 - 
hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); 1306 + int i; 1307 + 1308 + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) 1309 + ida_destroy(&hr_dev->qp_table.bank[i].ida); 1396 1310 }
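The biggest hns_roce_qp.c change swaps the global QPN bitmap for eight per-bank allocators: the low three bits of a QPN select a bank, allocation picks the least-loaded bank, asks that bank's IDA for an id starting at bank->next (retrying from bank->min once the range wraps), and composes the QPN as (id << 3) | bankid so consecutive QPs land in different banks. The following standalone sketch keeps only that arithmetic — the ida API, the bank spinlock and the reserved-QPN accounting are left out, and the helper names are illustrative.

/* Banked QPN allocation: low 3 bits = bank id, remaining bits = per-bank id. */
#include <stdio.h>

#define QP_BANK_NUM 8

struct qp_bank {
	unsigned int next;  /* next id to try, so QPNs keep increasing */
	unsigned int max;   /* largest id this bank may hand out       */
	unsigned int inuse; /* load counter used for bank selection    */
};

static unsigned int least_loaded_bank(const struct qp_bank *bank)
{
	unsigned int best = 0, i;

	for (i = 1; i < QP_BANK_NUM; i++)
		if (bank[i].inuse < bank[best].inuse)
			best = i;
	return best;
}

static unsigned long alloc_qpn(struct qp_bank *bank)
{
	unsigned int bankid = least_loaded_bank(bank);
	struct qp_bank *b = &bank[bankid];
	unsigned int id = b->next; /* stand-in for ida_alloc_range() */

	b->next = (id + 1) > b->max ? 0 : id + 1;
	b->inuse++;

	return ((unsigned long)id << 3) | bankid; /* low 3 bits = bank id */
}

int main(void)
{
	struct qp_bank bank[QP_BANK_NUM] = { { .max = 15 } };
	unsigned int i;

	for (i = 1; i < QP_BANK_NUM; i++)
		bank[i].max = 15;

	for (i = 0; i < 10; i++)
		printf("qpn=0x%lx\n", alloc_qpn(bank));
	return 0;
}

Freeing reverses the mapping, as the diff shows: qpn & GENMASK(2, 0) recovers the bank and qpn >> 3 is the id handed back to that bank's IDA.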
+31 -22
drivers/infiniband/hw/hns/hns_roce_srq.c
··· 4 4 */ 5 5 6 6 #include <rdma/ib_umem.h> 7 - #include <rdma/hns-abi.h> 8 7 #include "hns_roce_device.h" 9 8 #include "hns_roce_cmd.h" 10 9 #include "hns_roce_hem.h" ··· 92 93 ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, 93 94 ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); 94 95 if (ret < 1) { 95 - ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); 96 + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", 97 + ret); 96 98 return -ENOBUFS; 97 99 } 98 100 ··· 101 101 ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, 102 102 ARRAY_SIZE(mtts_idx), &dma_handle_idx); 103 103 if (ret < 1) { 104 - ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); 104 + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", 105 + ret); 105 106 return -ENOBUFS; 106 107 } 107 108 108 109 ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); 109 110 if (ret) { 110 - ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); 111 + ibdev_err(ibdev, 112 + "failed to alloc SRQ number, ret = %d.\n", ret); 111 113 return -ENOMEM; 112 114 } 113 115 114 116 ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); 115 117 if (ret) { 116 - ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); 118 + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); 117 119 goto err_out; 118 120 } 119 121 120 122 ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); 121 123 if (ret) { 122 - ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); 124 + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); 123 125 goto err_put; 124 126 } 125 127 126 128 mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); 127 129 if (IS_ERR_OR_NULL(mailbox)) { 128 130 ret = -ENOMEM; 129 - ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); 131 + ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); 130 132 goto err_xa; 131 133 } 132 134 ··· 139 137 ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); 140 138 hns_roce_free_cmd_mailbox(hr_dev, mailbox); 141 139 if (ret) { 142 - ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); 140 + ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); 143 141 goto err_xa; 144 142 } 145 143 ··· 200 198 hr_dev->caps.srqwqe_ba_pg_sz + 201 199 HNS_HW_PAGE_SHIFT, udata, addr); 202 200 if (err) 203 - ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); 201 + ibdev_err(ibdev, 202 + "failed to alloc SRQ buf mtr, ret = %d.\n", err); 204 203 205 204 return err; 206 205 } ··· 232 229 hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, 233 230 udata, addr); 234 231 if (err) { 235 - ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); 232 + ibdev_err(ibdev, 233 + "failed to alloc SRQ idx mtr, ret = %d.\n", err); 236 234 return err; 237 235 } 238 236 239 237 if (!udata) { 240 238 idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); 241 239 if (!idx_que->bitmap) { 242 - ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); 240 + ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); 243 241 err = -ENOMEM; 244 242 goto err_idx_mtr; 245 243 } 246 - 247 244 } 248 245 249 246 return 0; ··· 291 288 int ret; 292 289 u32 cqn; 293 290 291 + if (init_attr->srq_type != IB_SRQT_BASIC && 292 + init_attr->srq_type != IB_SRQT_XRC) 293 + return -EOPNOTSUPP; 294 + 294 295 /* Check the actual SRQ wqe and SRQ sge num */ 295 296 if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || 296 297 init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) ··· 307 300 srq->max_gs = init_attr->attr.max_sge; 308 301 309 302 if 
(udata) { 310 - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); 303 + ret = ib_copy_from_udata(&ucmd, udata, 304 + min(udata->inlen, sizeof(ucmd))); 311 305 if (ret) { 312 - ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", 306 + ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", 313 307 ret); 314 308 return ret; 315 309 } ··· 318 310 319 311 ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); 320 312 if (ret) { 321 - ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); 313 + ibdev_err(ibdev, 314 + "failed to alloc SRQ buffer, ret = %d.\n", ret); 322 315 return ret; 323 316 } 324 317 325 318 ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); 326 319 if (ret) { 327 - ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); 320 + ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); 328 321 goto err_buf_alloc; 329 322 } 330 323 331 324 if (!udata) { 332 325 ret = alloc_srq_wrid(hr_dev, srq); 333 326 if (ret) { 334 - ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", 327 + ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", 335 328 ret); 336 329 goto err_idx_alloc; 337 330 } ··· 344 335 345 336 ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); 346 337 if (ret) { 347 - ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); 338 + ibdev_err(ibdev, 339 + "failed to alloc SRQ context, ret = %d.\n", ret); 348 340 goto err_wrid_alloc; 349 341 } 350 342 ··· 353 343 resp.srqn = srq->srqn; 354 344 355 345 if (udata) { 356 - if (ib_copy_to_udata(udata, &resp, 357 - min(udata->outlen, sizeof(resp)))) { 358 - ret = -EFAULT; 346 + ret = ib_copy_to_udata(udata, &resp, 347 + min(udata->outlen, sizeof(resp))); 348 + if (ret) 359 349 goto err_srqc_alloc; 360 - } 361 350 } 362 351 363 352 return 0;
-1
drivers/infiniband/hw/i40iw/i40iw.h
··· 274 274 u8 max_sge; 275 275 u8 iw_status; 276 276 u8 send_term_ok; 277 - bool push_mode; /* Initialized from parameter passed to driver */ 278 277 279 278 /* x710 specific */ 280 279 struct mutex pbl_mutex;
+3 -3
drivers/infiniband/hw/i40iw/i40iw_cm.c
··· 2426 2426 } 2427 2427 break; 2428 2428 case I40IW_CM_STATE_MPAREQ_RCVD: 2429 - atomic_add_return(1, &cm_node->passive_state); 2429 + atomic_inc(&cm_node->passive_state); 2430 2430 break; 2431 2431 case I40IW_CM_STATE_ESTABLISHED: 2432 2432 case I40IW_CM_STATE_SYN_RCVD: ··· 3020 3020 i40iw_cleanup_retrans_entry(cm_node); 3021 3021 3022 3022 if (!loopback) { 3023 - passive_state = atomic_add_return(1, &cm_node->passive_state); 3023 + passive_state = atomic_inc_return(&cm_node->passive_state); 3024 3024 if (passive_state == I40IW_SEND_RESET_EVENT) { 3025 3025 cm_node->state = I40IW_CM_STATE_CLOSED; 3026 3026 i40iw_rem_ref_cm_node(cm_node); ··· 3678 3678 return -EINVAL; 3679 3679 } 3680 3680 3681 - passive_state = atomic_add_return(1, &cm_node->passive_state); 3681 + passive_state = atomic_inc_return(&cm_node->passive_state); 3682 3682 if (passive_state == I40IW_SEND_RESET_EVENT) { 3683 3683 i40iw_rem_ref_cm_node(cm_node); 3684 3684 return -ECONNRESET;
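The i40iw_cm.c hunks are behavior-preserving: atomic_add_return(1, &x) and atomic_inc_return(&x) perform the same read-modify-write, and plain atomic_inc() is used where the return value was discarded anyway. A C11-atomics analogue of the two spellings, for illustration only:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_int passive_state = 0;

	/* old spelling: add_return(1, &x); fetch_add returns the old value */
	int a = atomic_fetch_add(&passive_state, 1) + 1;
	/* new spelling: inc_return(&x) -- same result, clearer intent */
	int b = atomic_fetch_add(&passive_state, 1) + 1;

	printf("%d %d\n", a, b); /* prints: 1 2 */
	return 0;
}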
+11 -61
drivers/infiniband/hw/i40iw/i40iw_ctrl.c
··· 820 820 } 821 821 822 822 /** 823 - * i40iw_sc_manage_push_page - Handle push page 824 - * @cqp: struct for cqp hw 825 - * @info: push page info 826 - * @scratch: u64 saved to be used during cqp completion 827 - * @post_sq: flag for cqp db to ring 828 - */ 829 - static enum i40iw_status_code i40iw_sc_manage_push_page( 830 - struct i40iw_sc_cqp *cqp, 831 - struct i40iw_cqp_manage_push_page_info *info, 832 - u64 scratch, 833 - bool post_sq) 834 - { 835 - u64 *wqe; 836 - u64 header; 837 - 838 - if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT) 839 - return I40IW_ERR_INVALID_PUSH_PAGE_INDEX; 840 - 841 - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); 842 - if (!wqe) 843 - return I40IW_ERR_RING_FULL; 844 - 845 - set_64bit_val(wqe, 16, info->qs_handle); 846 - 847 - header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) | 848 - LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) | 849 - LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) | 850 - LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE); 851 - 852 - i40iw_insert_wqe_hdr(wqe, header); 853 - 854 - i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE", 855 - wqe, I40IW_CQP_WQE_SIZE * 8); 856 - 857 - if (post_sq) 858 - i40iw_sc_cqp_post_sq(cqp); 859 - return 0; 860 - } 861 - 862 - /** 863 823 * i40iw_sc_manage_hmc_pm_func_table - manage of function table 864 824 * @cqp: struct for cqp hw 865 825 * @scratch: u64 saved to be used during cqp completion ··· 2819 2859 LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | 2820 2860 LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) | 2821 2861 LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) | 2822 - LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) | 2823 - LS_64(info->push_idx, I40IWQPC_PPIDX) | 2824 - LS_64(info->push_mode_en, I40IWQPC_PMENA); 2862 + LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN); 2825 2863 2826 2864 set_64bit_val(qp_ctx, 8, qp->sq_pa); 2827 2865 set_64bit_val(qp_ctx, 16, qp->rq_pa); ··· 4249 4291 pcmdinfo->in.u.add_arp_cache_entry.scratch, 4250 4292 pcmdinfo->post_sq); 4251 4293 break; 4252 - case OP_MANAGE_PUSH_PAGE: 4253 - status = i40iw_sc_manage_push_page( 4254 - pcmdinfo->in.u.manage_push_page.cqp, 4255 - &pcmdinfo->in.u.manage_push_page.info, 4256 - pcmdinfo->in.u.manage_push_page.scratch, 4257 - pcmdinfo->post_sq); 4258 - break; 4259 4294 case OP_UPDATE_PE_SDS: 4260 4295 /* case I40IW_CQP_OP_UPDATE_PE_SDS */ 4261 4296 status = i40iw_update_pe_sds( ··· 5049 5098 i40iw_hw_stats_stop_timer(vsi); 5050 5099 } 5051 5100 5052 - static struct i40iw_cqp_ops iw_cqp_ops = { 5101 + static const struct i40iw_cqp_ops iw_cqp_ops = { 5053 5102 .cqp_init = i40iw_sc_cqp_init, 5054 5103 .cqp_create = i40iw_sc_cqp_create, 5055 5104 .cqp_post_sq = i40iw_sc_cqp_post_sq, ··· 5058 5107 .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done 5059 5108 }; 5060 5109 5061 - static struct i40iw_ccq_ops iw_ccq_ops = { 5110 + static const struct i40iw_ccq_ops iw_ccq_ops = { 5062 5111 .ccq_init = i40iw_sc_ccq_init, 5063 5112 .ccq_create = i40iw_sc_ccq_create, 5064 5113 .ccq_destroy = i40iw_sc_ccq_destroy, ··· 5067 5116 .ccq_arm = i40iw_sc_ccq_arm 5068 5117 }; 5069 5118 5070 - static struct i40iw_ceq_ops iw_ceq_ops = { 5119 + static const struct i40iw_ceq_ops iw_ceq_ops = { 5071 5120 .ceq_init = i40iw_sc_ceq_init, 5072 5121 .ceq_create = i40iw_sc_ceq_create, 5073 5122 .cceq_create_done = i40iw_sc_cceq_create_done, ··· 5077 5126 .process_ceq = i40iw_sc_process_ceq 5078 5127 }; 5079 5128 5080 - static struct i40iw_aeq_ops iw_aeq_ops = { 5129 + static const struct i40iw_aeq_ops iw_aeq_ops = { 5081 5130 .aeq_init = i40iw_sc_aeq_init, 5082 
5131 .aeq_create = i40iw_sc_aeq_create, 5083 5132 .aeq_destroy = i40iw_sc_aeq_destroy, ··· 5088 5137 }; 5089 5138 5090 5139 /* iwarp pd ops */ 5091 - static struct i40iw_pd_ops iw_pd_ops = { 5140 + static const struct i40iw_pd_ops iw_pd_ops = { 5092 5141 .pd_init = i40iw_sc_pd_init, 5093 5142 }; 5094 5143 5095 - static struct i40iw_priv_qp_ops iw_priv_qp_ops = { 5144 + static const struct i40iw_priv_qp_ops iw_priv_qp_ops = { 5096 5145 .qp_init = i40iw_sc_qp_init, 5097 5146 .qp_create = i40iw_sc_qp_create, 5098 5147 .qp_modify = i40iw_sc_qp_modify, ··· 5107 5156 .iw_mr_fast_register = i40iw_sc_mr_fast_register 5108 5157 }; 5109 5158 5110 - static struct i40iw_priv_cq_ops iw_priv_cq_ops = { 5159 + static const struct i40iw_priv_cq_ops iw_priv_cq_ops = { 5111 5160 .cq_init = i40iw_sc_cq_init, 5112 5161 .cq_create = i40iw_sc_cq_create, 5113 5162 .cq_destroy = i40iw_sc_cq_destroy, 5114 5163 .cq_modify = i40iw_sc_cq_modify, 5115 5164 }; 5116 5165 5117 - static struct i40iw_mr_ops iw_mr_ops = { 5166 + static const struct i40iw_mr_ops iw_mr_ops = { 5118 5167 .alloc_stag = i40iw_sc_alloc_stag, 5119 5168 .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared, 5120 5169 .mr_reg_shared = i40iw_sc_mr_reg_shared, ··· 5123 5172 .mw_alloc = i40iw_sc_mw_alloc 5124 5173 }; 5125 5174 5126 - static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { 5127 - .manage_push_page = i40iw_sc_manage_push_page, 5175 + static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { 5128 5176 .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, 5129 5177 .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, 5130 5178 .commit_fpm_values = i40iw_sc_commit_fpm_values, ··· 5145 5195 .update_resume_qp = i40iw_sc_resume_qp 5146 5196 }; 5147 5197 5148 - static struct i40iw_hmc_ops iw_hmc_ops = { 5198 + static const struct i40iw_hmc_ops iw_hmc_ops = { 5149 5199 .init_iw_hmc = i40iw_sc_init_iw_hmc, 5150 5200 .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf, 5151 5201 .configure_iw_fpm = i40iw_sc_configure_iw_fpm,
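The i40iw_ctrl.c hunks above (together with the matching i40iw_type.h hunk further down) const-qualify the static ops tables and the device pointers that reference them, so the function-pointer tables can live in read-only data and a stray write to a callback slot becomes a compile-time error. A minimal sketch of that pattern with made-up names:

/* const ops table + pointer-to-const consumer, as in the i40iw change. */
#include <stdio.h>

struct widget_ops {
	int (*init)(void);
	void (*fini)(void);
};

static int demo_init(void) { puts("init"); return 0; }
static void demo_fini(void) { puts("fini"); }

static const struct widget_ops demo_ops = {
	.init = demo_init,
	.fini = demo_fini,
};

struct widget_dev {
	const struct widget_ops *ops; /* pointer-to-const, like i40iw_sc_dev */
};

int main(void)
{
	struct widget_dev dev = { .ops = &demo_ops };

	if (dev.ops->init() == 0)
		dev.ops->fini();
	/* dev.ops->init = NULL; would now fail to compile */
	return 0;
}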
+12 -23
drivers/infiniband/hw/i40iw/i40iw_d.h
··· 40 40 #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) 41 41 #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) 42 42 43 - #define I40IW_PUSH_OFFSET (4 * 1024 * 1024) 44 - #define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16 45 - #define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024) 46 - #define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2 47 - 48 43 #define I40IW_PE_DB_SIZE_4M 1 49 44 #define I40IW_PE_DB_SIZE_8M 2 50 45 ··· 397 402 #define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e 398 403 #define I40IW_CQP_OP_MANAGE_ARP 0x0f 399 404 #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10 400 - #define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11 401 405 #define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12 402 406 #define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13 403 407 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14 ··· 837 843 #define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \ 838 844 (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT) 839 845 840 - /* Manage Push Page - MPP */ 841 846 #define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff 842 847 843 848 #define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0 ··· 1345 1352 #define I40IWQPSQ_ADDFRAGCNT_SHIFT 38 1346 1353 #define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT) 1347 1354 1348 - #define I40IWQPSQ_PUSHWQE_SHIFT 56 1349 - #define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT) 1350 - 1351 1355 #define I40IWQPSQ_STREAMMODE_SHIFT 58 1352 1356 #define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT) 1353 1357 ··· 1730 1740 #define OP_MW_ALLOC 20 1731 1741 #define OP_QP_FLUSH_WQES 21 1732 1742 #define OP_ADD_ARP_CACHE_ENTRY 22 1733 - #define OP_MANAGE_PUSH_PAGE 23 1734 - #define OP_UPDATE_PE_SDS 24 1735 - #define OP_MANAGE_HMC_PM_FUNC_TABLE 25 1736 - #define OP_SUSPEND 26 1737 - #define OP_RESUME 27 1738 - #define OP_MANAGE_VF_PBLE_BP 28 1739 - #define OP_QUERY_FPM_VALUES 29 1740 - #define OP_COMMIT_FPM_VALUES 30 1741 - #define OP_REQUESTED_COMMANDS 31 1742 - #define OP_COMPLETED_COMMANDS 32 1743 - #define OP_GEN_AE 33 1744 - #define OP_QUERY_RDMA_FEATURES 34 1745 - #define OP_SIZE_CQP_STAT_ARRAY 35 1743 + #define OP_UPDATE_PE_SDS 23 1744 + #define OP_MANAGE_HMC_PM_FUNC_TABLE 24 1745 + #define OP_SUSPEND 25 1746 + #define OP_RESUME 26 1747 + #define OP_MANAGE_VF_PBLE_BP 27 1748 + #define OP_QUERY_FPM_VALUES 28 1749 + #define OP_COMMIT_FPM_VALUES 29 1750 + #define OP_REQUESTED_COMMANDS 30 1751 + #define OP_COMPLETED_COMMANDS 31 1752 + #define OP_GEN_AE 32 1753 + #define OP_QUERY_RDMA_FEATURES 33 1754 + #define OP_SIZE_CQP_STAT_ARRAY 34 1746 1755 1747 1756 #endif
-1
drivers/infiniband/hw/i40iw/i40iw_status.h
··· 61 61 I40IW_ERR_QUEUE_EMPTY = -22, 62 62 I40IW_ERR_INVALID_ALIGNMENT = -23, 63 63 I40IW_ERR_FLUSHED_QUEUE = -24, 64 - I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, 65 64 I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, 66 65 I40IW_ERR_TIMEOUT = -27, 67 66 I40IW_ERR_OPCODE_MISMATCH = -28,
+10 -28
drivers/infiniband/hw/i40iw/i40iw_type.h
··· 387 387 u8 *q2_buf; 388 388 u64 qp_compl_ctx; 389 389 u16 qs_handle; 390 - u16 push_idx; 391 390 u8 sq_tph_val; 392 391 u8 rq_tph_val; 393 392 u8 qp_state; ··· 492 493 struct i40iw_sc_aeq *aeq; 493 494 struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT]; 494 495 struct i40iw_sc_cq *ccq; 495 - struct i40iw_cqp_ops *cqp_ops; 496 - struct i40iw_ccq_ops *ccq_ops; 497 - struct i40iw_ceq_ops *ceq_ops; 498 - struct i40iw_aeq_ops *aeq_ops; 499 - struct i40iw_pd_ops *iw_pd_ops; 500 - struct i40iw_priv_qp_ops *iw_priv_qp_ops; 501 - struct i40iw_priv_cq_ops *iw_priv_cq_ops; 502 - struct i40iw_mr_ops *mr_ops; 503 - struct i40iw_cqp_misc_ops *cqp_misc_ops; 504 - struct i40iw_hmc_ops *hmc_ops; 496 + const struct i40iw_cqp_ops *cqp_ops; 497 + const struct i40iw_ccq_ops *ccq_ops; 498 + const struct i40iw_ceq_ops *ceq_ops; 499 + const struct i40iw_aeq_ops *aeq_ops; 500 + const struct i40iw_pd_ops *iw_pd_ops; 501 + const struct i40iw_priv_qp_ops *iw_priv_qp_ops; 502 + const struct i40iw_priv_cq_ops *iw_priv_cq_ops; 503 + const struct i40iw_mr_ops *mr_ops; 504 + const struct i40iw_cqp_misc_ops *cqp_misc_ops; 505 + const struct i40iw_hmc_ops *hmc_ops; 505 506 struct i40iw_vchnl_if vchnl_if; 506 507 const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; 507 508 ··· 748 749 struct i40iwarp_offload_info *iwarp_info; 749 750 u32 send_cq_num; 750 751 u32 rcv_cq_num; 751 - u16 push_idx; 752 - bool push_mode_en; 753 752 bool tcp_info_valid; 754 753 bool iwarp_info_valid; 755 754 bool err_rq_idx_valid; ··· 934 937 u8 entry_idx; 935 938 }; 936 939 937 - struct i40iw_cqp_manage_push_page_info { 938 - u32 push_idx; 939 - u16 qs_handle; 940 - u8 free_page; 941 - }; 942 - 943 940 struct i40iw_qp_flush_info { 944 941 u16 sq_minor_code; 945 942 u16 sq_major_code; ··· 1105 1114 }; 1106 1115 1107 1116 struct i40iw_cqp_misc_ops { 1108 - enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *, 1109 - struct i40iw_cqp_manage_push_page_info *, 1110 - u64, bool); 1111 1117 enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *, 1112 1118 u64, u8, bool, bool); 1113 1119 enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *, ··· 1240 1252 struct i40iw_manage_vf_pble_info info; 1241 1253 u64 scratch; 1242 1254 } manage_vf_pble_bp; 1243 - 1244 - struct { 1245 - struct i40iw_sc_cqp *cqp; 1246 - struct i40iw_cqp_manage_push_page_info info; 1247 - u64 scratch; 1248 - } manage_push_page; 1249 1255 1250 1256 struct { 1251 1257 struct i40iw_sc_dev *dev;
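The i40iw hunks above constify the HW ops tables: the tables themselves become static const and the device struct now holds pointers-to-const, so the function-pointer tables land in read-only data. A minimal standalone sketch of the pattern, using hypothetical names rather than the driver's real types:

#include <stdio.h>

/* A function-pointer "ops" table, analogous to i40iw_cqp_ops. */
struct demo_ops {
	int (*init)(int id);
};

static int demo_init(int id)
{
	printf("init %d\n", id);
	return 0;
}

/* const: the table is placed in rodata and cannot be patched at runtime. */
static const struct demo_ops demo_ops_table = {
	.init = demo_init,
};

/* The device keeps a pointer-to-const, matching the i40iw_sc_dev change. */
struct demo_dev {
	const struct demo_ops *ops;
};

int main(void)
{
	struct demo_dev dev = { .ops = &demo_ops_table };

	return dev.ops->init(42);
}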
+4 -37
drivers/infiniband/hw/i40iw/i40iw_uk.c
··· 115 115 } 116 116 117 117 /** 118 - * i40iw_qp_ring_push_db - ring qp doorbell 119 - * @qp: hw qp ptr 120 - * @wqe_idx: wqe index 121 - */ 122 - static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx) 123 - { 124 - set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id); 125 - qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); 126 - } 127 - 128 - /** 129 118 * i40iw_qp_get_next_send_wqe - return next wqe ptr 130 119 * @qp: hw qp ptr 131 120 * @wqe_idx: return wqe index ··· 415 426 u64 *wqe; 416 427 u8 *dest, *src; 417 428 struct i40iw_inline_rdma_write *op_info; 418 - u64 *push; 419 429 u64 header = 0; 420 430 u32 wqe_idx; 421 431 enum i40iw_status_code ret_code; ··· 441 453 LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) | 442 454 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | 443 455 LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | 444 - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | 445 456 LS_64(read_fence, I40IWQPSQ_READFENCE) | 446 457 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | 447 458 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | ··· 462 475 463 476 set_64bit_val(wqe, 24, header); 464 477 465 - if (qp->push_db) { 466 - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); 467 - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); 468 - i40iw_qp_ring_push_db(qp, wqe_idx); 469 - } else { 470 - if (post_sq) 471 - i40iw_qp_post_wr(qp); 472 - } 478 + if (post_sq) 479 + i40iw_qp_post_wr(qp); 473 480 474 481 return 0; 475 482 } ··· 488 507 enum i40iw_status_code ret_code; 489 508 bool read_fence = false; 490 509 u8 wqe_size; 491 - u64 *push; 492 510 493 511 op_info = &info->op.inline_send; 494 512 if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) ··· 506 526 LS_64(info->op_type, I40IWQPSQ_OPCODE) | 507 527 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | 508 528 LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | 509 - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | 510 529 LS_64(read_fence, I40IWQPSQ_READFENCE) | 511 530 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | 512 531 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | ··· 527 548 528 549 set_64bit_val(wqe, 24, header); 529 550 530 - if (qp->push_db) { 531 - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); 532 - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); 533 - i40iw_qp_ring_push_db(qp, wqe_idx); 534 - } else { 535 - if (post_sq) 536 - i40iw_qp_post_wr(qp); 537 - } 551 + if (post_sq) 552 + i40iw_qp_post_wr(qp); 538 553 539 554 return 0; 540 555 } ··· 745 772 746 773 q_type = (u8)RS_64(qword3, I40IW_CQ_SQ); 747 774 info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR); 748 - info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP); 749 775 if (info->error) { 750 776 info->comp_status = I40IW_COMPL_STATUS_FLUSHED; 751 777 info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR); ··· 923 951 924 952 static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { 925 953 .iw_qp_post_wr = i40iw_qp_post_wr, 926 - .iw_qp_ring_push_db = i40iw_qp_ring_push_db, 927 954 .iw_rdma_write = i40iw_rdma_write, 928 955 .iw_rdma_read = i40iw_rdma_read, 929 956 .iw_send = i40iw_send, ··· 980 1009 981 1010 qp->wqe_alloc_reg = info->wqe_alloc_reg; 982 1011 qp->qp_id = info->qp_id; 983 - 984 1012 qp->sq_size = info->sq_size; 985 - qp->push_db = info->push_db; 986 - qp->push_wqe = info->push_wqe; 987 - 988 1013 qp->max_sq_frag_cnt = info->max_sq_frag_cnt; 989 1014 sq_ring_size = qp->sq_size << sqshift; 990 1015
-8
drivers/infiniband/hw/i40iw/i40iw_user.h
··· 64 64 I40IW_MAX_SGE_RD = 1, 65 65 I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, 66 66 I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, 67 - I40IW_MAX_PUSH_PAGE_COUNT = 4096, 68 67 I40IW_MAX_PE_ENABLED_VF_COUNT = 32, 69 68 I40IW_MAX_VF_FPM_ID = 47, 70 69 I40IW_MAX_VF_PER_PF = 127, 71 70 I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, 72 71 I40IW_MAX_INLINE_DATA_SIZE = 48, 73 - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, 74 72 I40IW_MAX_IRD_SIZE = 64, 75 73 I40IW_MAX_ORD_SIZE = 127, 76 74 I40IW_MAX_WQ_ENTRIES = 2048, ··· 270 272 u16 minor_err; 271 273 u8 op_type; 272 274 bool stag_invalid_set; 273 - bool push_dropped; 274 275 bool error; 275 276 bool is_srq; 276 277 bool solicited_event; ··· 277 280 278 281 struct i40iw_qp_uk_ops { 279 282 void (*iw_qp_post_wr)(struct i40iw_qp_uk *); 280 - void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32); 281 283 enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *, 282 284 struct i40iw_post_sq_info *, bool); 283 285 enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *, ··· 336 340 struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; 337 341 u64 *rq_wrid_array; 338 342 u64 *shadow_area; 339 - u32 *push_db; 340 - u64 *push_wqe; 341 343 struct i40iw_ring sq_ring; 342 344 struct i40iw_ring rq_ring; 343 345 struct i40iw_ring initial_ring; ··· 375 381 u64 *shadow_area; 376 382 struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; 377 383 u64 *rq_wrid_array; 378 - u32 *push_db; 379 - u64 *push_wqe; 380 384 u32 qp_id; 381 385 u32 sq_size; 382 386 u32 rq_size;
+11 -110
drivers/infiniband/hw/i40iw/i40iw_verbs.c
··· 180 180 } 181 181 182 182 /** 183 - * i40iw_alloc_push_page - allocate a push page for qp 184 - * @iwdev: iwarp device 185 - * @qp: hardware control qp 186 - */ 187 - static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) 188 - { 189 - struct i40iw_cqp_request *cqp_request; 190 - struct cqp_commands_info *cqp_info; 191 - enum i40iw_status_code status; 192 - 193 - if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) 194 - return; 195 - 196 - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); 197 - if (!cqp_request) 198 - return; 199 - 200 - atomic_inc(&cqp_request->refcount); 201 - 202 - cqp_info = &cqp_request->info; 203 - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; 204 - cqp_info->post_sq = 1; 205 - 206 - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; 207 - cqp_info->in.u.manage_push_page.info.free_page = 0; 208 - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; 209 - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; 210 - 211 - status = i40iw_handle_cqp_op(iwdev, cqp_request); 212 - if (!status) 213 - qp->push_idx = cqp_request->compl_info.op_ret_val; 214 - else 215 - i40iw_pr_err("CQP-OP Push page fail"); 216 - i40iw_put_cqp_request(&iwdev->cqp, cqp_request); 217 - } 218 - 219 - /** 220 - * i40iw_dealloc_push_page - free a push page for qp 221 - * @iwdev: iwarp device 222 - * @qp: hardware control qp 223 - */ 224 - static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) 225 - { 226 - struct i40iw_cqp_request *cqp_request; 227 - struct cqp_commands_info *cqp_info; 228 - enum i40iw_status_code status; 229 - 230 - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) 231 - return; 232 - 233 - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); 234 - if (!cqp_request) 235 - return; 236 - 237 - cqp_info = &cqp_request->info; 238 - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; 239 - cqp_info->post_sq = 1; 240 - 241 - cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; 242 - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; 243 - cqp_info->in.u.manage_push_page.info.free_page = 1; 244 - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; 245 - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; 246 - 247 - status = i40iw_handle_cqp_op(iwdev, cqp_request); 248 - if (!status) 249 - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; 250 - else 251 - i40iw_pr_err("CQP-OP Push page fail"); 252 - } 253 - 254 - /** 255 183 * i40iw_alloc_pd - allocate protection domain 256 184 * @pd: PD pointer 257 185 * @udata: user data ··· 276 348 u32 qp_num = iwqp->ibqp.qp_num; 277 349 278 350 i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); 279 - i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); 280 351 if (qp_num) 281 352 i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); 282 353 if (iwpbl->pbl_allocated) ··· 460 533 return ERR_PTR(-ENODEV); 461 534 462 535 if (init_attr->create_flags) 463 - return ERR_PTR(-EINVAL); 536 + return ERR_PTR(-EOPNOTSUPP); 464 537 if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) 465 538 init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; 466 539 ··· 488 561 489 562 qp = &iwqp->sc_qp; 490 563 qp->back_qp = (void *)iwqp; 491 - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; 492 - 493 564 iwqp->iwdev = iwdev; 494 565 iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; 495 566 ··· 531 606 err_code = -EOPNOTSUPP; 532 607 goto error; 533 608 } 534 - if (iwdev->push_mode) 535 - i40iw_alloc_push_page(iwdev, qp); 536 609 if 
(udata) { 537 610 err_code = ib_copy_from_udata(&req, udata, sizeof(req)); 538 611 if (err_code) { ··· 589 666 ctx_info->iwarp_info_valid = true; 590 667 ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; 591 668 ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; 592 - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) { 593 - ctx_info->push_mode_en = false; 594 - } else { 595 - ctx_info->push_mode_en = true; 596 - ctx_info->push_idx = qp->push_idx; 597 - } 598 - 599 669 ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, 600 670 (u64 *)iwqp->host_ctx.va, 601 671 ctx_info); ··· 628 712 uresp.actual_sq_size = sq_size; 629 713 uresp.actual_rq_size = rq_size; 630 714 uresp.qp_id = qp_num; 631 - uresp.push_idx = qp->push_idx; 715 + uresp.push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; 632 716 err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 633 717 if (err_code) { 634 718 i40iw_pr_err("copy_to_udata failed\n"); ··· 747 831 u8 dont_wait = 0; 748 832 u32 err; 749 833 unsigned long flags; 834 + 835 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 836 + return -EOPNOTSUPP; 750 837 751 838 memset(&info, 0, sizeof(info)); 752 839 ctx_info = &iwqp->ctx_info; ··· 999 1080 unsigned long flags; 1000 1081 int err_code; 1001 1082 int entries = attr->cqe; 1083 + 1084 + if (attr->flags) 1085 + return -EOPNOTSUPP; 1002 1086 1003 1087 if (iwdev->closing) 1004 1088 return -ENODEV; ··· 1955 2033 rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); 1956 2034 u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; 1957 2035 1958 - return sprintf(buf, "%x\n", hw_rev); 2036 + return sysfs_emit(buf, "%x\n", hw_rev); 1959 2037 } 1960 2038 static DEVICE_ATTR_RO(hw_rev); 1961 2039 ··· 1965 2043 static ssize_t hca_type_show(struct device *dev, 1966 2044 struct device_attribute *attr, char *buf) 1967 2045 { 1968 - return sprintf(buf, "I40IW\n"); 2046 + return sysfs_emit(buf, "I40IW\n"); 1969 2047 } 1970 2048 static DEVICE_ATTR_RO(hca_type); 1971 2049 ··· 1975 2053 static ssize_t board_id_show(struct device *dev, 1976 2054 struct device_attribute *attr, char *buf) 1977 2055 { 1978 - return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); 2056 + return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID"); 1979 2057 } 1980 2058 static DEVICE_ATTR_RO(board_id); 1981 2059 ··· 2583 2661 iwibdev->ibdev.node_type = RDMA_NODE_RNIC; 2584 2662 ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); 2585 2663 2586 - iwibdev->ibdev.uverbs_cmd_mask = 2587 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 2588 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 2589 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 2590 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2591 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2592 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 2593 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2594 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2595 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 2596 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 2597 - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 2598 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 2599 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 2600 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 2601 - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 2602 - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 2603 - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | 2604 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 2605 - (1ull << IB_USER_VERBS_CMD_POST_RECV) | 2606 - (1ull << IB_USER_VERBS_CMD_POST_SEND); 2607 2664 iwibdev->ibdev.phys_port_cnt = 1; 2608 2665 iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; 2609 2666 
iwibdev->ibdev.dev.parent = &pcidev->dev;
+1
drivers/infiniband/hw/mlx4/mad.c
··· 1523 1523 return; 1524 1524 } else 1525 1525 *slave_id = slave; 1526 + break; 1526 1527 default: 1527 1528 /* nothing */; 1528 1529 }
+7 -57
drivers/infiniband/hw/mlx4/main.c
··· 2024 2024 { 2025 2025 struct mlx4_ib_dev *dev = 2026 2026 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); 2027 - return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); 2027 + 2028 + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); 2028 2029 } 2029 2030 static DEVICE_ATTR_RO(hca_type); 2030 2031 ··· 2034 2033 { 2035 2034 struct mlx4_ib_dev *dev = 2036 2035 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); 2037 - return sprintf(buf, "%x\n", dev->dev->rev_id); 2036 + 2037 + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); 2038 2038 } 2039 2039 static DEVICE_ATTR_RO(hw_rev); 2040 2040 ··· 2045 2043 struct mlx4_ib_dev *dev = 2046 2044 rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); 2047 2045 2048 - return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 2049 - dev->dev->board_id); 2046 + return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); 2050 2047 } 2051 2048 static DEVICE_ATTR_RO(board_id); 2052 2049 ··· 2265 2264 u64 release_mac = MLX4_IB_INVALID_MAC; 2266 2265 struct mlx4_ib_qp *qp; 2267 2266 2268 - read_lock(&dev_base_lock); 2269 2267 new_smac = mlx4_mac_to_u64(dev->dev_addr); 2270 - read_unlock(&dev_base_lock); 2271 - 2272 2268 atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); 2273 2269 2274 2270 /* no need for update QP1 and mac registration in non-SRIOV */ ··· 2655 2657 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; 2656 2658 ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; 2657 2659 2658 - ibdev->ib_dev.uverbs_cmd_mask = 2659 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 2660 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 2661 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 2662 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2663 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2664 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 2665 - (1ull << IB_USER_VERBS_CMD_REREG_MR) | 2666 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2667 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2668 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 2669 - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 2670 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 2671 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 2672 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 2673 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 2674 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 2675 - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 2676 - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 2677 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 2678 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 2679 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 2680 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 2681 - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | 2682 - (1ull << IB_USER_VERBS_CMD_OPEN_QP); 2683 - 2684 2660 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); 2685 - ibdev->ib_dev.uverbs_ex_cmd_mask |= 2686 - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | 2687 - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | 2688 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | 2689 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); 2690 2661 2691 2662 if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && 2692 2663 ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == 2693 2664 IB_LINK_LAYER_ETHERNET) || 2694 2665 (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == 2695 - IB_LINK_LAYER_ETHERNET))) { 2696 - ibdev->ib_dev.uverbs_ex_cmd_mask |= 2697 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | 2698 - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | 2699 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 2700 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 2701 - (1ull << 
IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 2666 + IB_LINK_LAYER_ETHERNET))) 2702 2667 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); 2703 - } 2704 2668 2705 2669 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || 2706 - dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { 2707 - ibdev->ib_dev.uverbs_cmd_mask |= 2708 - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 2709 - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 2670 + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) 2710 2671 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); 2711 - } 2712 2672 2713 2673 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { 2714 - ibdev->ib_dev.uverbs_cmd_mask |= 2715 - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | 2716 - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); 2717 2674 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); 2718 2675 } 2719 2676 2720 2677 if (check_flow_steering_support(dev)) { 2721 2678 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; 2722 - ibdev->ib_dev.uverbs_ex_cmd_mask |= 2723 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 2724 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 2725 2679 ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); 2726 2680 } 2727 2681
+48 -38
drivers/infiniband/hw/mlx4/mcg.c
··· 988 988 } 989 989 990 990 static ssize_t sysfs_show_group(struct device *dev, 991 - struct device_attribute *attr, char *buf) 991 + struct device_attribute *attr, char *buf) 992 992 { 993 993 struct mcast_group *group = 994 994 container_of(attr, struct mcast_group, dentry); 995 995 struct mcast_req *req = NULL; 996 - char pending_str[40]; 997 996 char state_str[40]; 998 - ssize_t len = 0; 999 - int f; 997 + char pending_str[40]; 998 + int len; 999 + int i; 1000 + u32 hoplimit; 1000 1001 1001 1002 if (group->state == MCAST_IDLE) 1002 - sprintf(state_str, "%s", get_state_string(group->state)); 1003 + scnprintf(state_str, sizeof(state_str), "%s", 1004 + get_state_string(group->state)); 1003 1005 else 1004 - sprintf(state_str, "%s(TID=0x%llx)", 1005 - get_state_string(group->state), 1006 - be64_to_cpu(group->last_req_tid)); 1007 - if (list_empty(&group->pending_list)) { 1008 - sprintf(pending_str, "No"); 1009 - } else { 1010 - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); 1011 - sprintf(pending_str, "Yes(TID=0x%llx)", 1012 - be64_to_cpu(req->sa_mad.mad_hdr.tid)); 1013 - } 1014 - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", 1015 - group->rec.scope_join_state & 0xf, 1016 - group->members[2], group->members[1], group->members[0], 1017 - atomic_read(&group->refcount), 1018 - pending_str, 1019 - state_str); 1020 - for (f = 0; f < MAX_VFS; ++f) 1021 - if (group->func[f].state == MCAST_MEMBER) 1022 - len += sprintf(buf + len, "%d[%1x] ", 1023 - f, group->func[f].join_state); 1006 + scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)", 1007 + get_state_string(group->state), 1008 + be64_to_cpu(group->last_req_tid)); 1024 1009 1025 - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " 1026 - "%4x %4x %2x %2x)\n", 1027 - be16_to_cpu(group->rec.pkey), 1028 - be32_to_cpu(group->rec.qkey), 1029 - (group->rec.mtusel_mtu & 0xc0) >> 6, 1030 - group->rec.mtusel_mtu & 0x3f, 1031 - group->rec.tclass, 1032 - (group->rec.ratesel_rate & 0xc0) >> 6, 1033 - group->rec.ratesel_rate & 0x3f, 1034 - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, 1035 - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, 1036 - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, 1037 - group->rec.proxy_join); 1010 + if (list_empty(&group->pending_list)) { 1011 + scnprintf(pending_str, sizeof(pending_str), "No"); 1012 + } else { 1013 + req = list_first_entry(&group->pending_list, struct mcast_req, 1014 + group_list); 1015 + scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)", 1016 + be64_to_cpu(req->sa_mad.mad_hdr.tid)); 1017 + } 1018 + 1019 + len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ", 1020 + group->rec.scope_join_state & 0xf, 1021 + group->members[2], 1022 + group->members[1], 1023 + group->members[0], 1024 + atomic_read(&group->refcount), 1025 + pending_str, 1026 + state_str); 1027 + 1028 + for (i = 0; i < MAX_VFS; i++) { 1029 + if (group->func[i].state == MCAST_MEMBER) 1030 + len += sysfs_emit_at(buf, len, "%d[%1x] ", i, 1031 + group->func[i].join_state); 1032 + } 1033 + 1034 + hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit); 1035 + len += sysfs_emit_at(buf, len, 1036 + "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n", 1037 + be16_to_cpu(group->rec.pkey), 1038 + be32_to_cpu(group->rec.qkey), 1039 + (group->rec.mtusel_mtu & 0xc0) >> 6, 1040 + (group->rec.mtusel_mtu & 0x3f), 1041 + group->rec.tclass, 1042 + (group->rec.ratesel_rate & 0xc0) >> 6, 1043 + (group->rec.ratesel_rate & 0x3f), 
1044 + (hoplimit & 0xf0000000) >> 28, 1045 + (hoplimit & 0x0fffff00) >> 8, 1046 + (hoplimit & 0x000000ff), 1047 + group->rec.proxy_join); 1038 1048 1039 1049 return len; 1040 1050 }
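The sysfs_show_group() rewrite above follows the tree-wide sprintf → sysfs_emit conversion in this pull: sysfs_emit() writes from the start of the sysfs buffer and clamps output to PAGE_SIZE, while sysfs_emit_at() appends at a given offset. A hedged sketch of the pattern for a simple read-only attribute (hypothetical attribute name; builds only in a kernel tree):

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t members_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	int len, i;

	/* The first write starts at offset 0 ... */
	len = sysfs_emit(buf, "members:");

	/* ... later writes append, still bounded by PAGE_SIZE. */
	for (i = 0; i < 4; i++)
		len += sysfs_emit_at(buf, len, " %d", i);

	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
static DEVICE_ATTR_RO(members);

Unlike a raw sprintf(), both helpers also WARN if they are handed something other than the page-aligned buffer sysfs passed in, which is what makes the conversion a hardening win rather than a cosmetic one.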
+4 -4
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 908 908 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); 909 909 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 910 910 int is_attach); 911 - int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, 912 - u64 start, u64 length, u64 virt_addr, 913 - int mr_access_flags, struct ib_pd *pd, 914 - struct ib_udata *udata); 911 + struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, 912 + u64 length, u64 virt_addr, 913 + int mr_access_flags, struct ib_pd *pd, 914 + struct ib_udata *udata); 915 915 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, 916 916 const struct ib_gid_attr *attr); 917 917
+8 -8
drivers/infiniband/hw/mlx4/mr.c
··· 456 456 return ERR_PTR(err); 457 457 } 458 458 459 - int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, 460 - u64 start, u64 length, u64 virt_addr, 461 - int mr_access_flags, struct ib_pd *pd, 462 - struct ib_udata *udata) 459 + struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, 460 + u64 length, u64 virt_addr, 461 + int mr_access_flags, struct ib_pd *pd, 462 + struct ib_udata *udata) 463 463 { 464 464 struct mlx4_ib_dev *dev = to_mdev(mr->device); 465 465 struct mlx4_ib_mr *mmr = to_mmr(mr); ··· 472 472 * race exists. 473 473 */ 474 474 err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); 475 - 476 475 if (err) 477 - return err; 476 + return ERR_PTR(err); 478 477 479 478 if (flags & IB_MR_REREG_PD) { 480 479 err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, ··· 541 542 542 543 release_mpt_entry: 543 544 mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); 544 - 545 - return err; 545 + if (err) 546 + return ERR_PTR(err); 547 + return NULL; 546 548 } 547 549 548 550 static int
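With this change mlx4_ib_rereg_user_mr() reports status through its pointer return instead of an int: an ERR_PTR() on failure and NULL when the existing MR was updated in place (mlx4 never hands back a replacement MR). A minimal caller-side sketch of that convention; the wrapper below is hypothetical and assumes the driver-internal mlx4_ib.h declarations:

#include <linux/err.h>
#include <rdma/ib_verbs.h>
#include "mlx4_ib.h"

/* Hypothetical helper: move an MR to a new PD and translate the result. */
static int demo_change_mr_pd(struct ib_mr *mr, struct ib_pd *pd)
{
	struct ib_mr *ret;

	ret = mlx4_ib_rereg_user_mr(mr, IB_MR_REREG_PD, 0, 0, 0, 0, pd, NULL);
	if (IS_ERR(ret))
		return PTR_ERR(ret);	/* firmware/command failure */

	/* NULL means the original MR was updated in place. */
	return 0;
}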
+12 -2
drivers/infiniband/hw/mlx4/qp.c
··· 1493 1493 MLX4_IB_SRIOV_SQP | 1494 1494 MLX4_IB_QP_NETIF | 1495 1495 MLX4_IB_QP_CREATE_ROCE_V2_GSI)) 1496 - return -EINVAL; 1496 + return -EOPNOTSUPP; 1497 1497 1498 1498 if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { 1499 1499 if (init_attr->qp_type != IB_QPT_UD) ··· 1560 1560 err = create_qp_common(pd, init_attr, udata, sqpn, qp); 1561 1561 if (err) 1562 1562 return err; 1563 + 1564 + if (init_attr->create_flags & 1565 + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) 1566 + /* Internal QP created with ib_create_qp */ 1567 + rdma_restrack_no_track(&qp->ibqp.res); 1563 1568 1564 1569 qp->port = init_attr->port_num; 1565 1570 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : ··· 2792 2787 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 2793 2788 int ret; 2794 2789 2790 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 2791 + return -EOPNOTSUPP; 2792 + 2795 2793 ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); 2796 2794 2797 2795 if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { ··· 4015 4007 qp_attr->qp_access_flags = 4016 4008 to_ib_qp_access_flags(be32_to_cpu(context.params2)); 4017 4009 4018 - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { 4010 + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || 4011 + qp->ibqp.qp_type == IB_QPT_XRC_INI || 4012 + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { 4019 4013 to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); 4020 4014 to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); 4021 4015 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+4
drivers/infiniband/hw/mlx4/srq.c
··· 86 86 int err; 87 87 int i; 88 88 89 + if (init_attr->srq_type != IB_SRQT_BASIC && 90 + init_attr->srq_type != IB_SRQT_XRC) 91 + return -EOPNOTSUPP; 92 + 89 93 /* Sanity check SRQ size before proceeding */ 90 94 if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || 91 95 init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
+27 -37
drivers/infiniband/hw/mlx4/sysfs.c
··· 56 56 mlx4_ib_iov_dentry->entry_num, 57 57 port->num); 58 58 59 - return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); 59 + return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); 60 60 } 61 61 62 62 /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. ··· 117 117 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; 118 118 struct mlx4_ib_dev *mdev = port->dev; 119 119 union ib_gid gid; 120 - ssize_t ret; 120 + int ret; 121 + __be16 *raw; 121 122 122 123 ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, 123 124 mlx4_ib_iov_dentry->entry_num, &gid, 1); 124 125 if (ret) 125 126 return ret; 126 - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 127 - be16_to_cpu(((__be16 *) gid.raw)[0]), 128 - be16_to_cpu(((__be16 *) gid.raw)[1]), 129 - be16_to_cpu(((__be16 *) gid.raw)[2]), 130 - be16_to_cpu(((__be16 *) gid.raw)[3]), 131 - be16_to_cpu(((__be16 *) gid.raw)[4]), 132 - be16_to_cpu(((__be16 *) gid.raw)[5]), 133 - be16_to_cpu(((__be16 *) gid.raw)[6]), 134 - be16_to_cpu(((__be16 *) gid.raw)[7])); 135 - return ret; 127 + 128 + raw = (__be16 *)gid.raw; 129 + return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 130 + be16_to_cpu(raw[0]), 131 + be16_to_cpu(raw[1]), 132 + be16_to_cpu(raw[2]), 133 + be16_to_cpu(raw[3]), 134 + be16_to_cpu(raw[4]), 135 + be16_to_cpu(raw[5]), 136 + be16_to_cpu(raw[6]), 137 + be16_to_cpu(raw[7])); 136 138 } 137 139 138 140 static ssize_t show_phys_port_pkey(struct device *dev, ··· 153 151 if (ret) 154 152 return ret; 155 153 156 - return sprintf(buf, "0x%04x\n", pkey); 154 + return sysfs_emit(buf, "0x%04x\n", pkey); 157 155 } 158 156 159 157 #define DENTRY_REMOVE(_dentry) \ ··· 443 441 { 444 442 struct port_table_attribute *tab_attr = 445 443 container_of(attr, struct port_table_attribute, attr); 446 - ssize_t ret = -ENODEV; 444 + struct pkey_mgt *m = &p->dev->pkeys; 445 + u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index]; 447 446 448 - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= 449 - (p->dev->dev->caps.pkey_table_len[p->port_num])) 450 - ret = sprintf(buf, "none\n"); 451 - else 452 - ret = sprintf(buf, "%d\n", 453 - p->dev->pkeys.virt2phys_pkey[p->slave] 454 - [p->port_num - 1][tab_attr->index]); 455 - return ret; 447 + if (key >= p->dev->dev->caps.pkey_table_len[p->port_num]) 448 + return sysfs_emit(buf, "none\n"); 449 + return sysfs_emit(buf, "%d\n", key); 456 450 } 457 451 458 452 static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, ··· 486 488 static ssize_t show_port_gid_idx(struct mlx4_port *p, 487 489 struct port_attribute *attr, char *buf) 488 490 { 489 - return sprintf(buf, "%d\n", p->slave); 491 + return sysfs_emit(buf, "%d\n", p->slave); 490 492 } 491 493 492 494 static struct attribute ** ··· 540 542 { 541 543 struct mlx4_port *p = 542 544 container_of(attr, struct mlx4_port, smi_enabled); 543 - ssize_t len = 0; 544 545 545 - if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) 546 - len = sprintf(buf, "%d\n", 1); 547 - else 548 - len = sprintf(buf, "%d\n", 0); 549 - 550 - return len; 546 + return sysfs_emit(buf, "%d\n", 547 + !!mlx4_vf_smi_enabled(p->dev->dev, p->slave, 548 + p->port_num)); 551 549 } 552 550 553 551 static ssize_t sysfs_show_enable_smi_admin(struct device *dev, ··· 552 558 { 553 559 struct mlx4_port *p = 554 560 container_of(attr, struct mlx4_port, enable_smi_admin); 555 - ssize_t len = 0; 556 561 557 - if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, 
p->port_num)) 558 - len = sprintf(buf, "%d\n", 1); 559 - else 560 - len = sprintf(buf, "%d\n", 0); 561 - 562 - return len; 562 + return sysfs_emit(buf, "%d\n", 563 + !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, 564 + p->port_num)); 563 565 } 564 566 565 567 static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+49 -28
drivers/infiniband/hw/mlx5/cq.c
··· 707 707 int *cqe_size, int *index, int *inlen) 708 708 { 709 709 struct mlx5_ib_create_cq ucmd = {}; 710 + unsigned long page_size; 711 + unsigned int page_offset_quantized; 710 712 size_t ucmdlen; 711 - int page_shift; 712 713 __be64 *pas; 713 - int npages; 714 714 int ncont; 715 715 void *cqc; 716 716 int err; ··· 742 742 return err; 743 743 } 744 744 745 + page_size = mlx5_umem_find_best_cq_quantized_pgoff( 746 + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, 747 + page_offset, 64, &page_offset_quantized); 748 + if (!page_size) { 749 + err = -EINVAL; 750 + goto err_umem; 751 + } 752 + 745 753 err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); 746 754 if (err) 747 755 goto err_umem; 748 756 749 - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, 750 - &ncont, NULL); 751 - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", 752 - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); 757 + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); 758 + mlx5_ib_dbg( 759 + dev, 760 + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", 761 + ucmd.buf_addr, entries * ucmd.cqe_size, 762 + ib_umem_num_pages(cq->buf.umem), page_size, ncont); 753 763 754 764 *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 755 765 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; ··· 770 760 } 771 761 772 762 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); 773 - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); 763 + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); 774 764 775 765 cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); 776 766 MLX5_SET(cqc, cqc, log_page_size, 777 - page_shift - MLX5_ADAPTER_PAGE_SHIFT); 767 + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); 768 + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); 778 769 779 770 if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { 780 771 *index = ucmd.uar_page_index; ··· 1139 1128 } 1140 1129 1141 1130 static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, 1142 - int entries, struct ib_udata *udata, int *npas, 1143 - int *page_shift, int *cqe_size) 1131 + int entries, struct ib_udata *udata, 1132 + int *cqe_size) 1144 1133 { 1145 1134 struct mlx5_ib_resize_cq ucmd; 1146 1135 struct ib_umem *umem; 1147 1136 int err; 1148 - int npages; 1149 1137 1150 1138 err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); 1151 1139 if (err) ··· 1164 1154 err = PTR_ERR(umem); 1165 1155 return err; 1166 1156 } 1167 - 1168 - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, 1169 - npas, NULL); 1170 1157 1171 1158 cq->resize_umem = umem; 1172 1159 *cqe_size = ucmd.cqe_size; ··· 1257 1250 int err; 1258 1251 int npas; 1259 1252 __be64 *pas; 1260 - int page_shift; 1253 + unsigned int page_offset_quantized = 0; 1254 + unsigned int page_shift; 1261 1255 int inlen; 1262 1256 int cqe_size; 1263 1257 unsigned long flags; ··· 1285 1277 1286 1278 mutex_lock(&cq->resize_mutex); 1287 1279 if (udata) { 1288 - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, 1289 - &cqe_size); 1280 + unsigned long page_size; 1281 + 1282 + err = resize_user(dev, cq, entries, udata, &cqe_size); 1283 + if (err) 1284 + goto ex; 1285 + 1286 + page_size = mlx5_umem_find_best_cq_quantized_pgoff( 1287 + cq->resize_umem, cqc, log_page_size, 1288 + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, 1289 + &page_offset_quantized); 1290 + if (!page_size) { 1291 + err = -EINVAL; 1292 + goto ex_resize; 1293 + } 1294 + npas = 
ib_umem_num_dma_blocks(cq->resize_umem, page_size); 1295 + page_shift = order_base_2(page_size); 1290 1296 } else { 1297 + struct mlx5_frag_buf *frag_buf; 1298 + 1291 1299 cqe_size = 64; 1292 1300 err = resize_kernel(dev, cq, entries, cqe_size); 1293 - if (!err) { 1294 - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; 1295 - 1296 - npas = frag_buf->npages; 1297 - page_shift = frag_buf->page_shift; 1298 - } 1301 + if (err) 1302 + goto ex; 1303 + frag_buf = &cq->resize_buf->frag_buf; 1304 + npas = frag_buf->npages; 1305 + page_shift = frag_buf->page_shift; 1299 1306 } 1300 - 1301 - if (err) 1302 - goto ex; 1303 1307 1304 1308 inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + 1305 1309 MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; ··· 1324 1304 1325 1305 pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); 1326 1306 if (udata) 1327 - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, 1328 - pas, 0); 1307 + mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, 1308 + 0); 1329 1309 else 1330 1310 mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); 1331 1311 ··· 1339 1319 1340 1320 MLX5_SET(cqc, cqc, log_page_size, 1341 1321 page_shift - MLX5_ADAPTER_PAGE_SHIFT); 1322 + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); 1342 1323 MLX5_SET(cqc, cqc, cqe_sz, 1343 1324 cqe_sz_to_mlx_sz(cqe_size, 1344 1325 cq->private_flags &
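In the rewritten CQ create/resize paths everything is derived from the page size chosen for the umem: the PAS entry count comes from ib_umem_num_dma_blocks() and the CQC log_page_size is stored relative to the 4K adapter page (MLX5_ADAPTER_PAGE_SHIFT). A worked example of that arithmetic with made-up sizes (a userspace model, not the real umem API, which also accounts for the buffer's offset inside the first block):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cq_bytes = 512 * 64;		/* 512 CQEs of 64 bytes */
	uint64_t page_size = 64 * 1024;		/* page size picked for the umem */
	unsigned int adapter_page_shift = 12;	/* MLX5_ADAPTER_PAGE_SHIFT */

	/* ib_umem_num_dma_blocks(): blocks of page_size covering the buffer */
	uint64_t ncont = (cq_bytes + page_size - 1) / page_size;
	/* order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT */
	unsigned int log_page_size = __builtin_ctzll(page_size) - adapter_page_shift;

	/* Prints ncont=1 log_page_size=4 for these sizes. */
	printf("ncont=%llu log_page_size=%u\n",
	       (unsigned long long)ncont, log_page_size);
	return 0;
}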
+44 -48
drivers/infiniband/hw/mlx5/devx.c
··· 93 93 struct devx_umem { 94 94 struct mlx5_core_dev *mdev; 95 95 struct ib_umem *umem; 96 - u32 page_offset; 97 - int page_shift; 98 - int ncont; 99 96 u32 dinlen; 100 97 u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; 101 98 }; ··· 1308 1311 else 1309 1312 ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, 1310 1313 obj->dinlen, out, sizeof(out)); 1311 - if (ib_is_destroy_retryable(ret, why, uobject)) 1314 + if (ret) 1312 1315 return ret; 1313 1316 1314 1317 devx_event_table = &dev->devx_event_table; ··· 2054 2057 u64 addr; 2055 2058 size_t size; 2056 2059 u32 access; 2057 - int npages; 2058 2060 int err; 2059 - u32 page_mask; 2060 2061 2061 2062 if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || 2062 2063 uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) ··· 2068 2073 if (err) 2069 2074 return err; 2070 2075 2071 - err = ib_check_mr_access(access); 2076 + err = ib_check_mr_access(&dev->ib_dev, access); 2072 2077 if (err) 2073 2078 return err; 2074 2079 2075 2080 obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); 2076 2081 if (IS_ERR(obj->umem)) 2077 2082 return PTR_ERR(obj->umem); 2078 - 2079 - mlx5_ib_cont_pages(obj->umem, obj->umem->address, 2080 - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, 2081 - &obj->page_shift, &obj->ncont, NULL); 2082 - 2083 - if (!npages) { 2084 - ib_umem_release(obj->umem); 2085 - return -EINVAL; 2086 - } 2087 - 2088 - page_mask = (1 << obj->page_shift) - 1; 2089 - obj->page_offset = obj->umem->address & page_mask; 2090 - 2091 2083 return 0; 2092 2084 } 2093 2085 2094 - static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, 2086 + static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, 2087 + struct uverbs_attr_bundle *attrs, 2095 2088 struct devx_umem *obj, 2096 2089 struct devx_umem_reg_cmd *cmd) 2097 2090 { 2098 - cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + 2099 - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); 2100 - cmd->in = uverbs_zalloc(attrs, cmd->inlen); 2101 - return PTR_ERR_OR_ZERO(cmd->in); 2102 - } 2103 - 2104 - static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, 2105 - struct devx_umem *obj, 2106 - struct devx_umem_reg_cmd *cmd) 2107 - { 2108 - void *umem; 2091 + unsigned int page_size; 2109 2092 __be64 *mtt; 2093 + void *umem; 2094 + 2095 + /* 2096 + * We don't know what the user intends to use this umem for, but the HW 2097 + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all 2098 + * have different requirements. Since we have no idea how to sort this 2099 + * out, only support PAGE_SIZE with the expectation that userspace will 2100 + * provide the necessary alignments inside the known PAGE_SIZE and that 2101 + * FW will check everything. 
2102 + */ 2103 + page_size = ib_umem_find_best_pgoff( 2104 + obj->umem, PAGE_SIZE, 2105 + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), 2106 + 0)); 2107 + if (!page_size) 2108 + return -EINVAL; 2109 + 2110 + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + 2111 + (MLX5_ST_SZ_BYTES(mtt) * 2112 + ib_umem_num_dma_blocks(obj->umem, page_size)); 2113 + cmd->in = uverbs_zalloc(attrs, cmd->inlen); 2114 + if (IS_ERR(cmd->in)) 2115 + return PTR_ERR(cmd->in); 2110 2116 2111 2117 umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); 2112 2118 mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); 2113 2119 2114 2120 MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); 2115 - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); 2116 - MLX5_SET(umem, umem, log_page_size, obj->page_shift - 2117 - MLX5_ADAPTER_PAGE_SHIFT); 2118 - MLX5_SET(umem, umem, page_offset, obj->page_offset); 2119 - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, 2121 + MLX5_SET64(umem, umem, num_of_mtt, 2122 + ib_umem_num_dma_blocks(obj->umem, page_size)); 2123 + MLX5_SET(umem, umem, log_page_size, 2124 + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); 2125 + MLX5_SET(umem, umem, page_offset, 2126 + ib_umem_dma_offset(obj->umem, page_size)); 2127 + 2128 + mlx5_ib_populate_pas(obj->umem, page_size, mtt, 2120 2129 (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | 2121 - MLX5_IB_MTT_READ); 2130 + MLX5_IB_MTT_READ); 2131 + return 0; 2122 2132 } 2123 2133 2124 2134 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( ··· 2150 2150 if (err) 2151 2151 goto err_obj_free; 2152 2152 2153 - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); 2153 + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); 2154 2154 if (err) 2155 2155 goto err_umem_release; 2156 - 2157 - devx_umem_reg_cmd_build(dev, obj, &cmd); 2158 2156 2159 2157 MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); 2160 2158 err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, ··· 2185 2187 int err; 2186 2188 2187 2189 err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); 2188 - if (ib_is_destroy_retryable(err, why, uobject)) 2190 + if (err) 2189 2191 return err; 2190 2192 2191 2193 ib_umem_release(obj->umem); ··· 2598 2600 .llseek = no_llseek, 2599 2601 }; 2600 2602 2601 - static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, 2602 - enum rdma_remove_reason why) 2603 + static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, 2604 + enum rdma_remove_reason why) 2603 2605 { 2604 2606 struct devx_async_cmd_event_file *comp_ev_file = 2605 2607 container_of(uobj, struct devx_async_cmd_event_file, ··· 2621 2623 kvfree(entry); 2622 2624 } 2623 2625 spin_unlock_irq(&comp_ev_file->ev_queue.lock); 2624 - return 0; 2625 2626 }; 2626 2627 2627 - static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, 2628 - enum rdma_remove_reason why) 2628 + static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, 2629 + enum rdma_remove_reason why) 2629 2630 { 2630 2631 struct devx_async_event_file *ev_file = 2631 2632 container_of(uobj, struct devx_async_event_file, ··· 2668 2671 mutex_unlock(&dev->devx_event_table.event_xa_lock); 2669 2672 2670 2673 put_device(&dev->ib_dev.dev); 2671 - return 0; 2672 2674 }; 2673 2675 2674 2676 DECLARE_UVERBS_NAMED_METHOD(
+2 -4
drivers/infiniband/hw/mlx5/fs.c
··· 2035 2035 struct uverbs_attr_bundle *attrs) 2036 2036 { 2037 2037 struct mlx5_ib_flow_matcher *obj = uobject->object; 2038 - int ret; 2039 2038 2040 - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); 2041 - if (ret) 2042 - return ret; 2039 + if (atomic_read(&obj->usecnt)) 2040 + return -EBUSY; 2043 2041 2044 2042 kfree(obj); 2045 2043 return 0;
+24 -81
drivers/infiniband/hw/mlx5/main.c
··· 75 75 */ 76 76 static DEFINE_MUTEX(mlx5_ib_multiport_mutex); 77 77 78 - /* We can't use an array for xlt_emergency_page because dma_map_single 79 - * doesn't work on kernel modules memory 80 - */ 81 - static unsigned long xlt_emergency_page; 82 - static struct mutex xlt_emergency_page_mutex; 83 - 84 78 struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) 85 79 { 86 80 struct mlx5_ib_dev *dev; ··· 419 425 *active_width = IB_WIDTH_2X; 420 426 *active_speed = IB_SPEED_HDR; 421 427 break; 428 + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): 429 + *active_width = IB_WIDTH_1X; 430 + *active_speed = IB_SPEED_NDR; 431 + break; 422 432 case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): 423 433 *active_width = IB_WIDTH_4X; 424 434 *active_speed = IB_SPEED_HDR; 435 + break; 436 + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): 437 + *active_width = IB_WIDTH_2X; 438 + *active_speed = IB_SPEED_NDR; 439 + break; 440 + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): 441 + *active_width = IB_WIDTH_4X; 442 + *active_speed = IB_SPEED_NDR; 425 443 break; 426 444 default: 427 445 return -EINVAL; ··· 2634 2628 struct mlx5_ib_dev *dev = 2635 2629 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2636 2630 2637 - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); 2631 + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); 2638 2632 } 2639 2633 static DEVICE_ATTR_RO(fw_pages); 2640 2634 ··· 2644 2638 struct mlx5_ib_dev *dev = 2645 2639 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2646 2640 2647 - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 2641 + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 2648 2642 } 2649 2643 static DEVICE_ATTR_RO(reg_pages); 2650 2644 ··· 2654 2648 struct mlx5_ib_dev *dev = 2655 2649 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2656 2650 2657 - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 2651 + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); 2658 2652 } 2659 2653 static DEVICE_ATTR_RO(hca_type); 2660 2654 ··· 2664 2658 struct mlx5_ib_dev *dev = 2665 2659 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2666 2660 2667 - return sprintf(buf, "%x\n", dev->mdev->rev_id); 2661 + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); 2668 2662 } 2669 2663 static DEVICE_ATTR_RO(hw_rev); 2670 2664 ··· 2674 2668 struct mlx5_ib_dev *dev = 2675 2669 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2676 2670 2677 - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 2678 - dev->mdev->board_id); 2671 + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 2672 + dev->mdev->board_id); 2679 2673 } 2680 2674 static DEVICE_ATTR_RO(board_id); 2681 2675 ··· 4030 4024 .create_cq = mlx5_ib_create_cq, 4031 4025 .create_qp = mlx5_ib_create_qp, 4032 4026 .create_srq = mlx5_ib_create_srq, 4027 + .create_user_ah = mlx5_ib_create_ah, 4033 4028 .dealloc_pd = mlx5_ib_dealloc_pd, 4034 4029 .dealloc_ucontext = mlx5_ib_dealloc_ucontext, 4035 4030 .del_gid = mlx5_ib_del_gid, ··· 4148 4141 struct mlx5_core_dev *mdev = dev->mdev; 4149 4142 int err; 4150 4143 4151 - dev->ib_dev.uverbs_cmd_mask = 4152 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 4153 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 4154 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 4155 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 4156 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 4157 - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 4158 - (1ull << 
IB_USER_VERBS_CMD_DESTROY_AH) | 4159 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 4160 - (1ull << IB_USER_VERBS_CMD_REREG_MR) | 4161 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 4162 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 4163 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 4164 - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 4165 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 4166 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 4167 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 4168 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 4169 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 4170 - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 4171 - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 4172 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 4173 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 4174 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 4175 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 4176 - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | 4177 - (1ull << IB_USER_VERBS_CMD_OPEN_QP); 4178 - dev->ib_dev.uverbs_ex_cmd_mask = 4179 - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | 4180 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | 4181 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | 4182 - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | 4183 - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | 4184 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 4185 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 4186 - 4187 4144 if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && 4188 4145 IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) 4189 4146 ib_set_device_ops(&dev->ib_dev, ··· 4158 4187 4159 4188 dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); 4160 4189 4161 - if (MLX5_CAP_GEN(mdev, imaicl)) { 4162 - dev->ib_dev.uverbs_cmd_mask |= 4163 - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 4164 - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 4190 + if (MLX5_CAP_GEN(mdev, imaicl)) 4165 4191 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); 4166 - } 4167 4192 4168 - if (MLX5_CAP_GEN(mdev, xrc)) { 4169 - dev->ib_dev.uverbs_cmd_mask |= 4170 - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | 4171 - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); 4193 + if (MLX5_CAP_GEN(mdev, xrc)) 4172 4194 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); 4173 - } 4174 4195 4175 4196 if (MLX5_CAP_DEV_MEM(mdev, memic) || 4176 4197 MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & ··· 4241 4278 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4242 4279 4243 4280 if (ll == IB_LINK_LAYER_ETHERNET) { 4244 - dev->ib_dev.uverbs_ex_cmd_mask |= 4245 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | 4246 - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | 4247 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 4248 - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 4249 - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 4250 4281 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); 4251 4282 4252 4283 port_num = mlx5_core_native_port_num(dev->mdev) - 1; ··· 4835 4878 .id_table = mlx5r_id_table, 4836 4879 }; 4837 4880 4838 - unsigned long mlx5_ib_get_xlt_emergency_page(void) 4839 - { 4840 - mutex_lock(&xlt_emergency_page_mutex); 4841 - return xlt_emergency_page; 4842 - } 4843 - 4844 - void mlx5_ib_put_xlt_emergency_page(void) 4845 - { 4846 - mutex_unlock(&xlt_emergency_page_mutex); 4847 - } 4848 - 4849 4881 static int __init mlx5_ib_init(void) 4850 4882 { 4851 4883 int ret; 4852 4884 4853 - xlt_emergency_page = __get_free_page(GFP_KERNEL); 4885 + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); 4854 4886 if (!xlt_emergency_page) 4855 4887 return -ENOMEM; 4856 4888 4857 - mutex_init(&xlt_emergency_page_mutex); 4858 - 4859 4889 
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); 4860 4890 if (!mlx5_ib_event_wq) { 4861 - free_page(xlt_emergency_page); 4891 + free_page((unsigned long)xlt_emergency_page); 4862 4892 return -ENOMEM; 4863 4893 } 4864 4894 ··· 4878 4934 mlx5r_rep_cleanup(); 4879 4935 4880 4936 destroy_workqueue(mlx5_ib_event_wq); 4881 - mutex_destroy(&xlt_emergency_page_mutex); 4882 - free_page(xlt_emergency_page); 4937 + free_page((unsigned long)xlt_emergency_page); 4883 4938 } 4884 4939 4885 4940 module_init(mlx5_ib_init);
+45 -141
drivers/infiniband/hw/mlx5/mem.c
··· 36 36 #include "mlx5_ib.h" 37 37 #include <linux/jiffies.h> 38 38 39 - /* @umem: umem object to scan 40 - * @addr: ib virtual address requested by the user 41 - * @max_page_shift: high limit for page_shift - 0 means no limit 42 - * @count: number of PAGE_SIZE pages covered by umem 43 - * @shift: page shift for the compound pages found in the region 44 - * @ncont: number of compund pages 45 - * @order: log2 of the number of compound pages 39 + /* 40 + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be 41 + * filled in the pas array. 46 42 */ 47 - void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, 48 - unsigned long max_page_shift, 49 - int *count, int *shift, 50 - int *ncont, int *order) 43 + void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, 44 + u64 access_flags) 51 45 { 52 - unsigned long tmp; 53 - unsigned long m; 54 - u64 base = ~0, p = 0; 55 - u64 len, pfn; 56 - int i = 0; 57 - struct scatterlist *sg; 58 - int entry; 46 + struct ib_block_iter biter; 59 47 60 - addr = addr >> PAGE_SHIFT; 61 - tmp = (unsigned long)addr; 62 - m = find_first_bit(&tmp, BITS_PER_LONG); 63 - if (max_page_shift) 64 - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); 65 - 66 - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 67 - len = sg_dma_len(sg) >> PAGE_SHIFT; 68 - pfn = sg_dma_address(sg) >> PAGE_SHIFT; 69 - if (base + p != pfn) { 70 - /* If either the offset or the new 71 - * base are unaligned update m 72 - */ 73 - tmp = (unsigned long)(pfn | p); 74 - if (!IS_ALIGNED(tmp, 1 << m)) 75 - m = find_first_bit(&tmp, BITS_PER_LONG); 76 - 77 - base = pfn; 78 - p = 0; 79 - } 80 - 81 - p += len; 82 - i += len; 48 + rdma_umem_for_each_dma_block (umem, &biter, page_size) { 49 + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) | 50 + access_flags); 51 + pas++; 83 52 } 84 - 85 - if (i) { 86 - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); 87 - 88 - if (order) 89 - *order = ilog2(roundup_pow_of_two(i) >> m); 90 - 91 - *ncont = DIV_ROUND_UP(i, (1 << m)); 92 - } else { 93 - m = 0; 94 - 95 - if (order) 96 - *order = 0; 97 - 98 - *ncont = 0; 99 - } 100 - *shift = PAGE_SHIFT + m; 101 - *count = i; 102 53 } 103 54 104 55 /* 105 - * Populate the given array with bus addresses from the umem. 106 - * 107 - * dev - mlx5_ib device 108 - * umem - umem to use to fill the pages 109 - * page_shift - determines the page size used in the resulting array 110 - * offset - offset into the umem to start from, 111 - * only implemented for ODP umems 112 - * num_pages - total number of pages to fill 113 - * pas - bus addresses array to fill 114 - * access_flags - access flags to set on all present pages. 115 - use enum mlx5_ib_mtt_access_flags for this. 56 + * Compute the page shift and page_offset for mailboxes that use a quantized 57 + * page_offset. The granulatity of the page offset scales according to page 58 + * size. 
116 59 */ 117 - void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 118 - int page_shift, size_t offset, size_t num_pages, 119 - __be64 *pas, int access_flags) 60 + unsigned long __mlx5_umem_find_best_quantized_pgoff( 61 + struct ib_umem *umem, unsigned long pgsz_bitmap, 62 + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, 63 + unsigned int *page_offset_quantized) 120 64 { 121 - int shift = page_shift - PAGE_SHIFT; 122 - int mask = (1 << shift) - 1; 123 - int i, k, idx; 124 - u64 cur = 0; 125 - u64 base; 126 - int len; 127 - struct scatterlist *sg; 128 - int entry; 65 + const u64 page_offset_mask = (1UL << page_offset_bits) - 1; 66 + unsigned long page_size; 67 + u64 page_offset; 129 68 130 - i = 0; 131 - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 132 - len = sg_dma_len(sg) >> PAGE_SHIFT; 133 - base = sg_dma_address(sg); 69 + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); 70 + if (!page_size) 71 + return 0; 134 72 135 - /* Skip elements below offset */ 136 - if (i + len < offset << shift) { 137 - i += len; 138 - continue; 139 - } 140 - 141 - /* Skip pages below offset */ 142 - if (i < offset << shift) { 143 - k = (offset << shift) - i; 144 - i = offset << shift; 145 - } else { 146 - k = 0; 147 - } 148 - 149 - for (; k < len; k++) { 150 - if (!(i & mask)) { 151 - cur = base + (k << PAGE_SHIFT); 152 - cur |= access_flags; 153 - idx = (i >> shift) - offset; 154 - 155 - pas[idx] = cpu_to_be64(cur); 156 - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", 157 - i >> shift, be64_to_cpu(pas[idx])); 158 - } 159 - i++; 160 - 161 - /* Stop after num_pages reached */ 162 - if (i >> shift >= offset + num_pages) 163 - return; 164 - } 73 + /* 74 + * page size is the largest possible page size. 75 + * 76 + * Reduce the page_size, and thus the page_offset and quanta, until the 77 + * page_offset fits into the mailbox field. Once page_size < scale this 78 + * loop is guaranteed to terminate. 79 + */ 80 + page_offset = ib_umem_dma_offset(umem, page_size); 81 + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { 82 + page_size /= 2; 83 + page_offset = ib_umem_dma_offset(umem, page_size); 165 84 } 166 - } 167 85 168 - void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 169 - int page_shift, __be64 *pas, int access_flags) 170 - { 171 - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, 172 - ib_umem_num_dma_blocks(umem, PAGE_SIZE), 173 - pas, access_flags); 174 - } 175 - int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) 176 - { 177 - u64 page_size; 178 - u64 page_mask; 179 - u64 off_size; 180 - u64 off_mask; 181 - u64 buf_off; 86 + /* 87 + * The address is not aligned, or otherwise cannot be represented by the 88 + * page_offset. 89 + */ 90 + if (!(pgsz_bitmap & page_size)) 91 + return 0; 182 92 183 - page_size = (u64)1 << page_shift; 184 - page_mask = page_size - 1; 185 - buf_off = addr & page_mask; 186 - off_size = page_size >> 6; 187 - off_mask = off_size - 1; 188 - 189 - if (buf_off & off_mask) 190 - return -EINVAL; 191 - 192 - *offset = buf_off >> ilog2(off_size); 193 - return 0; 93 + *page_offset_quantized = 94 + (unsigned long)page_offset / (page_size / scale); 95 + if (WARN_ON(*page_offset_quantized > page_offset_mask)) 96 + return 0; 97 + return page_size; 194 98 } 195 99 196 100 #define WR_ID_BF 0xBF
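The new __mlx5_umem_find_best_quantized_pgoff() helper above picks the largest page size whose offset can still be expressed as page_offset_quantized * (page_size / scale) within the mailbox field. A standalone userspace model of that search with made-up numbers (the real code walks a struct ib_umem and also re-checks the driver's allowed page-size bitmap at the end):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Pretend the buffer starts 12KB past a 2MB-aligned IOVA. */
	uint64_t dma_offset = 12 * 1024;
	/* CQC-style field: 6 bits of page_offset, quantum = page_size / 64. */
	unsigned int page_offset_bits = 6, scale = 64;
	uint64_t page_offset_mask = (1ULL << page_offset_bits) - 1;

	uint64_t page_size = 2 * 1024 * 1024;	/* best size before quantization */
	uint64_t page_offset = dma_offset & (page_size - 1);

	/* Shrink the page size until the offset is a multiple of the quantum
	 * and the quantized value fits in the field, as in the kernel loop. */
	while (page_offset & ~(page_offset_mask * (page_size / scale))) {
		page_size /= 2;
		page_offset = dma_offset & (page_size - 1);
	}

	/* Ends with page_size=256K, page_offset_quantized=3 (3 * 4096 == 12288). */
	printf("page_size=%llu page_offset_quantized=%llu\n",
	       (unsigned long long)page_size,
	       (unsigned long long)(page_offset / (page_size / scale)));
	return 0;
}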
+81 -21
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 40 40 #define MLX5_IB_DEFAULT_UIDX 0xffffff 41 41 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) 42 42 43 - #define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) 43 + static __always_inline unsigned long 44 + __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, 45 + unsigned int pgsz_shift) 46 + { 47 + unsigned int largest_pg_shift = 48 + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, 49 + BITS_PER_LONG - 1); 50 + 51 + /* 52 + * Despite a command allowing it, the device does not support lower than 53 + * 4k page size. 54 + */ 55 + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); 56 + return GENMASK(largest_pg_shift, pgsz_shift); 57 + } 58 + 59 + /* 60 + * For mkc users, instead of a page_offset the command has a start_iova which 61 + * specifies both the page_offset and the on-the-wire IOVA 62 + */ 63 + #define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ 64 + ib_umem_find_best_pgsz(umem, \ 65 + __mlx5_log_page_size_to_bitmap( \ 66 + __mlx5_bit_sz(typ, log_pgsz_fld), \ 67 + pgsz_shift), \ 68 + iova) 69 + 70 + static __always_inline unsigned long 71 + __mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, 72 + unsigned int offset_shift) 73 + { 74 + unsigned int largest_offset_shift = 75 + min_t(unsigned long, page_offset_bits - 1 + offset_shift, 76 + BITS_PER_LONG - 1); 77 + 78 + return GENMASK(largest_offset_shift, offset_shift); 79 + } 80 + 81 + /* 82 + * QP/CQ/WQ/etc type commands take a page offset that satisifies: 83 + * page_offset_quantized * (page_size/scale) = page_offset 84 + * Which restricts allowed page sizes to ones that satisify the above. 85 + */ 86 + unsigned long __mlx5_umem_find_best_quantized_pgoff( 87 + struct ib_umem *umem, unsigned long pgsz_bitmap, 88 + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, 89 + unsigned int *page_offset_quantized); 90 + #define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ 91 + pgsz_shift, page_offset_fld, \ 92 + scale, page_offset_quantized) \ 93 + __mlx5_umem_find_best_quantized_pgoff( \ 94 + umem, \ 95 + __mlx5_log_page_size_to_bitmap( \ 96 + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ 97 + __mlx5_bit_sz(typ, page_offset_fld), \ 98 + GENMASK(31, order_base_2(scale)), scale, \ 99 + page_offset_quantized) 100 + 101 + #define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ 102 + pgsz_shift, page_offset_fld, \ 103 + scale, page_offset_quantized) \ 104 + __mlx5_umem_find_best_quantized_pgoff( \ 105 + umem, \ 106 + __mlx5_log_page_size_to_bitmap( \ 107 + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ 108 + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ 109 + page_offset_quantized) 44 110 45 111 enum { 46 112 MLX5_IB_MMAP_OFFSET_START = 9, ··· 663 597 int max_descs; 664 598 int desc_size; 665 599 int access_mode; 600 + unsigned int page_shift; 666 601 struct mlx5_core_mkey mmkey; 667 602 struct ib_umem *umem; 668 603 struct mlx5_shared_mr_info *smr_info; 669 604 struct list_head list; 670 - unsigned int order; 671 605 struct mlx5_cache_ent *cache_ent; 672 - int npages; 673 - struct mlx5_ib_dev *dev; 674 606 u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; 675 607 struct mlx5_core_sig_ctx *sig; 676 608 void *descs_alloc; ··· 1106 1042 return container_of(ibdev, struct mlx5_ib_dev, ib_dev); 1107 1043 } 1108 1044 1045 + static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr) 1046 + { 1047 + return to_mdev(mr->ibmr.device); 1048 + } 1049 + 1109 1050 static inline struct 
mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) 1110 1051 { 1111 1052 struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( ··· 1258 1189 int access_flags); 1259 1190 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); 1260 1191 void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); 1261 - int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1262 - u64 length, u64 virt_addr, int access_flags, 1263 - struct ib_pd *pd, struct ib_udata *udata); 1192 + struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1193 + u64 length, u64 virt_addr, int access_flags, 1194 + struct ib_pd *pd, struct ib_udata *udata); 1264 1195 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); 1265 1196 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1266 1197 u32 max_num_sg); ··· 1279 1210 size_t *out_mad_size, u16 *out_mad_pkey_index); 1280 1211 int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); 1281 1212 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); 1282 - int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); 1283 1213 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); 1284 1214 int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, 1285 1215 struct ib_smp *out_mad); ··· 1298 1230 struct ib_port_attr *props); 1299 1231 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, 1300 1232 struct ib_port_attr *props); 1301 - void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, 1302 - unsigned long max_page_shift, 1303 - int *count, int *shift, 1304 - int *ncont, int *order); 1305 - void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 1306 - int page_shift, size_t offset, size_t num_pages, 1307 - __be64 *pas, int access_flags); 1308 - void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 1309 - int page_shift, __be64 *pas, int access_flags); 1233 + void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, 1234 + u64 access_flags); 1310 1235 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); 1311 1236 int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); 1312 1237 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); ··· 1344 1283 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, 1345 1284 enum ib_uverbs_advise_mr_advice advice, 1346 1285 u32 flags, struct ib_sge *sg_list, u32 num_sge); 1347 - int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); 1286 + int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); 1348 1287 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 1349 1288 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) 1350 1289 { ··· 1366 1305 { 1367 1306 return -EOPNOTSUPP; 1368 1307 } 1369 - static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) 1308 + static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) 1370 1309 { 1371 1310 return -EOPNOTSUPP; 1372 1311 } ··· 1517 1456 return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; 1518 1457 } 1519 1458 1520 - unsigned long mlx5_ib_get_xlt_emergency_page(void); 1521 - void mlx5_ib_put_xlt_emergency_page(void); 1459 + extern void *xlt_emergency_page; 1522 1460 1523 1461 int bfregn_to_uar_index(struct mlx5_ib_dev *dev, 1524 1462 struct mlx5_bfreg_info *bfregi, u32 bfregn,
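Note: __mlx5_log_page_size_to_bitmap() added above turns a mailbox field width into the bitmap of candidate page sizes consumed by ib_umem_find_best_pgsz(): bit N set means a 2^N byte page may be used, with a 4k floor regardless of what the command format would allow. A rough standalone sketch of that construction, using illustrative names and a generic 5-bit log-page-size field rather than any particular mailbox layout:

#include <stdio.h>

#define MY_ADAPTER_PAGE_SHIFT 12	/* stand-in for MLX5_ADAPTER_PAGE_SHIFT: 4k floor */
#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

static unsigned long log_page_size_to_bitmap(unsigned int log_pgsz_bits,
					     unsigned int pgsz_shift)
{
	unsigned int largest = (1U << log_pgsz_bits) - 1 + pgsz_shift;

	if (largest > BITS_PER_LONG - 1)
		largest = BITS_PER_LONG - 1;
	if (pgsz_shift < MY_ADAPTER_PAGE_SHIFT)
		pgsz_shift = MY_ADAPTER_PAGE_SHIFT;
	return GENMASK(largest, pgsz_shift);
}

int main(void)
{
	/* e.g. a 5 bit log page size field with no extra shift */
	unsigned long bitmap = log_page_size_to_bitmap(5, 0);
	unsigned int shift;

	for (shift = 0; shift < BITS_PER_LONG; shift++)
		if (bitmap & (1UL << shift))
			printf("allowed page size: 2^%u bytes\n", shift);
	return 0;
}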
+580 -398
drivers/infiniband/hw/mlx5/mr.c
··· 41 41 #include <rdma/ib_verbs.h> 42 42 #include "mlx5_ib.h" 43 43 44 + /* 45 + * We can't use an array for xlt_emergency_page because dma_map_single doesn't 46 + * work on kernel modules memory 47 + */ 48 + void *xlt_emergency_page; 49 + static DEFINE_MUTEX(xlt_emergency_page_mutex); 50 + 44 51 enum { 45 52 MAX_PENDING_REG_MR = 8, 46 53 }; ··· 56 49 57 50 static void 58 51 create_mkey_callback(int status, struct mlx5_async_work *context); 52 + static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 53 + u64 iova, int access_flags, 54 + unsigned int page_size, bool populate); 59 55 60 56 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 61 57 struct ib_pd *pd) ··· 133 123 return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); 134 124 } 135 125 136 - static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, 137 - u64 length) 138 - { 139 - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= 140 - length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); 141 - } 142 - 143 126 static void create_mkey_callback(int status, struct mlx5_async_work *context) 144 127 { 145 128 struct mlx5_ib_mr *mr = 146 129 container_of(context, struct mlx5_ib_mr, cb_work); 147 - struct mlx5_ib_dev *dev = mr->dev; 148 130 struct mlx5_cache_ent *ent = mr->cache_ent; 131 + struct mlx5_ib_dev *dev = ent->dev; 149 132 unsigned long flags; 150 133 151 134 if (status) { ··· 175 172 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 176 173 if (!mr) 177 174 return NULL; 178 - mr->order = ent->order; 179 175 mr->cache_ent = ent; 180 - mr->dev = ent->dev; 181 176 182 177 set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); 183 178 MLX5_SET(mkc, mkc, free, 1); ··· 643 642 if (mlx5_mr_cache_invalidate(mr)) { 644 643 detach_mr_from_cache(mr); 645 644 destroy_mkey(dev, mr); 645 + kfree(mr); 646 646 return; 647 647 } 648 648 ··· 869 867 return MLX5_MAX_UMR_SHIFT; 870 868 } 871 869 872 - static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, 873 - int access_flags, struct ib_umem **umem, int *npages, 874 - int *page_shift, int *ncont, int *order) 875 - { 876 - struct ib_umem *u; 877 - 878 - *umem = NULL; 879 - 880 - if (access_flags & IB_ACCESS_ON_DEMAND) { 881 - struct ib_umem_odp *odp; 882 - 883 - odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, 884 - &mlx5_mn_ops); 885 - if (IS_ERR(odp)) { 886 - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", 887 - PTR_ERR(odp)); 888 - return PTR_ERR(odp); 889 - } 890 - 891 - u = &odp->umem; 892 - 893 - *page_shift = odp->page_shift; 894 - *ncont = ib_umem_odp_num_pages(odp); 895 - *npages = *ncont << (*page_shift - PAGE_SHIFT); 896 - if (order) 897 - *order = ilog2(roundup_pow_of_two(*ncont)); 898 - } else { 899 - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); 900 - if (IS_ERR(u)) { 901 - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); 902 - return PTR_ERR(u); 903 - } 904 - 905 - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, 906 - page_shift, ncont, order); 907 - } 908 - 909 - if (!*npages) { 910 - mlx5_ib_warn(dev, "avoid zero region\n"); 911 - ib_umem_release(u); 912 - return -EINVAL; 913 - } 914 - 915 - *umem = u; 916 - 917 - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", 918 - *npages, *ncont, *order, *page_shift); 919 - 920 - return 0; 921 - } 922 - 923 870 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) 924 871 { 925 872 struct mlx5_ib_umr_context *context = ··· 925 974 return &cache->ent[order]; 926 975 } 927 976 928 - 
static struct mlx5_ib_mr * 929 - alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, 930 - u64 len, int npages, int page_shift, unsigned int order, 931 - int access_flags) 977 + static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 978 + u64 length, int access_flags) 979 + { 980 + mr->ibmr.lkey = mr->mmkey.key; 981 + mr->ibmr.rkey = mr->mmkey.key; 982 + mr->ibmr.length = length; 983 + mr->ibmr.device = &dev->ib_dev; 984 + mr->access_flags = access_flags; 985 + } 986 + 987 + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 988 + struct ib_umem *umem, u64 iova, 989 + int access_flags) 932 990 { 933 991 struct mlx5_ib_dev *dev = to_mdev(pd->device); 934 - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); 992 + struct mlx5_cache_ent *ent; 935 993 struct mlx5_ib_mr *mr; 994 + unsigned int page_size; 936 995 937 - if (!ent) 938 - return ERR_PTR(-E2BIG); 939 - 940 - /* Matches access in alloc_cache_mr() */ 941 - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) 942 - return ERR_PTR(-EOPNOTSUPP); 996 + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); 997 + if (WARN_ON(!page_size)) 998 + return ERR_PTR(-EINVAL); 999 + ent = mr_cache_ent_from_order( 1000 + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); 1001 + /* 1002 + * Matches access in alloc_cache_mr(). If the MR can't come from the 1003 + * cache then synchronously create an uncached one. 1004 + */ 1005 + if (!ent || ent->limit == 0 || 1006 + !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { 1007 + mutex_lock(&dev->slow_path_mutex); 1008 + mr = reg_create(pd, umem, iova, access_flags, page_size, false); 1009 + mutex_unlock(&dev->slow_path_mutex); 1010 + return mr; 1011 + } 943 1012 944 1013 mr = get_cache_mr(ent); 945 1014 if (!mr) { 946 1015 mr = create_cache_mr(ent); 1016 + /* 1017 + * The above already tried to do the same stuff as reg_create(), 1018 + * no reason to try it again. 1019 + */ 947 1020 if (IS_ERR(mr)) 948 1021 return mr; 949 1022 } ··· 976 1001 mr->umem = umem; 977 1002 mr->access_flags = access_flags; 978 1003 mr->desc_size = sizeof(struct mlx5_mtt); 979 - mr->mmkey.iova = virt_addr; 980 - mr->mmkey.size = len; 1004 + mr->mmkey.iova = iova; 1005 + mr->mmkey.size = umem->length; 981 1006 mr->mmkey.pd = to_mpd(pd)->pdn; 1007 + mr->page_shift = order_base_2(page_size); 1008 + mr->umem = umem; 1009 + set_mr_fields(dev, mr, umem->length, access_flags); 982 1010 983 1011 return mr; 984 1012 } ··· 990 1012 MLX5_UMR_MTT_ALIGNMENT) 991 1013 #define MLX5_SPARE_UMR_CHUNK 0x10000 992 1014 1015 + /* 1016 + * Allocate a temporary buffer to hold the per-page information to transfer to 1017 + * HW. For efficiency this should be as large as it can be, but buffer 1018 + * allocation failure is not allowed, so try smaller sizes. 1019 + */ 1020 + static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) 1021 + { 1022 + const size_t xlt_chunk_align = 1023 + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); 1024 + size_t size; 1025 + void *res = NULL; 1026 + 1027 + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); 1028 + 1029 + /* 1030 + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the 1031 + * allocation can't trigger any kind of reclaim. 1032 + */ 1033 + might_sleep(); 1034 + 1035 + gfp_mask |= __GFP_ZERO; 1036 + 1037 + /* 1038 + * If the system already has a suitable high order page then just use 1039 + * that, but don't try hard to create one. 
This max is about 1M, so a 1040 + * free x86 huge page will satisfy it. 1041 + */ 1042 + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), 1043 + MLX5_MAX_UMR_CHUNK); 1044 + *nents = size / ent_size; 1045 + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 1046 + get_order(size)); 1047 + if (res) 1048 + return res; 1049 + 1050 + if (size > MLX5_SPARE_UMR_CHUNK) { 1051 + size = MLX5_SPARE_UMR_CHUNK; 1052 + *nents = get_order(size) / ent_size; 1053 + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, 1054 + get_order(size)); 1055 + if (res) 1056 + return res; 1057 + } 1058 + 1059 + *nents = PAGE_SIZE / ent_size; 1060 + res = (void *)__get_free_page(gfp_mask); 1061 + if (res) 1062 + return res; 1063 + 1064 + mutex_lock(&xlt_emergency_page_mutex); 1065 + memset(xlt_emergency_page, 0, PAGE_SIZE); 1066 + return xlt_emergency_page; 1067 + } 1068 + 1069 + static void mlx5_ib_free_xlt(void *xlt, size_t length) 1070 + { 1071 + if (xlt == xlt_emergency_page) { 1072 + mutex_unlock(&xlt_emergency_page_mutex); 1073 + return; 1074 + } 1075 + 1076 + free_pages((unsigned long)xlt, get_order(length)); 1077 + } 1078 + 1079 + /* 1080 + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for 1081 + * submission. 1082 + */ 1083 + static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, 1084 + struct mlx5_umr_wr *wr, struct ib_sge *sg, 1085 + size_t nents, size_t ent_size, 1086 + unsigned int flags) 1087 + { 1088 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 1089 + struct device *ddev = &dev->mdev->pdev->dev; 1090 + dma_addr_t dma; 1091 + void *xlt; 1092 + 1093 + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, 1094 + flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : 1095 + GFP_KERNEL); 1096 + sg->length = nents * ent_size; 1097 + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); 1098 + if (dma_mapping_error(ddev, dma)) { 1099 + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); 1100 + mlx5_ib_free_xlt(xlt, sg->length); 1101 + return NULL; 1102 + } 1103 + sg->addr = dma; 1104 + sg->lkey = dev->umrc.pd->local_dma_lkey; 1105 + 1106 + memset(wr, 0, sizeof(*wr)); 1107 + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; 1108 + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) 1109 + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1110 + wr->wr.sg_list = sg; 1111 + wr->wr.num_sge = 1; 1112 + wr->wr.opcode = MLX5_IB_WR_UMR; 1113 + wr->pd = mr->ibmr.pd; 1114 + wr->mkey = mr->mmkey.key; 1115 + wr->length = mr->mmkey.size; 1116 + wr->virt_addr = mr->mmkey.iova; 1117 + wr->access_flags = mr->access_flags; 1118 + wr->page_shift = mr->page_shift; 1119 + wr->xlt_size = sg->length; 1120 + return xlt; 1121 + } 1122 + 1123 + static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, 1124 + struct ib_sge *sg) 1125 + { 1126 + struct device *ddev = &dev->mdev->pdev->dev; 1127 + 1128 + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); 1129 + mlx5_ib_free_xlt(xlt, sg->length); 1130 + } 1131 + 1132 + static unsigned int xlt_wr_final_send_flags(unsigned int flags) 1133 + { 1134 + unsigned int res = 0; 1135 + 1136 + if (flags & MLX5_IB_UPD_XLT_ENABLE) 1137 + res |= MLX5_IB_SEND_UMR_ENABLE_MR | 1138 + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | 1139 + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1140 + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) 1141 + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1142 + if (flags & MLX5_IB_UPD_XLT_ADDR) 1143 + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1144 + return res; 1145 + } 1146 + 993 1147 int mlx5_ib_update_xlt(struct 
mlx5_ib_mr *mr, u64 idx, int npages, 994 1148 int page_shift, int flags) 995 1149 { 996 - struct mlx5_ib_dev *dev = mr->dev; 997 - struct device *ddev = dev->ib_dev.dev.parent; 998 - int size; 1150 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 1151 + struct device *ddev = &dev->mdev->pdev->dev; 999 1152 void *xlt; 1000 - dma_addr_t dma; 1001 1153 struct mlx5_umr_wr wr; 1002 1154 struct ib_sge sg; 1003 1155 int err = 0; ··· 1138 1030 const int page_mask = page_align - 1; 1139 1031 size_t pages_mapped = 0; 1140 1032 size_t pages_to_map = 0; 1141 - size_t pages_iter = 0; 1033 + size_t pages_iter; 1142 1034 size_t size_to_map = 0; 1143 - gfp_t gfp; 1144 - bool use_emergency_page = false; 1035 + size_t orig_sg_length; 1145 1036 1146 1037 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && 1147 1038 !umr_can_use_indirect_mkey(dev)) 1148 1039 return -EPERM; 1040 + 1041 + if (WARN_ON(!mr->umem->is_odp)) 1042 + return -EINVAL; 1149 1043 1150 1044 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, 1151 1045 * so we need to align the offset and length accordingly ··· 1156 1046 npages += idx & page_mask; 1157 1047 idx &= ~page_mask; 1158 1048 } 1159 - 1160 - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; 1161 - gfp |= __GFP_ZERO | __GFP_NOWARN; 1162 - 1163 1049 pages_to_map = ALIGN(npages, page_align); 1164 - size = desc_size * pages_to_map; 1165 - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); 1166 1050 1167 - xlt = (void *)__get_free_pages(gfp, get_order(size)); 1168 - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { 1169 - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n", 1170 - size, get_order(size), MLX5_SPARE_UMR_CHUNK); 1051 + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); 1052 + if (!xlt) 1053 + return -ENOMEM; 1054 + pages_iter = sg.length / desc_size; 1055 + orig_sg_length = sg.length; 1171 1056 1172 - size = MLX5_SPARE_UMR_CHUNK; 1173 - xlt = (void *)__get_free_pages(gfp, get_order(size)); 1057 + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { 1058 + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 1059 + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; 1060 + 1061 + pages_to_map = min_t(size_t, pages_to_map, max_pages); 1174 1062 } 1175 1063 1176 - if (!xlt) { 1177 - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); 1178 - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); 1179 - size = PAGE_SIZE; 1180 - memset(xlt, 0, size); 1181 - use_emergency_page = true; 1182 - } 1183 - pages_iter = size / desc_size; 1184 - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); 1185 - if (dma_mapping_error(ddev, dma)) { 1186 - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); 1187 - err = -ENOMEM; 1188 - goto free_xlt; 1189 - } 1190 - 1191 - if (mr->umem->is_odp) { 1192 - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { 1193 - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 1194 - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; 1195 - 1196 - pages_to_map = min_t(size_t, pages_to_map, max_pages); 1197 - } 1198 - } 1199 - 1200 - sg.addr = dma; 1201 - sg.lkey = dev->umrc.pd->local_dma_lkey; 1202 - 1203 - memset(&wr, 0, sizeof(wr)); 1204 - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; 1205 - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) 1206 - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1207 - wr.wr.sg_list = &sg; 1208 - wr.wr.num_sge = 1; 1209 - wr.wr.opcode = MLX5_IB_WR_UMR; 1210 - 1211 - wr.pd = mr->ibmr.pd; 1212 - wr.mkey = mr->mmkey.key; 1213 - wr.length = mr->mmkey.size; 1214 - wr.virt_addr = 
mr->mmkey.iova; 1215 - wr.access_flags = mr->access_flags; 1216 1064 wr.page_shift = page_shift; 1217 1065 1218 1066 for (pages_mapped = 0; ··· 1178 1110 pages_mapped += pages_iter, idx += pages_iter) { 1179 1111 npages = min_t(int, pages_iter, pages_to_map - pages_mapped); 1180 1112 size_to_map = npages * desc_size; 1181 - dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); 1182 - if (mr->umem->is_odp) { 1183 - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); 1184 - } else { 1185 - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, 1186 - npages, xlt, 1187 - MLX5_IB_MTT_PRESENT); 1188 - /* Clear padding after the pages 1189 - * brought from the umem. 1190 - */ 1191 - memset(xlt + size_to_map, 0, size - size_to_map); 1192 - } 1193 - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); 1113 + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 1114 + DMA_TO_DEVICE); 1115 + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); 1116 + dma_sync_single_for_device(ddev, sg.addr, sg.length, 1117 + DMA_TO_DEVICE); 1194 1118 1195 1119 sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); 1196 1120 1197 - if (pages_mapped + pages_iter >= pages_to_map) { 1198 - if (flags & MLX5_IB_UPD_XLT_ENABLE) 1199 - wr.wr.send_flags |= 1200 - MLX5_IB_SEND_UMR_ENABLE_MR | 1201 - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | 1202 - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1203 - if (flags & MLX5_IB_UPD_XLT_PD || 1204 - flags & MLX5_IB_UPD_XLT_ACCESS) 1205 - wr.wr.send_flags |= 1206 - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1207 - if (flags & MLX5_IB_UPD_XLT_ADDR) 1208 - wr.wr.send_flags |= 1209 - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1210 - } 1121 + if (pages_mapped + pages_iter >= pages_to_map) 1122 + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); 1211 1123 1212 1124 wr.offset = idx * desc_size; 1213 1125 wr.xlt_size = sg.length; 1214 1126 1215 1127 err = mlx5_ib_post_send_wait(dev, &wr); 1216 1128 } 1217 - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); 1129 + sg.length = orig_sg_length; 1130 + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); 1131 + return err; 1132 + } 1218 1133 1219 - free_xlt: 1220 - if (use_emergency_page) 1221 - mlx5_ib_put_xlt_emergency_page(); 1222 - else 1223 - free_pages((unsigned long)xlt, get_order(size)); 1134 + /* 1135 + * Send the DMA list to the HW for a normal MR using UMR. 
1136 + */ 1137 + static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) 1138 + { 1139 + struct mlx5_ib_dev *dev = mr_to_mdev(mr); 1140 + struct device *ddev = &dev->mdev->pdev->dev; 1141 + struct ib_block_iter biter; 1142 + struct mlx5_mtt *cur_mtt; 1143 + struct mlx5_umr_wr wr; 1144 + size_t orig_sg_length; 1145 + struct mlx5_mtt *mtt; 1146 + size_t final_size; 1147 + struct ib_sge sg; 1148 + int err = 0; 1224 1149 1150 + if (WARN_ON(mr->umem->is_odp)) 1151 + return -EINVAL; 1152 + 1153 + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, 1154 + ib_umem_num_dma_blocks(mr->umem, 1155 + 1 << mr->page_shift), 1156 + sizeof(*mtt), flags); 1157 + if (!mtt) 1158 + return -ENOMEM; 1159 + orig_sg_length = sg.length; 1160 + 1161 + cur_mtt = mtt; 1162 + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, 1163 + BIT(mr->page_shift)) { 1164 + if (cur_mtt == (void *)mtt + sg.length) { 1165 + dma_sync_single_for_device(ddev, sg.addr, sg.length, 1166 + DMA_TO_DEVICE); 1167 + err = mlx5_ib_post_send_wait(dev, &wr); 1168 + if (err) 1169 + goto err; 1170 + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, 1171 + DMA_TO_DEVICE); 1172 + wr.offset += sg.length; 1173 + cur_mtt = mtt; 1174 + } 1175 + 1176 + cur_mtt->ptag = 1177 + cpu_to_be64(rdma_block_iter_dma_address(&biter) | 1178 + MLX5_IB_MTT_PRESENT); 1179 + cur_mtt++; 1180 + } 1181 + 1182 + final_size = (void *)cur_mtt - (void *)mtt; 1183 + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); 1184 + memset(cur_mtt, 0, sg.length - final_size); 1185 + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); 1186 + wr.xlt_size = sg.length; 1187 + 1188 + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); 1189 + err = mlx5_ib_post_send_wait(dev, &wr); 1190 + 1191 + err: 1192 + sg.length = orig_sg_length; 1193 + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); 1225 1194 return err; 1226 1195 } 1227 1196 ··· 1266 1161 * If ibmr is NULL it will be allocated by reg_create. 1267 1162 * Else, the given ibmr will be used. 1268 1163 */ 1269 - static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, 1270 - u64 virt_addr, u64 length, 1271 - struct ib_umem *umem, int npages, 1272 - int page_shift, int access_flags, 1273 - bool populate) 1164 + static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 1165 + u64 iova, int access_flags, 1166 + unsigned int page_size, bool populate) 1274 1167 { 1275 1168 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1276 1169 struct mlx5_ib_mr *mr; ··· 1279 1176 int err; 1280 1177 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); 1281 1178 1282 - mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); 1179 + if (!page_size) 1180 + return ERR_PTR(-EINVAL); 1181 + mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1283 1182 if (!mr) 1284 1183 return ERR_PTR(-ENOMEM); 1285 1184 1286 1185 mr->ibmr.pd = pd; 1287 1186 mr->access_flags = access_flags; 1187 + mr->page_shift = order_base_2(page_size); 1288 1188 1289 1189 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1290 1190 if (populate) 1291 - inlen += sizeof(*pas) * roundup(npages, 2); 1191 + inlen += sizeof(*pas) * 1192 + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); 1292 1193 in = kvzalloc(inlen, GFP_KERNEL); 1293 1194 if (!in) { 1294 1195 err = -ENOMEM; ··· 1304 1197 err = -EINVAL; 1305 1198 goto err_2; 1306 1199 } 1307 - mlx5_ib_populate_pas(dev, umem, page_shift, pas, 1200 + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, 1308 1201 pg_cap ? 
MLX5_IB_MTT_PRESENT : 0); 1309 1202 } 1310 1203 ··· 1313 1206 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1314 1207 1315 1208 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1316 - set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, 1209 + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, 1317 1210 populate ? pd : dev->umrc.pd); 1318 1211 MLX5_SET(mkc, mkc, free, !populate); 1319 1212 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); 1320 1213 MLX5_SET(mkc, mkc, umr_en, 1); 1321 1214 1322 - MLX5_SET64(mkc, mkc, len, length); 1215 + MLX5_SET64(mkc, mkc, len, umem->length); 1323 1216 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1324 1217 MLX5_SET(mkc, mkc, translations_octword_size, 1325 - get_octo_len(virt_addr, length, page_shift)); 1326 - MLX5_SET(mkc, mkc, log_page_size, page_shift); 1218 + get_octo_len(iova, umem->length, mr->page_shift)); 1219 + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); 1327 1220 if (populate) { 1328 1221 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1329 - get_octo_len(virt_addr, length, page_shift)); 1222 + get_octo_len(iova, umem->length, mr->page_shift)); 1330 1223 } 1331 1224 1332 1225 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); ··· 1336 1229 } 1337 1230 mr->mmkey.type = MLX5_MKEY_MR; 1338 1231 mr->desc_size = sizeof(struct mlx5_mtt); 1339 - mr->dev = dev; 1232 + mr->umem = umem; 1233 + set_mr_fields(dev, mr, umem->length, access_flags); 1340 1234 kvfree(in); 1341 1235 1342 1236 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); ··· 1346 1238 1347 1239 err_2: 1348 1240 kvfree(in); 1349 - 1350 1241 err_1: 1351 - if (!ibmr) 1352 - kfree(mr); 1353 - 1242 + kfree(mr); 1354 1243 return ERR_PTR(err); 1355 - } 1356 - 1357 - static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1358 - int npages, u64 length, int access_flags) 1359 - { 1360 - mr->npages = npages; 1361 - atomic_add(npages, &dev->mdev->priv.reg_pages); 1362 - mr->ibmr.lkey = mr->mmkey.key; 1363 - mr->ibmr.rkey = mr->mmkey.key; 1364 - mr->ibmr.length = length; 1365 - mr->access_flags = access_flags; 1366 1244 } 1367 1245 1368 1246 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, ··· 1384 1290 1385 1291 kfree(in); 1386 1292 1387 - mr->umem = NULL; 1388 - set_mr_fields(dev, mr, 0, length, acc); 1293 + set_mr_fields(dev, mr, length, acc); 1389 1294 1390 1295 return &mr->ibmr; 1391 1296 ··· 1445 1352 attr->access_flags, mode); 1446 1353 } 1447 1354 1448 - struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1449 - u64 virt_addr, int access_flags, 1450 - struct ib_udata *udata) 1355 + static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, 1356 + u64 iova, int access_flags) 1451 1357 { 1452 1358 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1453 1359 struct mlx5_ib_mr *mr = NULL; 1454 1360 bool xlt_with_umr; 1455 - struct ib_umem *umem; 1456 - int page_shift; 1457 - int npages; 1458 - int ncont; 1459 - int order; 1460 1361 int err; 1461 1362 1462 - if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) 1363 + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); 1364 + if (xlt_with_umr) { 1365 + mr = alloc_cacheable_mr(pd, umem, iova, access_flags); 1366 + } else { 1367 + unsigned int page_size = mlx5_umem_find_best_pgsz( 1368 + umem, mkc, log_page_size, 0, iova); 1369 + 1370 + mutex_lock(&dev->slow_path_mutex); 1371 + mr = reg_create(pd, umem, iova, access_flags, page_size, true); 1372 + mutex_unlock(&dev->slow_path_mutex); 1373 + } 1374 + if 
(IS_ERR(mr)) { 1375 + ib_umem_release(umem); 1376 + return ERR_CAST(mr); 1377 + } 1378 + 1379 + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1380 + 1381 + atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1382 + 1383 + if (xlt_with_umr) { 1384 + /* 1385 + * If the MR was created with reg_create then it will be 1386 + * configured properly but left disabled. It is safe to go ahead 1387 + * and configure it again via UMR while enabling it. 1388 + */ 1389 + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); 1390 + if (err) { 1391 + dereg_mr(dev, mr); 1392 + return ERR_PTR(err); 1393 + } 1394 + } 1395 + return &mr->ibmr; 1396 + } 1397 + 1398 + static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, 1399 + u64 iova, int access_flags, 1400 + struct ib_udata *udata) 1401 + { 1402 + struct mlx5_ib_dev *dev = to_mdev(pd->device); 1403 + struct ib_umem_odp *odp; 1404 + struct mlx5_ib_mr *mr; 1405 + int err; 1406 + 1407 + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1463 1408 return ERR_PTR(-EOPNOTSUPP); 1464 1409 1465 - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1466 - start, virt_addr, length, access_flags); 1467 - 1468 - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); 1469 - /* ODP requires xlt update via umr to work. */ 1470 - if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) 1471 - return ERR_PTR(-EINVAL); 1472 - 1473 - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && 1474 - length == U64_MAX) { 1475 - if (virt_addr != start) 1410 + if (!start && length == U64_MAX) { 1411 + if (iova != 0) 1476 1412 return ERR_PTR(-EINVAL); 1477 - if (!(access_flags & IB_ACCESS_ON_DEMAND) || 1478 - !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1413 + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1479 1414 return ERR_PTR(-EINVAL); 1480 1415 1481 1416 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); ··· 1512 1391 return &mr->ibmr; 1513 1392 } 1514 1393 1515 - err = mr_umem_get(dev, start, length, access_flags, &umem, 1516 - &npages, &page_shift, &ncont, &order); 1394 + /* ODP requires xlt update via umr to work. 
*/ 1395 + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) 1396 + return ERR_PTR(-EINVAL); 1517 1397 1518 - if (err < 0) 1519 - return ERR_PTR(err); 1398 + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, 1399 + &mlx5_mn_ops); 1400 + if (IS_ERR(odp)) 1401 + return ERR_CAST(odp); 1520 1402 1521 - if (xlt_with_umr) { 1522 - mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, 1523 - page_shift, order, access_flags); 1524 - if (IS_ERR(mr)) 1525 - mr = NULL; 1526 - } 1527 - 1528 - if (!mr) { 1529 - mutex_lock(&dev->slow_path_mutex); 1530 - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, 1531 - page_shift, access_flags, !xlt_with_umr); 1532 - mutex_unlock(&dev->slow_path_mutex); 1533 - } 1534 - 1403 + mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); 1535 1404 if (IS_ERR(mr)) { 1536 - err = PTR_ERR(mr); 1537 - goto error; 1405 + ib_umem_release(&odp->umem); 1406 + return ERR_CAST(mr); 1538 1407 } 1539 1408 1540 - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1409 + odp->private = mr; 1410 + init_waitqueue_head(&mr->q_deferred_work); 1411 + atomic_set(&mr->num_deferred_work, 0); 1412 + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), 1413 + &mr->mmkey, GFP_KERNEL)); 1414 + if (err) 1415 + goto err_dereg_mr; 1541 1416 1542 - mr->umem = umem; 1543 - set_mr_fields(dev, mr, npages, length, access_flags); 1544 - 1545 - if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { 1546 - /* 1547 - * If the MR was created with reg_create then it will be 1548 - * configured properly but left disabled. It is safe to go ahead 1549 - * and configure it again via UMR while enabling it. 1550 - */ 1551 - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; 1552 - 1553 - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, 1554 - update_xlt_flags); 1555 - if (err) { 1556 - dereg_mr(dev, mr); 1557 - return ERR_PTR(err); 1558 - } 1559 - } 1560 - 1561 - if (is_odp_mr(mr)) { 1562 - to_ib_umem_odp(mr->umem)->private = mr; 1563 - init_waitqueue_head(&mr->q_deferred_work); 1564 - atomic_set(&mr->num_deferred_work, 0); 1565 - err = xa_err(xa_store(&dev->odp_mkeys, 1566 - mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, 1567 - GFP_KERNEL)); 1568 - if (err) { 1569 - dereg_mr(dev, mr); 1570 - return ERR_PTR(err); 1571 - } 1572 - 1573 - err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); 1574 - if (err) { 1575 - dereg_mr(dev, mr); 1576 - return ERR_PTR(err); 1577 - } 1578 - } 1579 - 1417 + err = mlx5_ib_init_odp_mr(mr); 1418 + if (err) 1419 + goto err_dereg_mr; 1580 1420 return &mr->ibmr; 1581 - error: 1582 - ib_umem_release(umem); 1421 + 1422 + err_dereg_mr: 1423 + dereg_mr(dev, mr); 1583 1424 return ERR_PTR(err); 1425 + } 1426 + 1427 + struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1428 + u64 iova, int access_flags, 1429 + struct ib_udata *udata) 1430 + { 1431 + struct mlx5_ib_dev *dev = to_mdev(pd->device); 1432 + struct ib_umem *umem; 1433 + 1434 + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) 1435 + return ERR_PTR(-EOPNOTSUPP); 1436 + 1437 + mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1438 + start, iova, length, access_flags); 1439 + 1440 + if (access_flags & IB_ACCESS_ON_DEMAND) 1441 + return create_user_odp_mr(pd, start, length, iova, access_flags, 1442 + udata); 1443 + umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); 1444 + if (IS_ERR(umem)) 1445 + return ERR_CAST(umem); 1446 + return create_real_mr(pd, umem, iova, access_flags); 1584 1447 } 1585 1448 1586 1449 /** ··· 1579 1474 { 1580 1475 
struct mlx5_umr_wr umrwr = {}; 1581 1476 1582 - if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 1477 + if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 1583 1478 return 0; 1584 1479 1585 1480 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | 1586 1481 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1587 1482 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1588 - umrwr.pd = mr->dev->umrc.pd; 1483 + umrwr.pd = mr_to_mdev(mr)->umrc.pd; 1589 1484 umrwr.mkey = mr->mmkey.key; 1590 1485 umrwr.ignore_free_state = 1; 1591 1486 1592 - return mlx5_ib_post_send_wait(mr->dev, &umrwr); 1487 + return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); 1593 1488 } 1594 1489 1595 - static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1596 - int access_flags, int flags) 1490 + /* 1491 + * True if the change in access flags can be done via UMR, only some access 1492 + * flags can be updated. 1493 + */ 1494 + static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, 1495 + unsigned int current_access_flags, 1496 + unsigned int target_access_flags) 1597 1497 { 1598 - struct mlx5_ib_dev *dev = to_mdev(pd->device); 1599 - struct mlx5_umr_wr umrwr = {}; 1498 + unsigned int diffs = current_access_flags ^ target_access_flags; 1499 + 1500 + if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | 1501 + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) 1502 + return false; 1503 + return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, 1504 + target_access_flags); 1505 + } 1506 + 1507 + static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, 1508 + int access_flags) 1509 + { 1510 + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1511 + struct mlx5_umr_wr umrwr = { 1512 + .wr = { 1513 + .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | 1514 + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, 1515 + .opcode = MLX5_IB_WR_UMR, 1516 + }, 1517 + .mkey = mr->mmkey.key, 1518 + .pd = pd, 1519 + .access_flags = access_flags, 1520 + }; 1600 1521 int err; 1601 1522 1602 - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1603 - 1604 - umrwr.wr.opcode = MLX5_IB_WR_UMR; 1605 - umrwr.mkey = mr->mmkey.key; 1606 - 1607 - if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { 1608 - umrwr.pd = pd; 1609 - umrwr.access_flags = access_flags; 1610 - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1611 - } 1612 - 1613 1523 err = mlx5_ib_post_send_wait(dev, &umrwr); 1524 + if (err) 1525 + return err; 1614 1526 1615 - return err; 1527 + mr->access_flags = access_flags; 1528 + mr->mmkey.pd = to_mpd(pd)->pdn; 1529 + return 0; 1616 1530 } 1617 1531 1618 - int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1619 - u64 length, u64 virt_addr, int new_access_flags, 1620 - struct ib_pd *new_pd, struct ib_udata *udata) 1532 + static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, 1533 + struct ib_umem *new_umem, 1534 + int new_access_flags, u64 iova, 1535 + unsigned long *page_size) 1536 + { 1537 + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1538 + 1539 + /* We only track the allocated sizes of MRs from the cache */ 1540 + if (!mr->cache_ent) 1541 + return false; 1542 + if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) 1543 + return false; 1544 + 1545 + *page_size = 1546 + mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); 1547 + if (WARN_ON(!*page_size)) 1548 + return false; 1549 + return (1ULL << mr->cache_ent->order) >= 1550 + ib_umem_num_dma_blocks(new_umem, *page_size); 1551 + } 1552 + 1553 + static int umr_rereg_pas(struct mlx5_ib_mr *mr, 
struct ib_pd *pd, 1554 + int access_flags, int flags, struct ib_umem *new_umem, 1555 + u64 iova, unsigned long page_size) 1556 + { 1557 + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1558 + int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; 1559 + struct ib_umem *old_umem = mr->umem; 1560 + int err; 1561 + 1562 + /* 1563 + * To keep everything simple the MR is revoked before we start to mess 1564 + * with it. This ensure the change is atomic relative to any use of the 1565 + * MR. 1566 + */ 1567 + err = mlx5_mr_cache_invalidate(mr); 1568 + if (err) 1569 + return err; 1570 + 1571 + if (flags & IB_MR_REREG_PD) { 1572 + mr->ibmr.pd = pd; 1573 + mr->mmkey.pd = to_mpd(pd)->pdn; 1574 + upd_flags |= MLX5_IB_UPD_XLT_PD; 1575 + } 1576 + if (flags & IB_MR_REREG_ACCESS) { 1577 + mr->access_flags = access_flags; 1578 + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1579 + } 1580 + 1581 + mr->ibmr.length = new_umem->length; 1582 + mr->mmkey.iova = iova; 1583 + mr->mmkey.size = new_umem->length; 1584 + mr->page_shift = order_base_2(page_size); 1585 + mr->umem = new_umem; 1586 + err = mlx5_ib_update_mr_pas(mr, upd_flags); 1587 + if (err) { 1588 + /* 1589 + * The MR is revoked at this point so there is no issue to free 1590 + * new_umem. 1591 + */ 1592 + mr->umem = old_umem; 1593 + return err; 1594 + } 1595 + 1596 + atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); 1597 + ib_umem_release(old_umem); 1598 + atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); 1599 + return 0; 1600 + } 1601 + 1602 + struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1603 + u64 length, u64 iova, int new_access_flags, 1604 + struct ib_pd *new_pd, 1605 + struct ib_udata *udata) 1621 1606 { 1622 1607 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1623 1608 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1624 - struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; 1625 - int access_flags = flags & IB_MR_REREG_ACCESS ? 
1626 - new_access_flags : 1627 - mr->access_flags; 1628 - int page_shift = 0; 1629 - int upd_flags = 0; 1630 - int npages = 0; 1631 - int ncont = 0; 1632 - int order = 0; 1633 - u64 addr, len; 1634 1609 int err; 1635 1610 1636 - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1637 - start, virt_addr, length, access_flags); 1611 + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) 1612 + return ERR_PTR(-EOPNOTSUPP); 1638 1613 1639 - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); 1614 + mlx5_ib_dbg( 1615 + dev, 1616 + "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1617 + start, iova, length, new_access_flags); 1640 1618 1641 - if (!mr->umem) 1642 - return -EINVAL; 1619 + if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) 1620 + return ERR_PTR(-EOPNOTSUPP); 1643 1621 1644 - if (is_odp_mr(mr)) 1645 - return -EOPNOTSUPP; 1622 + if (!(flags & IB_MR_REREG_ACCESS)) 1623 + new_access_flags = mr->access_flags; 1624 + if (!(flags & IB_MR_REREG_PD)) 1625 + new_pd = ib_mr->pd; 1646 1626 1647 - if (flags & IB_MR_REREG_TRANS) { 1648 - addr = virt_addr; 1649 - len = length; 1650 - } else { 1651 - addr = mr->umem->address; 1652 - len = mr->umem->length; 1653 - } 1627 + if (!(flags & IB_MR_REREG_TRANS)) { 1628 + struct ib_umem *umem; 1654 1629 1655 - if (flags != IB_MR_REREG_PD) { 1630 + /* Fast path for PD/access change */ 1631 + if (can_use_umr_rereg_access(dev, mr->access_flags, 1632 + new_access_flags)) { 1633 + err = umr_rereg_pd_access(mr, new_pd, new_access_flags); 1634 + if (err) 1635 + return ERR_PTR(err); 1636 + return NULL; 1637 + } 1638 + /* DM or ODP MR's don't have a umem so we can't re-use it */ 1639 + if (!mr->umem || is_odp_mr(mr)) 1640 + goto recreate; 1641 + 1656 1642 /* 1657 - * Replace umem. This needs to be done whether or not UMR is 1658 - * used. 1643 + * Only one active MR can refer to a umem at one time, revoke 1644 + * the old MR before assigning the umem to the new one. 1659 1645 */ 1660 - flags |= IB_MR_REREG_TRANS; 1661 - ib_umem_release(mr->umem); 1646 + err = mlx5_mr_cache_invalidate(mr); 1647 + if (err) 1648 + return ERR_PTR(err); 1649 + umem = mr->umem; 1662 1650 mr->umem = NULL; 1663 - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, 1664 - &npages, &page_shift, &ncont, &order); 1665 - if (err) 1666 - goto err; 1651 + atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1652 + 1653 + return create_real_mr(new_pd, umem, mr->mmkey.iova, 1654 + new_access_flags); 1667 1655 } 1668 1656 1669 - if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, 1670 - access_flags) || 1671 - !mlx5_ib_can_load_pas_with_umr(dev, len) || 1672 - (flags & IB_MR_REREG_TRANS && 1673 - !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { 1674 - /* 1675 - * UMR can't be used - MKey needs to be replaced. 
1676 - */ 1677 - if (mr->cache_ent) 1678 - detach_mr_from_cache(mr); 1679 - err = destroy_mkey(dev, mr); 1680 - if (err) 1681 - goto err; 1657 + /* 1658 + * DM doesn't have a PAS list so we can't re-use it, odp does but the 1659 + * logic around releasing the umem is different 1660 + */ 1661 + if (!mr->umem || is_odp_mr(mr)) 1662 + goto recreate; 1682 1663 1683 - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 1684 - page_shift, access_flags, true); 1664 + if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && 1665 + can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { 1666 + struct ib_umem *new_umem; 1667 + unsigned long page_size; 1685 1668 1686 - if (IS_ERR(mr)) { 1687 - err = PTR_ERR(mr); 1688 - mr = to_mmr(ib_mr); 1689 - goto err; 1669 + new_umem = ib_umem_get(&dev->ib_dev, start, length, 1670 + new_access_flags); 1671 + if (IS_ERR(new_umem)) 1672 + return ERR_CAST(new_umem); 1673 + 1674 + /* Fast path for PAS change */ 1675 + if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, 1676 + &page_size)) { 1677 + err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, 1678 + new_umem, iova, page_size); 1679 + if (err) { 1680 + ib_umem_release(new_umem); 1681 + return ERR_PTR(err); 1682 + } 1683 + return NULL; 1690 1684 } 1691 - } else { 1692 - /* 1693 - * Send a UMR WQE 1694 - */ 1695 - mr->ibmr.pd = pd; 1696 - mr->access_flags = access_flags; 1697 - mr->mmkey.iova = addr; 1698 - mr->mmkey.size = len; 1699 - mr->mmkey.pd = to_mpd(pd)->pdn; 1700 - 1701 - if (flags & IB_MR_REREG_TRANS) { 1702 - upd_flags = MLX5_IB_UPD_XLT_ADDR; 1703 - if (flags & IB_MR_REREG_PD) 1704 - upd_flags |= MLX5_IB_UPD_XLT_PD; 1705 - if (flags & IB_MR_REREG_ACCESS) 1706 - upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1707 - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, 1708 - upd_flags); 1709 - } else { 1710 - err = rereg_umr(pd, mr, access_flags, flags); 1711 - } 1712 - 1713 - if (err) 1714 - goto err; 1685 + return create_real_mr(new_pd, new_umem, iova, new_access_flags); 1715 1686 } 1716 1687 1717 - set_mr_fields(dev, mr, npages, len, access_flags); 1718 - 1719 - return 0; 1720 - 1721 - err: 1722 - ib_umem_release(mr->umem); 1723 - mr->umem = NULL; 1724 - 1725 - clean_mr(dev, mr); 1726 - return err; 1688 + /* 1689 + * Everything else has no state we can preserve, just create a new MR 1690 + * from scratch 1691 + */ 1692 + recreate: 1693 + return mlx5_ib_reg_user_mr(new_pd, start, length, iova, 1694 + new_access_flags, udata); 1727 1695 } 1728 1696 1729 1697 static int ··· 1805 1627 int ndescs, 1806 1628 int desc_size) 1807 1629 { 1630 + struct mlx5_ib_dev *dev = to_mdev(device); 1631 + struct device *ddev = &dev->mdev->pdev->dev; 1808 1632 int size = ndescs * desc_size; 1809 1633 int add_size; 1810 1634 int ret; ··· 1819 1639 1820 1640 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1821 1641 1822 - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, 1823 - size, DMA_TO_DEVICE); 1824 - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { 1642 + mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); 1643 + if (dma_mapping_error(ddev, mr->desc_map)) { 1825 1644 ret = -ENOMEM; 1826 1645 goto err; 1827 1646 } ··· 1838 1659 if (mr->descs) { 1839 1660 struct ib_device *device = mr->ibmr.device; 1840 1661 int size = mr->max_descs * mr->desc_size; 1662 + struct mlx5_ib_dev *dev = to_mdev(device); 1841 1663 1842 - dma_unmap_single(device->dev.parent, mr->desc_map, 1843 - size, DMA_TO_DEVICE); 1664 + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, 
1665 + DMA_TO_DEVICE); 1844 1666 kfree(mr->descs_alloc); 1845 1667 mr->descs = NULL; 1846 1668 } ··· 1871 1691 1872 1692 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1873 1693 { 1874 - int npages = mr->npages; 1875 1694 struct ib_umem *umem = mr->umem; 1876 1695 1877 1696 /* Stop all DMA */ ··· 1879 1700 else 1880 1701 clean_mr(dev, mr); 1881 1702 1703 + if (umem) { 1704 + if (!is_odp_mr(mr)) 1705 + atomic_sub(ib_umem_num_pages(umem), 1706 + &dev->mdev->priv.reg_pages); 1707 + ib_umem_release(umem); 1708 + } 1709 + 1882 1710 if (mr->cache_ent) 1883 1711 mlx5_mr_cache_free(dev, mr); 1884 1712 else 1885 1713 kfree(mr); 1886 - 1887 - ib_umem_release(umem); 1888 - atomic_sub(npages, &dev->mdev->priv.reg_pages); 1889 - 1890 1714 } 1891 1715 1892 1716 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+29 -27
drivers/infiniband/hw/mlx5/odp.c
··· 102 102 if (flags & MLX5_IB_UPD_XLT_ZAP) { 103 103 for (; pklm != end; pklm++, idx++) { 104 104 pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); 105 - pklm->key = cpu_to_be32(imr->dev->null_mkey); 105 + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); 106 106 pklm->va = 0; 107 107 } 108 108 return; ··· 129 129 * locking around the xarray. 130 130 */ 131 131 lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); 132 - lockdep_assert_held(&imr->dev->odp_srcu); 132 + lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); 133 133 134 134 for (; pklm != end; pklm++, idx++) { 135 135 struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); ··· 139 139 pklm->key = cpu_to_be32(mtt->ibmr.lkey); 140 140 pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); 141 141 } else { 142 - pklm->key = cpu_to_be32(imr->dev->null_mkey); 142 + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); 143 143 pklm->va = 0; 144 144 } 145 145 } ··· 199 199 mutex_unlock(&odp->umem_mutex); 200 200 201 201 if (!mr->cache_ent) { 202 - mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); 202 + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); 203 203 WARN_ON(mr->descs); 204 204 } 205 205 } ··· 222 222 WARN_ON(atomic_read(&mr->num_deferred_work)); 223 223 224 224 if (need_imr_xlt) { 225 - srcu_key = srcu_read_lock(&mr->dev->odp_srcu); 225 + srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); 226 226 mutex_lock(&odp_imr->umem_mutex); 227 227 mlx5_ib_update_xlt(mr->parent, idx, 1, 0, 228 228 MLX5_IB_UPD_XLT_INDIRECT | 229 229 MLX5_IB_UPD_XLT_ATOMIC); 230 230 mutex_unlock(&odp_imr->umem_mutex); 231 - srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); 231 + srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); 232 232 } 233 233 234 234 dma_fence_odp_mr(mr); 235 235 236 236 mr->parent = NULL; 237 - mlx5_mr_cache_free(mr->dev, mr); 237 + mlx5_mr_cache_free(mr_to_mdev(mr), mr); 238 238 ib_umem_odp_release(odp); 239 239 if (atomic_dec_and_test(&imr->num_deferred_work)) 240 240 wake_up(&imr->q_deferred_work); ··· 274 274 goto out_unlock; 275 275 276 276 atomic_inc(&imr->num_deferred_work); 277 - call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, 277 + call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, 278 278 free_implicit_child_mr_rcu); 279 279 280 280 out_unlock: ··· 476 476 if (IS_ERR(odp)) 477 477 return ERR_CAST(odp); 478 478 479 - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, 480 - imr->access_flags); 479 + ret = mr = mlx5_mr_cache_alloc( 480 + mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); 481 481 if (IS_ERR(mr)) 482 482 goto out_umem; 483 483 484 484 mr->ibmr.pd = imr->ibmr.pd; 485 + mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; 485 486 mr->umem = &odp->umem; 486 487 mr->ibmr.lkey = mr->mmkey.key; 487 488 mr->ibmr.rkey = mr->mmkey.key; ··· 518 517 goto out_mr; 519 518 } 520 519 521 - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); 520 + mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); 522 521 return mr; 523 522 524 523 out_mr: 525 - mlx5_mr_cache_free(imr->dev, mr); 524 + mlx5_mr_cache_free(mr_to_mdev(imr), mr); 526 525 out_umem: 527 526 ib_umem_odp_release(odp); 528 527 return ret; ··· 536 535 struct ib_umem_odp *umem_odp; 537 536 struct mlx5_ib_mr *imr; 538 537 int err; 538 + 539 + if (!mlx5_ib_can_load_pas_with_umr(dev, 540 + MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) 541 + return ERR_PTR(-EOPNOTSUPP); 539 542 540 543 umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); 541 544 if (IS_ERR(umem_odp)) ··· 556 551 imr->umem = 
&umem_odp->umem; 557 552 imr->ibmr.lkey = imr->mmkey.key; 558 553 imr->ibmr.rkey = imr->mmkey.key; 554 + imr->ibmr.device = &dev->ib_dev; 559 555 imr->umem = &umem_odp->umem; 560 556 imr->is_odp_implicit = true; 561 557 atomic_set(&imr->num_deferred_work, 0); ··· 590 584 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) 591 585 { 592 586 struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); 593 - struct mlx5_ib_dev *dev = imr->dev; 587 + struct mlx5_ib_dev *dev = mr_to_mdev(imr); 594 588 struct list_head destroy_list; 595 589 struct mlx5_ib_mr *mtt; 596 590 struct mlx5_ib_mr *tmp; ··· 660 654 void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) 661 655 { 662 656 /* Prevent new page faults and prefetch requests from succeeding */ 663 - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); 657 + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); 664 658 665 659 /* Wait for all running page-fault handlers to finish. */ 666 - synchronize_srcu(&mr->dev->odp_srcu); 660 + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); 667 661 668 662 wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); 669 663 ··· 707 701 708 702 if (ret < 0) { 709 703 if (ret != -EAGAIN) 710 - mlx5_ib_err(mr->dev, 704 + mlx5_ib_err(mr_to_mdev(mr), 711 705 "Failed to update mkey page tables\n"); 712 706 goto out; 713 707 } ··· 797 791 MLX5_IB_UPD_XLT_ATOMIC); 798 792 mutex_unlock(&odp_imr->umem_mutex); 799 793 if (err) { 800 - mlx5_ib_err(imr->dev, "Failed to update PAS\n"); 794 + mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); 801 795 return err; 802 796 } 803 797 return ret; ··· 817 811 { 818 812 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); 819 813 820 - lockdep_assert_held(&mr->dev->odp_srcu); 814 + lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); 821 815 if (unlikely(io_virt < mr->mmkey.iova)) 822 816 return -EFAULT; 823 817 ··· 837 831 flags); 838 832 } 839 833 840 - int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) 834 + int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) 841 835 { 842 - u32 flags = MLX5_PF_FLAGS_SNAPSHOT; 843 836 int ret; 844 837 845 - if (enable) 846 - flags |= MLX5_PF_FLAGS_ENABLE; 847 - 848 - ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), 849 - mr->umem->address, mr->umem->length, NULL, 850 - flags); 838 + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address, 839 + mr->umem->length, NULL, 840 + MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE); 851 841 return ret >= 0 ? 0 : ret; 852 842 } 853 843 ··· 1785 1783 1786 1784 /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ 1787 1785 WARN_ON(!work->num_sge); 1788 - dev = work->frags[0].mr->dev; 1786 + dev = mr_to_mdev(work->frags[0].mr); 1789 1787 /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ 1790 1788 srcu_key = srcu_read_lock(&dev->odp_srcu); 1791 1789 for (i = 0; i < work->num_sge; ++i) {
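Note: most of the odp.c churn above comes from dropping the cached mr->dev back-pointer in favour of mr_to_mdev(), which recovers the driver-private device from the embedded ib_device. A toy illustration of that container_of() accessor pattern, with invented structure names:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct core_device { int index; };
struct priv_device { char name[8]; struct core_device core; };
struct mr { struct core_device *device; };

/* recover the private device from the core object instead of caching it in the MR */
static struct priv_device *mr_to_priv(struct mr *mr)
{
	return container_of(mr->device, struct priv_device, core);
}

int main(void)
{
	struct priv_device dev = { .name = "mlx5_0", .core = { .index = 1 } };
	struct mr mr = { .device = &dev.core };

	printf("%s\n", mr_to_priv(&mr)->name);
	return 0;
}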
+88 -107
drivers/infiniband/hw/mlx5/qp.c
··· 778 778 return bfregi->sys_pages[index_of_sys_page] + offset; 779 779 } 780 780 781 - static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, 782 - unsigned long addr, size_t size, 783 - struct ib_umem **umem, int *npages, int *page_shift, 784 - int *ncont, u32 *offset) 785 - { 786 - int err; 787 - 788 - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); 789 - if (IS_ERR(*umem)) { 790 - mlx5_ib_dbg(dev, "umem_get failed\n"); 791 - return PTR_ERR(*umem); 792 - } 793 - 794 - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); 795 - 796 - err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); 797 - if (err) { 798 - mlx5_ib_warn(dev, "bad offset\n"); 799 - goto err_umem; 800 - } 801 - 802 - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", 803 - addr, size, *npages, *page_shift, *ncont, *offset); 804 - 805 - return 0; 806 - 807 - err_umem: 808 - ib_umem_release(*umem); 809 - *umem = NULL; 810 - 811 - return err; 812 - } 813 - 814 781 static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, 815 782 struct mlx5_ib_rwq *rwq, struct ib_udata *udata) 816 783 { ··· 800 833 { 801 834 struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( 802 835 udata, struct mlx5_ib_ucontext, ibucontext); 803 - int page_shift = 0; 804 - int npages; 836 + unsigned long page_size = 0; 805 837 u32 offset = 0; 806 - int ncont = 0; 807 838 int err; 808 839 809 840 if (!ucmd->buf_addr) ··· 814 849 return err; 815 850 } 816 851 817 - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, 818 - &ncont, NULL); 819 - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, 820 - &rwq->rq_page_offset); 821 - if (err) { 852 + page_size = mlx5_umem_find_best_quantized_pgoff( 853 + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, 854 + page_offset, 64, &rwq->rq_page_offset); 855 + if (!page_size) { 822 856 mlx5_ib_warn(dev, "bad offset\n"); 857 + err = -EINVAL; 823 858 goto err_umem; 824 859 } 825 860 826 - rwq->rq_num_pas = ncont; 827 - rwq->page_shift = page_shift; 828 - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 861 + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); 862 + rwq->page_shift = order_base_2(page_size); 863 + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; 829 864 rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); 830 865 831 - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", 832 - (unsigned long long)ucmd->buf_addr, rwq->buf_size, 833 - npages, page_shift, ncont, offset); 866 + mlx5_ib_dbg( 867 + dev, 868 + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", 869 + (unsigned long long)ucmd->buf_addr, rwq->buf_size, 870 + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, 871 + offset); 834 872 835 873 err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); 836 874 if (err) { ··· 864 896 { 865 897 struct mlx5_ib_ucontext *context; 866 898 struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; 867 - int page_shift = 0; 899 + unsigned int page_offset_quantized = 0; 900 + unsigned long page_size = 0; 868 901 int uar_index = 0; 869 - int npages; 870 - u32 offset = 0; 871 902 int bfregn; 872 903 int ncont = 0; 873 904 __be64 *pas; ··· 917 950 918 951 if (ucmd->buf_addr && ubuffer->buf_size) { 919 952 ubuffer->buf_addr = ucmd->buf_addr; 920 - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, 921 - ubuffer->buf_size, &ubuffer->umem, 922 - &npages, &page_shift, 
&ncont, &offset); 923 - if (err) 953 + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, 954 + ubuffer->buf_size, 0); 955 + if (IS_ERR(ubuffer->umem)) { 956 + err = PTR_ERR(ubuffer->umem); 924 957 goto err_bfreg; 958 + } 959 + page_size = mlx5_umem_find_best_quantized_pgoff( 960 + ubuffer->umem, qpc, log_page_size, 961 + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, 962 + &page_offset_quantized); 963 + if (!page_size) { 964 + err = -EINVAL; 965 + goto err_umem; 966 + } 967 + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); 925 968 } else { 926 969 ubuffer->umem = NULL; 927 970 } ··· 946 969 947 970 uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; 948 971 MLX5_SET(create_qp_in, *in, uid, uid); 949 - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); 950 - if (ubuffer->umem) 951 - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); 952 - 953 972 qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); 954 - 955 - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); 956 - MLX5_SET(qpc, qpc, page_offset, offset); 957 - 973 + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); 974 + if (ubuffer->umem) { 975 + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); 976 + MLX5_SET(qpc, qpc, log_page_size, 977 + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); 978 + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); 979 + } 958 980 MLX5_SET(qpc, qpc, uar_page, uar_index); 959 981 if (bfregn != MLX5_IB_INVALID_BFREG) 960 982 resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); ··· 1185 1209 void *wq; 1186 1210 int inlen; 1187 1211 int err; 1188 - int page_shift = 0; 1189 - int npages; 1190 - int ncont = 0; 1191 - u32 offset = 0; 1212 + unsigned int page_offset_quantized; 1213 + unsigned long page_size; 1192 1214 1193 - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, 1194 - &sq->ubuffer.umem, &npages, &page_shift, &ncont, 1195 - &offset); 1196 - if (err) 1197 - return err; 1215 + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, 1216 + ubuffer->buf_size, 0); 1217 + if (IS_ERR(sq->ubuffer.umem)) 1218 + return PTR_ERR(sq->ubuffer.umem); 1219 + page_size = mlx5_umem_find_best_quantized_pgoff( 1220 + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, 1221 + page_offset, 64, &page_offset_quantized); 1222 + if (!page_size) { 1223 + err = -EINVAL; 1224 + goto err_umem; 1225 + } 1198 1226 1199 - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; 1227 + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + 1228 + sizeof(u64) * 1229 + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); 1200 1230 in = kvzalloc(inlen, GFP_KERNEL); 1201 1231 if (!in) { 1202 1232 err = -ENOMEM; ··· 1230 1248 MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); 1231 1249 MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); 1232 1250 MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); 1233 - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); 1234 - MLX5_SET(wq, wq, page_offset, offset); 1251 + MLX5_SET(wq, wq, log_wq_pg_sz, 1252 + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); 1253 + MLX5_SET(wq, wq, page_offset, page_offset_quantized); 1235 1254 1236 1255 pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); 1237 - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); 1256 + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); 1238 1257 1239 1258 err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); 1240 1259 ··· 1261 1278 
ib_umem_release(sq->ubuffer.umem); 1262 1279 } 1263 1280 1264 - static size_t get_rq_pas_size(void *qpc) 1265 - { 1266 - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; 1267 - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); 1268 - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); 1269 - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); 1270 - u32 po_quanta = 1 << (log_page_size - 6); 1271 - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); 1272 - u32 page_size = 1 << log_page_size; 1273 - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); 1274 - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; 1275 - 1276 - return rq_num_pas * sizeof(u64); 1277 - } 1278 - 1279 1281 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 1280 1282 struct mlx5_ib_rq *rq, void *qpin, 1281 - size_t qpinlen, struct ib_pd *pd) 1283 + struct ib_pd *pd) 1282 1284 { 1283 1285 struct mlx5_ib_qp *mqp = rq->base.container_mibqp; 1284 1286 __be64 *pas; 1285 - __be64 *qp_pas; 1286 1287 void *in; 1287 1288 void *rqc; 1288 1289 void *wq; 1289 1290 void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); 1290 - size_t rq_pas_size = get_rq_pas_size(qpc); 1291 + struct ib_umem *umem = rq->base.ubuffer.umem; 1292 + unsigned int page_offset_quantized; 1293 + unsigned long page_size = 0; 1291 1294 size_t inlen; 1292 1295 int err; 1293 1296 1294 - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) 1297 + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, 1298 + MLX5_ADAPTER_PAGE_SHIFT, 1299 + page_offset, 64, 1300 + &page_offset_quantized); 1301 + if (!page_size) 1295 1302 return -EINVAL; 1296 1303 1297 - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; 1304 + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + 1305 + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); 1298 1306 in = kvzalloc(inlen, GFP_KERNEL); 1299 1307 if (!in) 1300 1308 return -ENOMEM; ··· 1307 1333 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); 1308 1334 if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) 1309 1335 MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); 1310 - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); 1336 + MLX5_SET(wq, wq, page_offset, page_offset_quantized); 1311 1337 MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); 1312 1338 MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); 1313 1339 MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); 1314 - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); 1340 + MLX5_SET(wq, wq, log_wq_pg_sz, 1341 + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); 1315 1342 MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); 1316 1343 1317 1344 pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); 1318 - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); 1319 - memcpy(pas, qp_pas, rq_pas_size); 1345 + mlx5_ib_populate_pas(umem, page_size, pas, 0); 1320 1346 1321 1347 err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); 1322 1348 ··· 1437 1463 rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; 1438 1464 if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) 1439 1465 rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; 1440 - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); 1466 + err = create_raw_packet_qp_rq(dev, rq, in, pd); 1441 1467 if (err) 1442 1468 goto err_destroy_sq; 1443 1469 ··· 2410 2436 } 2411 2437 2412 2438 qp->state = IB_QPS_RESET; 2413 - 2439 + rdma_restrack_no_track(&qp->ibqp.res); 2414 2440 return 0; 2415 2441 } 2416 2442 ··· 2434 2460 case IB_QPT_GSI: 2435 2461 
if (dev->profile == &raw_eth_profile) 2436 2462 goto out; 2463 + fallthrough; 2437 2464 case IB_QPT_RAW_PACKET: 2438 2465 case IB_QPT_UD: 2439 2466 case MLX5_IB_QPT_REG_UMR: ··· 2687 2712 process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, 2688 2713 true, qp); 2689 2714 2690 - if (create_flags) 2715 + if (create_flags) { 2691 2716 mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", 2692 2717 create_flags); 2693 - 2694 - return (create_flags) ? -EINVAL : 0; 2718 + return -EOPNOTSUPP; 2719 + } 2720 + return 0; 2695 2721 } 2696 2722 2697 2723 static int process_udata_size(struct mlx5_ib_dev *dev, ··· 3078 3102 return 5; 3079 3103 default: 3080 3104 return rate + MLX5_STAT_RATE_OFFSET; 3081 - }; 3105 + } 3082 3106 3083 3107 return 0; 3084 3108 } ··· 4223 4247 int err = -EINVAL; 4224 4248 int port; 4225 4249 4250 + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) 4251 + return -EOPNOTSUPP; 4252 + 4226 4253 if (ibqp->rwq_ind_tbl) 4227 4254 return -ENOSYS; 4228 4255 ··· 4555 4576 pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 4556 4577 alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); 4557 4578 4558 - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { 4579 + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || 4580 + qp->ibqp.qp_type == IB_QPT_XRC_INI || 4581 + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { 4559 4582 to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); 4560 4583 to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); 4561 4584 qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); ··· 4863 4882 MLX5_SET(rqc, rqc, delay_drop_en, 1); 4864 4883 } 4865 4884 rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); 4866 - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); 4885 + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); 4867 4886 err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); 4868 4887 if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { 4869 4888 err = set_delay_drop(dev);
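The recurring pattern in this qp.c rework — and the reason the old mlx5_ib_umem_get() helper could be deleted — is: get the user buffer with ib_umem_get(), let mlx5_umem_find_best_quantized_pgoff() pick the best device page size (and quantized page offset) for the target PRM field, size the PAS array with ib_umem_num_dma_blocks(), and fill it with mlx5_ib_populate_pas(). A condensed sketch of that flow, using only calls that appear in the hunks above; the wrapper function itself is illustrative, not a drop-in:

	/* Condensed sketch of the umem mapping flow used above (not a drop-in). */
	static int example_map_user_buffer(struct mlx5_ib_dev *dev, u64 buf_addr,
					   size_t buf_size, void *qpc, __be64 *pas,
					   struct ib_umem **umem_out)
	{
		unsigned int page_offset_quantized;
		unsigned long page_size;
		struct ib_umem *umem;

		umem = ib_umem_get(&dev->ib_dev, buf_addr, buf_size, 0);
		if (IS_ERR(umem))
			return PTR_ERR(umem);

		page_size = mlx5_umem_find_best_quantized_pgoff(
			umem, qpc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
			page_offset, 64, &page_offset_quantized);
		if (!page_size) {
			ib_umem_release(umem);
			return -EINVAL;		/* no usable page size/offset */
		}

		/* ib_umem_num_dma_blocks(umem, page_size) sizes the PAS array. */
		mlx5_ib_populate_pas(umem, page_size, pas, 0);
		MLX5_SET(qpc, qpc, log_page_size,
			 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
		MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);

		*umem_out = umem;
		return 0;
	}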
+1 -1
drivers/infiniband/hw/mlx5/restrack.c
··· 116 116 { 117 117 struct mlx5_ib_mr *mr = to_mmr(ibmr); 118 118 119 - return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, 119 + return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY, 120 120 mlx5_mkey_to_idx(mr->mmkey.key)); 121 121 } 122 122
+7 -27
drivers/infiniband/hw/mlx5/srq.c
··· 51 51 udata, struct mlx5_ib_ucontext, ibucontext); 52 52 size_t ucmdlen; 53 53 int err; 54 - int npages; 55 - int page_shift; 56 - int ncont; 57 - u32 offset; 58 54 u32 uidx = MLX5_IB_DEFAULT_UIDX; 59 55 60 56 ucmdlen = min(udata->inlen, sizeof(ucmd)); ··· 82 86 err = PTR_ERR(srq->umem); 83 87 return err; 84 88 } 85 - 86 - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, 87 - &page_shift, &ncont, NULL); 88 - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, 89 - &offset); 90 - if (err) { 91 - mlx5_ib_warn(dev, "bad offset\n"); 92 - goto err_umem; 93 - } 94 - 95 - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); 96 - if (!in->pas) { 97 - err = -ENOMEM; 98 - goto err_umem; 99 - } 100 - 101 - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); 89 + in->umem = srq->umem; 102 90 103 91 err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); 104 92 if (err) { 105 93 mlx5_ib_dbg(dev, "map doorbell failed\n"); 106 - goto err_in; 94 + goto err_umem; 107 95 } 108 96 109 - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 110 - in->page_offset = offset; 111 97 in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; 112 98 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && 113 99 in->type != IB_SRQT_BASIC) 114 100 in->user_index = uidx; 115 101 116 102 return 0; 117 - 118 - err_in: 119 - kvfree(in->pas); 120 103 121 104 err_umem: 122 105 ib_umem_release(srq->umem); ··· 200 225 int err; 201 226 struct mlx5_srq_attr in = {}; 202 227 __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 228 + 229 + if (init_attr->srq_type != IB_SRQT_BASIC && 230 + init_attr->srq_type != IB_SRQT_XRC && 231 + init_attr->srq_type != IB_SRQT_TM) 232 + return -EOPNOTSUPP; 203 233 204 234 /* Sanity check SRQ size before proceeding */ 205 235 if (init_attr->attr.max_wr >= max_srq_wqes) {
+1
drivers/infiniband/hw/mlx5/srq.h
··· 28 28 u32 user_index; 29 29 u64 db_record; 30 30 __be64 *pas; 31 + struct ib_umem *umem; 31 32 u32 tm_log_list_size; 32 33 u32 tm_next_tag; 33 34 u32 tm_hw_phase_cnt;
+76 -4
drivers/infiniband/hw/mlx5/srq_cmd.c
··· 92 92 return srq; 93 93 } 94 94 95 + static int __set_srq_page_size(struct mlx5_srq_attr *in, 96 + unsigned long page_size) 97 + { 98 + if (!page_size) 99 + return -EINVAL; 100 + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; 101 + 102 + if (WARN_ON(get_pas_size(in) != 103 + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) 104 + return -EINVAL; 105 + return 0; 106 + } 107 + 108 + #define set_srq_page_size(in, typ, log_pgsz_fld) \ 109 + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ 110 + (in)->umem, typ, log_pgsz_fld, \ 111 + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ 112 + 64, &(in)->page_offset)) 113 + 95 114 static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, 96 115 struct mlx5_srq_attr *in) 97 116 { ··· 121 102 int pas_size; 122 103 int inlen; 123 104 int err; 105 + 106 + if (in->umem) { 107 + err = set_srq_page_size(in, srqc, log_page_size); 108 + if (err) 109 + return err; 110 + } 124 111 125 112 pas_size = get_pas_size(in); 126 113 inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; ··· 139 114 pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); 140 115 141 116 set_srqc(srqc, in); 142 - memcpy(pas, in->pas, pas_size); 117 + if (in->umem) 118 + mlx5_ib_populate_pas( 119 + in->umem, 120 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 121 + pas, 0); 122 + else 123 + memcpy(pas, in->pas, pas_size); 143 124 144 125 MLX5_SET(create_srq_in, create_in, opcode, 145 126 MLX5_CMD_OP_CREATE_SRQ); ··· 225 194 int inlen; 226 195 int err; 227 196 197 + if (in->umem) { 198 + err = set_srq_page_size(in, xrc_srqc, log_page_size); 199 + if (err) 200 + return err; 201 + } 202 + 228 203 pas_size = get_pas_size(in); 229 204 inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; 230 205 create_in = kvzalloc(inlen, GFP_KERNEL); ··· 244 207 245 208 set_srqc(xrc_srqc, in); 246 209 MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); 247 - memcpy(pas, in->pas, pas_size); 210 + if (in->umem) 211 + mlx5_ib_populate_pas( 212 + in->umem, 213 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 214 + pas, 0); 215 + else 216 + memcpy(pas, in->pas, pas_size); 248 217 MLX5_SET(create_xrc_srq_in, create_in, opcode, 249 218 MLX5_CMD_OP_CREATE_XRC_SRQ); 250 219 ··· 332 289 void *create_in = NULL; 333 290 void *rmpc; 334 291 void *wq; 292 + void *pas; 335 293 int pas_size; 336 294 int outlen; 337 295 int inlen; 338 296 int err; 297 + 298 + if (in->umem) { 299 + err = set_srq_page_size(in, wq, log_wq_pg_sz); 300 + if (err) 301 + return err; 302 + } 339 303 340 304 pas_size = get_pas_size(in); 341 305 inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; ··· 359 309 360 310 MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); 361 311 MLX5_SET(create_rmp_in, create_in, uid, in->uid); 312 + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); 313 + 362 314 set_wq(wq, in); 363 - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); 315 + if (in->umem) 316 + mlx5_ib_populate_pas( 317 + in->umem, 318 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 319 + pas, 0); 320 + else 321 + memcpy(pas, in->pas, pas_size); 364 322 365 323 MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); 366 324 err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); ··· 479 421 void *create_in; 480 422 void *xrqc; 481 423 void *wq; 424 + void *pas; 482 425 int pas_size; 483 426 int inlen; 484 427 int err; 428 + 429 + if (in->umem) { 430 + err = set_srq_page_size(in, wq, log_wq_pg_sz); 431 + if (err) 432 + return err; 433 + } 485 434 
486 435 pas_size = get_pas_size(in); 487 436 inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; ··· 498 433 499 434 xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); 500 435 wq = MLX5_ADDR_OF(xrqc, xrqc, wq); 436 + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); 501 437 502 438 set_wq(wq, in); 503 - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); 439 + if (in->umem) 440 + mlx5_ib_populate_pas( 441 + in->umem, 442 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 443 + pas, 0); 444 + else 445 + memcpy(pas, in->pas, pas_size); 504 446 505 447 if (in->type == IB_SRQT_TM) { 506 448 MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
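For a concrete feel of what set_srq_page_size() computes above: if mlx5_umem_find_best_quantized_pgoff() settles on a (hypothetical) 64 KiB page size, then in->log_page_size = order_base_2(65536) - MLX5_ADAPTER_PAGE_SHIFT = 16 - 12 = 4, and a 1 MiB queue buffer maps to ib_umem_num_dma_blocks() = 16 DMA blocks, i.e. 16 * 8 = 128 bytes of PAS — exactly the figure the WARN_ON() cross-checks against get_pas_size().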
+1 -1
drivers/infiniband/hw/mthca/mthca_cq.c
··· 604 604 entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; 605 605 break; 606 606 default: 607 - entry->opcode = MTHCA_OPCODE_INVALID; 607 + entry->opcode = 0xFF; 608 608 break; 609 609 } 610 610 } else {
-1
drivers/infiniband/hw/mthca/mthca_dev.h
··· 105 105 MTHCA_OPCODE_ATOMIC_CS = 0x11, 106 106 MTHCA_OPCODE_ATOMIC_FA = 0x12, 107 107 MTHCA_OPCODE_BIND_MW = 0x18, 108 - MTHCA_OPCODE_INVALID = 0xff 109 108 }; 110 109 111 110 enum {
+22 -41
drivers/infiniband/hw/mthca/mthca_provider.c
··· 470 470 int err; 471 471 472 472 if (init_attr->create_flags) 473 - return ERR_PTR(-EINVAL); 473 + return ERR_PTR(-EOPNOTSUPP); 474 474 475 475 switch (init_attr->qp_type) { 476 476 case IB_QPT_RC: ··· 612 612 udata, struct mthca_ucontext, ibucontext); 613 613 614 614 if (attr->flags) 615 - return -EINVAL; 615 + return -EOPNOTSUPP; 616 616 617 617 if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) 618 618 return -EINVAL; ··· 961 961 struct mthca_dev *dev = 962 962 rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); 963 963 964 - return sprintf(buf, "%x\n", dev->rev_id); 964 + return sysfs_emit(buf, "%x\n", dev->rev_id); 965 965 } 966 966 static DEVICE_ATTR_RO(hw_rev); 967 + 968 + static const char *hca_type_string(int hca_type) 969 + { 970 + switch (hca_type) { 971 + case PCI_DEVICE_ID_MELLANOX_TAVOR: 972 + return "MT23108"; 973 + case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: 974 + return "MT25208 (MT23108 compat mode)"; 975 + case PCI_DEVICE_ID_MELLANOX_ARBEL: 976 + return "MT25208"; 977 + case PCI_DEVICE_ID_MELLANOX_SINAI: 978 + case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: 979 + return "MT25204"; 980 + } 981 + 982 + return "unknown"; 983 + } 967 984 968 985 static ssize_t hca_type_show(struct device *device, 969 986 struct device_attribute *attr, char *buf) ··· 988 971 struct mthca_dev *dev = 989 972 rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); 990 973 991 - switch (dev->pdev->device) { 992 - case PCI_DEVICE_ID_MELLANOX_TAVOR: 993 - return sprintf(buf, "MT23108\n"); 994 - case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: 995 - return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); 996 - case PCI_DEVICE_ID_MELLANOX_ARBEL: 997 - return sprintf(buf, "MT25208\n"); 998 - case PCI_DEVICE_ID_MELLANOX_SINAI: 999 - case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: 1000 - return sprintf(buf, "MT25204\n"); 1001 - default: 1002 - return sprintf(buf, "unknown\n"); 1003 - } 974 + return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device)); 1004 975 } 1005 976 static DEVICE_ATTR_RO(hca_type); 1006 977 ··· 998 993 struct mthca_dev *dev = 999 994 rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); 1000 995 1001 - return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); 996 + return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); 1002 997 } 1003 998 static DEVICE_ATTR_RO(board_id); 1004 999 ··· 1163 1158 if (ret) 1164 1159 return ret; 1165 1160 1166 - dev->ib_dev.uverbs_cmd_mask = 1167 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 1168 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 1169 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 1170 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 1171 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 1172 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 1173 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 1174 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 1175 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 1176 - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 1177 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 1178 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 1179 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 1180 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 1181 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 1182 - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 1183 - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); 1184 1161 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 1185 1162 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 1186 1163 dev->ib_dev.num_comp_vectors = 1; 1187 1164 dev->ib_dev.dev.parent = &dev->pdev->dev; 1188 1165 1189 1166 if (dev->mthca_flags & MTHCA_FLAG_SRQ) { 1190 - 
dev->ib_dev.uverbs_cmd_mask |= 1191 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 1192 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 1193 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 1194 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); 1195 - 1196 1167 if (mthca_is_memfree(dev)) 1197 1168 ib_set_device_ops(&dev->ib_dev, 1198 1169 &mthca_dev_arbel_srq_ops);
+3
drivers/infiniband/hw/mthca/mthca_qp.c
··· 863 863 enum ib_qp_state cur_state, new_state; 864 864 int err = -EINVAL; 865 865 866 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 867 + return -EOPNOTSUPP; 868 + 866 869 mutex_lock(&qp->mutex); 867 870 if (attr_mask & IB_QP_CUR_STATE) { 868 871 cur_state = attr->cur_qp_state;
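mthca is one of several drivers in this pull (ocrdma, qedr, usnic, pvrdma and rdmavt follow below) that gain the same guard: attribute bits outside IB_QP_ATTR_STANDARD_BITS now fail modify_qp with -EOPNOTSUPP instead of being silently ignored or lumped in with -EINVAL. The shape of the check, in an illustrative (non-driver-specific) callback:

	static int example_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
				     int attr_mask, struct ib_udata *udata)
	{
		/* Unknown attribute bits are "not supported", not "invalid input". */
		if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
			return -EOPNOTSUPP;

		/* ... driver-specific state transition handling would follow ... */
		return 0;
	}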
+5 -37
drivers/infiniband/hw/ocrdma/ocrdma_main.c
··· 119 119 struct ocrdma_dev *dev = 120 120 rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); 121 121 122 - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); 122 + return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor); 123 123 } 124 124 static DEVICE_ATTR_RO(hw_rev); 125 125 ··· 129 129 struct ocrdma_dev *dev = 130 130 rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); 131 131 132 - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); 132 + return sysfs_emit(buf, "%s\n", &dev->model_number[0]); 133 133 } 134 134 static DEVICE_ATTR_RO(hca_type); 135 135 ··· 154 154 .create_ah = ocrdma_create_ah, 155 155 .create_cq = ocrdma_create_cq, 156 156 .create_qp = ocrdma_create_qp, 157 + .create_user_ah = ocrdma_create_ah, 157 158 .dealloc_pd = ocrdma_dealloc_pd, 158 159 .dealloc_ucontext = ocrdma_dealloc_ucontext, 159 160 .dereg_mr = ocrdma_dereg_mr, ··· 205 204 BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); 206 205 memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, 207 206 sizeof(OCRDMA_NODE_DESC)); 208 - dev->ibdev.uverbs_cmd_mask = 209 - OCRDMA_UVERBS(GET_CONTEXT) | 210 - OCRDMA_UVERBS(QUERY_DEVICE) | 211 - OCRDMA_UVERBS(QUERY_PORT) | 212 - OCRDMA_UVERBS(ALLOC_PD) | 213 - OCRDMA_UVERBS(DEALLOC_PD) | 214 - OCRDMA_UVERBS(REG_MR) | 215 - OCRDMA_UVERBS(DEREG_MR) | 216 - OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | 217 - OCRDMA_UVERBS(CREATE_CQ) | 218 - OCRDMA_UVERBS(RESIZE_CQ) | 219 - OCRDMA_UVERBS(DESTROY_CQ) | 220 - OCRDMA_UVERBS(REQ_NOTIFY_CQ) | 221 - OCRDMA_UVERBS(CREATE_QP) | 222 - OCRDMA_UVERBS(MODIFY_QP) | 223 - OCRDMA_UVERBS(QUERY_QP) | 224 - OCRDMA_UVERBS(DESTROY_QP) | 225 - OCRDMA_UVERBS(POLL_CQ) | 226 - OCRDMA_UVERBS(POST_SEND) | 227 - OCRDMA_UVERBS(POST_RECV); 228 - 229 - dev->ibdev.uverbs_cmd_mask |= 230 - OCRDMA_UVERBS(CREATE_AH) | 231 - OCRDMA_UVERBS(MODIFY_AH) | 232 - OCRDMA_UVERBS(QUERY_AH) | 233 - OCRDMA_UVERBS(DESTROY_AH); 234 207 235 208 dev->ibdev.node_type = RDMA_NODE_IB_CA; 236 209 dev->ibdev.phys_port_cnt = 1; ··· 215 240 216 241 ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); 217 242 218 - if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { 219 - dev->ibdev.uverbs_cmd_mask |= 220 - OCRDMA_UVERBS(CREATE_SRQ) | 221 - OCRDMA_UVERBS(MODIFY_SRQ) | 222 - OCRDMA_UVERBS(QUERY_SRQ) | 223 - OCRDMA_UVERBS(DESTROY_SRQ) | 224 - OCRDMA_UVERBS(POST_SRQ_RECV); 225 - 243 + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) 226 244 ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); 227 - } 245 + 228 246 rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); 229 247 ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); 230 248 if (ret)
+10 -1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 974 974 struct ocrdma_create_cq_ureq ureq; 975 975 976 976 if (attr->flags) 977 - return -EINVAL; 977 + return -EOPNOTSUPP; 978 978 979 979 if (udata) { 980 980 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) ··· 1299 1299 struct ocrdma_create_qp_ureq ureq; 1300 1300 u16 dpp_credit_lmt, dpp_offset; 1301 1301 1302 + if (attrs->create_flags) 1303 + return ERR_PTR(-EOPNOTSUPP); 1304 + 1302 1305 status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); 1303 1306 if (status) 1304 1307 goto gen_err; ··· 1393 1390 struct ocrdma_qp *qp; 1394 1391 struct ocrdma_dev *dev; 1395 1392 enum ib_qp_state old_qps, new_qps; 1393 + 1394 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 1395 + return -EOPNOTSUPP; 1396 1396 1397 1397 qp = get_ocrdma_qp(ibqp); 1398 1398 dev = get_ocrdma_dev(ibqp->device); ··· 1775 1769 struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd); 1776 1770 struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); 1777 1771 struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); 1772 + 1773 + if (init_attr->srq_type != IB_SRQT_BASIC) 1774 + return -EOPNOTSUPP; 1778 1775 1779 1776 if (init_attr->attr.max_sge > dev->attr.max_recv_sge) 1780 1777 return -EINVAL;
+5 -34
drivers/infiniband/hw/qedr/main.c
··· 124 124 struct qedr_dev *dev = 125 125 rdma_device_to_drv_device(device, struct qedr_dev, ibdev); 126 126 127 - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); 127 + return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver); 128 128 } 129 129 static DEVICE_ATTR_RO(hw_rev); 130 130 ··· 134 134 struct qedr_dev *dev = 135 135 rdma_device_to_drv_device(device, struct qedr_dev, ibdev); 136 136 137 - return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", 138 - dev->pdev->device, 139 - rdma_protocol_iwarp(&dev->ibdev, 1) ? 140 - "iWARP" : "RoCE"); 137 + return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device, 138 + rdma_protocol_iwarp(&dev->ibdev, 1) ? "iWARP" : 139 + "RoCE"); 141 140 } 142 141 static DEVICE_ATTR_RO(hca_type); 143 142 ··· 187 188 dev->ibdev.node_type = RDMA_NODE_IB_CA; 188 189 189 190 ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); 190 - 191 - dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | 192 - QEDR_UVERBS(CLOSE_XRCD) | 193 - QEDR_UVERBS(CREATE_XSRQ); 194 191 } 195 192 196 193 static const struct ib_device_ops qedr_dev_ops = { ··· 243 248 244 249 dev->ibdev.node_guid = dev->attr.node_guid; 245 250 memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); 246 - 247 - dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | 248 - QEDR_UVERBS(QUERY_DEVICE) | 249 - QEDR_UVERBS(QUERY_PORT) | 250 - QEDR_UVERBS(ALLOC_PD) | 251 - QEDR_UVERBS(DEALLOC_PD) | 252 - QEDR_UVERBS(CREATE_COMP_CHANNEL) | 253 - QEDR_UVERBS(CREATE_CQ) | 254 - QEDR_UVERBS(RESIZE_CQ) | 255 - QEDR_UVERBS(DESTROY_CQ) | 256 - QEDR_UVERBS(REQ_NOTIFY_CQ) | 257 - QEDR_UVERBS(CREATE_QP) | 258 - QEDR_UVERBS(MODIFY_QP) | 259 - QEDR_UVERBS(QUERY_QP) | 260 - QEDR_UVERBS(DESTROY_QP) | 261 - QEDR_UVERBS(CREATE_SRQ) | 262 - QEDR_UVERBS(DESTROY_SRQ) | 263 - QEDR_UVERBS(QUERY_SRQ) | 264 - QEDR_UVERBS(MODIFY_SRQ) | 265 - QEDR_UVERBS(POST_SRQ_RECV) | 266 - QEDR_UVERBS(REG_MR) | 267 - QEDR_UVERBS(DEREG_MR) | 268 - QEDR_UVERBS(POLL_CQ) | 269 - QEDR_UVERBS(POST_SEND) | 270 - QEDR_UVERBS(POST_RECV); 271 251 272 252 if (IS_IWARP(dev)) { 273 253 rc = qedr_iw_register_device(dev); ··· 766 796 } 767 797 xa_unlock_irqrestore(&dev->srqs, flags); 768 798 DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); 799 + break; 769 800 default: 770 801 break; 771 802 }
+13
drivers/infiniband/hw/qedr/verbs.c
··· 928 928 "create_cq: called from %s. entries=%d, vector=%d\n", 929 929 udata ? "User Lib" : "Kernel", entries, vector); 930 930 931 + if (attr->flags) 932 + return -EOPNOTSUPP; 933 + 931 934 if (entries > QEDR_MAX_CQES) { 932 935 DP_ERR(dev, 933 936 "create cq: the number of entries %d is too high. Must be equal or below %d.\n", ··· 1548 1545 DP_DEBUG(dev, QEDR_MSG_QP, 1549 1546 "create SRQ called from %s (pd %p)\n", 1550 1547 (udata) ? "User lib" : "kernel", pd); 1548 + 1549 + if (init_attr->srq_type != IB_SRQT_BASIC && 1550 + init_attr->srq_type != IB_SRQT_XRC) 1551 + return -EOPNOTSUPP; 1551 1552 1552 1553 rc = qedr_check_srq_params(dev, init_attr, udata); 1553 1554 if (rc) ··· 2248 2241 struct ib_qp *ibqp; 2249 2242 int rc = 0; 2250 2243 2244 + if (attrs->create_flags) 2245 + return ERR_PTR(-EOPNOTSUPP); 2246 + 2251 2247 if (attrs->qp_type == IB_QPT_XRC_TGT) { 2252 2248 xrcd = get_qedr_xrcd(attrs->xrcd); 2253 2249 dev = get_qedr_dev(xrcd->ibxrcd.device); ··· 2486 2476 DP_DEBUG(dev, QEDR_MSG_QP, 2487 2477 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, 2488 2478 attr->qp_state); 2479 + 2480 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 2481 + return -EOPNOTSUPP; 2489 2482 2490 2483 old_qp_state = qedr_get_ibqp_state(qp->state); 2491 2484 if (attr_mask & IB_QP_STATE)
+2 -9
drivers/infiniband/hw/qib/qib_pcie.c
··· 90 90 goto bail; 91 91 } 92 92 93 - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 93 + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 94 94 if (ret) { 95 95 /* 96 96 * If the 64 bit setup fails, try 32 bit. Some systems 97 97 * do not setup 64 bit maps on systems with 2GB or less 98 98 * memory installed. 99 99 */ 100 - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 100 + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 101 101 if (ret) { 102 102 qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); 103 103 goto bail; 104 104 } 105 - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 106 - } else 107 - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 108 - if (ret) { 109 - qib_early_err(&pdev->dev, 110 - "Unable to set DMA consistent mask: %d\n", ret); 111 - goto bail; 112 105 } 113 106 114 107 pci_set_master(pdev);
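This is the stock conversion away from the deprecated pci_set_dma_mask()/pci_set_consistent_dma_mask() pair: dma_set_mask_and_coherent() sets the streaming and coherent masks together, so the separate consistent-mask branch and its error path disappear. A hedged sketch of the resulting probe-time idiom (the helper name is illustrative):

	#include <linux/dma-mapping.h>
	#include <linux/pci.h>

	static int example_init_dma(struct pci_dev *pdev)
	{
		/* Prefer 64-bit DMA; fall back to 32-bit where it can't be set. */
		if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
		    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
			return -ENODEV;

		pci_set_master(pdev);
		return 0;
	}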
+43 -53
drivers/infiniband/hw/qib/qib_sysfs.c
··· 43 43 static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) 44 44 { 45 45 struct qib_devdata *dd = ppd->dd; 46 - int ret; 47 46 48 - ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); 49 - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); 50 - return ret; 47 + return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT)); 51 48 } 52 49 53 50 static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, ··· 103 106 104 107 static ssize_t show_status(struct qib_pportdata *ppd, char *buf) 105 108 { 106 - ssize_t ret; 107 - 108 109 if (!ppd->statusp) 109 - ret = -EINVAL; 110 - else 111 - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", 112 - (unsigned long long) *(ppd->statusp)); 113 - return ret; 110 + return -EINVAL; 111 + 112 + return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp)); 114 113 } 115 114 116 115 /* ··· 385 392 container_of(kobj, struct qib_pportdata, sl2vl_kobj); 386 393 struct qib_ibport *qibp = &ppd->ibport_data; 387 394 388 - return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); 395 + return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); 389 396 } 390 397 391 398 static const struct sysfs_ops qib_sl2vl_ops = { ··· 494 501 struct qib_pportdata *ppd = 495 502 container_of(kobj, struct qib_pportdata, diagc_kobj); 496 503 struct qib_ibport *qibp = &ppd->ibport_data; 504 + u64 val; 497 505 498 506 if (!strncmp(dattr->attr.name, "rc_acks", 7)) 499 - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); 507 + val = READ_PER_CPU_CNTR(rc_acks); 500 508 else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) 501 - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); 509 + val = READ_PER_CPU_CNTR(rc_qacks); 502 510 else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) 503 - return sprintf(buf, "%llu\n", 504 - READ_PER_CPU_CNTR(rc_delayed_comp)); 511 + val = READ_PER_CPU_CNTR(rc_delayed_comp); 505 512 else 506 - return sprintf(buf, "%u\n", 507 - *(u32 *)((char *)qibp + dattr->counter)); 513 + val = *(u32 *)((char *)qibp + dattr->counter); 514 + 515 + return sysfs_emit(buf, "%llu\n", val); 508 516 } 509 517 510 518 static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, ··· 559 565 struct qib_ibdev *dev = 560 566 rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); 561 567 562 - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); 568 + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); 563 569 } 564 570 static DEVICE_ATTR_RO(hw_rev); 565 571 ··· 569 575 struct qib_ibdev *dev = 570 576 rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); 571 577 struct qib_devdata *dd = dd_from_dev(dev); 572 - int ret; 573 578 574 579 if (!dd->boardname) 575 - ret = -EINVAL; 576 - else 577 - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); 578 - return ret; 580 + return -EINVAL; 581 + return sysfs_emit(buf, "%s\n", dd->boardname); 579 582 } 580 583 static DEVICE_ATTR_RO(hca_type); 581 584 static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); ··· 581 590 struct device_attribute *attr, char *buf) 582 591 { 583 592 /* The string printed here is already newline-terminated. */ 584 - return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); 593 + return sysfs_emit(buf, "%s", (char *)ib_qib_version); 585 594 } 586 595 static DEVICE_ATTR_RO(version); 587 596 ··· 593 602 struct qib_devdata *dd = dd_from_dev(dev); 594 603 595 604 /* The string printed here is already newline-terminated. 
*/ 596 - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); 605 + return sysfs_emit(buf, "%s", dd->boardversion); 597 606 } 598 607 static DEVICE_ATTR_RO(boardversion); 599 608 ··· 605 614 struct qib_devdata *dd = dd_from_dev(dev); 606 615 607 616 /* The string printed here is already newline-terminated. */ 608 - return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); 617 + return sysfs_emit(buf, "%s", dd->lbus_info); 609 618 } 610 619 static DEVICE_ATTR_RO(localbus_info); 611 620 ··· 619 628 /* Return the number of user ports (contexts) available. */ 620 629 /* The calculation below deals with a special case where 621 630 * cfgctxts is set to 1 on a single-port board. */ 622 - return scnprintf(buf, PAGE_SIZE, "%u\n", 623 - (dd->first_user_ctxt > dd->cfgctxts) ? 0 : 624 - (dd->cfgctxts - dd->first_user_ctxt)); 631 + return sysfs_emit(buf, "%u\n", 632 + (dd->first_user_ctxt > dd->cfgctxts) ? 633 + 0 : 634 + (dd->cfgctxts - dd->first_user_ctxt)); 625 635 } 626 636 static DEVICE_ATTR_RO(nctxts); 627 637 ··· 634 642 struct qib_devdata *dd = dd_from_dev(dev); 635 643 636 644 /* Return the number of free user ports (contexts) available. */ 637 - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 645 + return sysfs_emit(buf, "%u\n", dd->freectxts); 638 646 } 639 647 static DEVICE_ATTR_RO(nfreectxts); 640 648 641 - static ssize_t serial_show(struct device *device, 642 - struct device_attribute *attr, char *buf) 649 + static ssize_t serial_show(struct device *device, struct device_attribute *attr, 650 + char *buf) 643 651 { 644 652 struct qib_ibdev *dev = 645 653 rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); 646 654 struct qib_devdata *dd = dd_from_dev(dev); 655 + const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial)); 656 + int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial); 647 657 648 - buf[sizeof(dd->serial)] = '\0'; 649 - memcpy(buf, dd->serial, sizeof(dd->serial)); 650 - strcat(buf, "\n"); 651 - return strlen(buf); 658 + return sysfs_emit(buf, ".%*s\n", size, dd->serial); 652 659 } 653 660 static DEVICE_ATTR_RO(serial); 654 661 ··· 680 689 struct qib_ibdev *dev = 681 690 rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); 682 691 struct qib_devdata *dd = dd_from_dev(dev); 683 - int ret; 684 - int idx; 692 + int i; 685 693 u8 regvals[8]; 686 694 687 - ret = -ENXIO; 688 - for (idx = 0; idx < 8; ++idx) { 689 - if (idx == 6) 695 + for (i = 0; i < 8; i++) { 696 + int ret; 697 + 698 + if (i == 6) 690 699 continue; 691 - ret = dd->f_tempsense_rd(dd, idx); 700 + ret = dd->f_tempsense_rd(dd, i); 692 701 if (ret < 0) 693 - break; 694 - regvals[idx] = ret; 702 + return ret; /* return error on bad read */ 703 + regvals[i] = ret; 695 704 } 696 - if (idx == 8) 697 - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", 698 - *(signed char *)(regvals), 699 - *(signed char *)(regvals + 1), 700 - regvals[2], regvals[3], 701 - *(signed char *)(regvals + 5), 702 - *(signed char *)(regvals + 7)); 703 - return ret; 705 + return sysfs_emit(buf, "%d %d %02X %02X %d %d\n", 706 + (signed char)regvals[0], 707 + (signed char)regvals[1], 708 + regvals[2], 709 + regvals[3], 710 + (signed char)regvals[5], 711 + (signed char)regvals[7]); 704 712 } 705 713 static DEVICE_ATTR_RO(tempsense); 706 714
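Most of the sysfs churn in this series is this same mechanical swap: sprintf()/scnprintf() in *_show() callbacks become sysfs_emit(), and multi-part output (as in the usnic hunks below) becomes sysfs_emit_at() with a running length, which keeps everything bounded to the PAGE_SIZE sysfs buffer without hand-rolled pointer/remaining-space bookkeeping. A minimal illustration — the attribute itself is made up:

	#include <linux/device.h>
	#include <linux/sysfs.h>

	static ssize_t example_show(struct device *dev, struct device_attribute *attr,
				    char *buf)
	{
		int len;

		len = sysfs_emit(buf, "state: %s\n", "ready");
		/* Append further output at the current offset. */
		len += sysfs_emit_at(buf, len, "ports: %d\n", 2);
		return len;
	}
	static DEVICE_ATTR_RO(example);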
-19
drivers/infiniband/hw/usnic/usnic_ib_main.c
··· 398 398 us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; 399 399 us_ibdev->ib_dev.dev.parent = &dev->dev; 400 400 401 - us_ibdev->ib_dev.uverbs_cmd_mask = 402 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 403 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 404 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 405 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 406 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 407 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 408 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 409 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 410 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 411 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 412 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 413 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 414 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 415 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 416 - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 417 - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 418 - (1ull << IB_USER_VERBS_CMD_OPEN_QP); 419 - 420 401 ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); 421 402 422 403 rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
+40 -60
drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
··· 57 57 subsystem_device_id = us_ibdev->pdev->subsystem_device; 58 58 mutex_unlock(&us_ibdev->usdev_lock); 59 59 60 - return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); 60 + return sysfs_emit(buf, "%u\n", subsystem_device_id); 61 61 } 62 62 static DEVICE_ATTR_RO(board_id); 63 63 ··· 69 69 { 70 70 struct usnic_ib_dev *us_ibdev = 71 71 rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); 72 - char *ptr; 73 - unsigned left; 74 - unsigned n; 75 72 enum usnic_vnic_res_type res_type; 76 - 77 - /* Buffer space limit is 1 page */ 78 - ptr = buf; 79 - left = PAGE_SIZE; 73 + int len; 80 74 81 75 mutex_lock(&us_ibdev->usdev_lock); 82 76 if (kref_read(&us_ibdev->vf_cnt) > 0) { 83 77 char *busname; 84 - 78 + char *sep = ""; 85 79 /* 86 80 * bus name seems to come with annoying prefix. 87 81 * Remove it if it is predictable ··· 84 90 if (strncmp(busname, "PCI Bus ", 8) == 0) 85 91 busname += 8; 86 92 87 - n = scnprintf(ptr, left, 88 - "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", 89 - dev_name(&us_ibdev->ib_dev.dev), 90 - busname, 91 - PCI_SLOT(us_ibdev->pdev->devfn), 92 - PCI_FUNC(us_ibdev->pdev->devfn), 93 - netdev_name(us_ibdev->netdev), 94 - us_ibdev->ufdev->mac, 95 - kref_read(&us_ibdev->vf_cnt)); 96 - UPDATE_PTR_LEFT(n, ptr, left); 93 + len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n", 94 + dev_name(&us_ibdev->ib_dev.dev), 95 + busname, 96 + PCI_SLOT(us_ibdev->pdev->devfn), 97 + PCI_FUNC(us_ibdev->pdev->devfn), 98 + netdev_name(us_ibdev->netdev), 99 + us_ibdev->ufdev->mac, 100 + kref_read(&us_ibdev->vf_cnt)); 97 101 102 + len += sysfs_emit_at(buf, len, " Per VF:"); 98 103 for (res_type = USNIC_VNIC_RES_TYPE_EOL; 99 - res_type < USNIC_VNIC_RES_TYPE_MAX; 100 - res_type++) { 104 + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { 101 105 if (us_ibdev->vf_res_cnt[res_type] == 0) 102 106 continue; 103 - n = scnprintf(ptr, left, " %d %s%s", 104 - us_ibdev->vf_res_cnt[res_type], 105 - usnic_vnic_res_type_to_str(res_type), 106 - (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 
107 - "," : ""); 108 - UPDATE_PTR_LEFT(n, ptr, left); 107 + len += sysfs_emit_at(buf, len, "%s %d %s", 108 + sep, 109 + us_ibdev->vf_res_cnt[res_type], 110 + usnic_vnic_res_type_to_str(res_type)); 111 + sep = ","; 109 112 } 110 - n = scnprintf(ptr, left, "\n"); 111 - UPDATE_PTR_LEFT(n, ptr, left); 113 + len += sysfs_emit_at(buf, len, "\n"); 112 114 } else { 113 - n = scnprintf(ptr, left, "%s: no VFs\n", 114 - dev_name(&us_ibdev->ib_dev.dev)); 115 - UPDATE_PTR_LEFT(n, ptr, left); 115 + len = sysfs_emit(buf, "%s: no VFs\n", 116 + dev_name(&us_ibdev->ib_dev.dev)); 116 117 } 118 + 117 119 mutex_unlock(&us_ibdev->usdev_lock); 118 120 119 - return ptr - buf; 121 + return len; 120 122 } 121 123 static DEVICE_ATTR_RO(config); 122 124 ··· 122 132 struct usnic_ib_dev *us_ibdev = 123 133 rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); 124 134 125 - return scnprintf(buf, PAGE_SIZE, "%s\n", 126 - netdev_name(us_ibdev->netdev)); 135 + return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev)); 127 136 } 128 137 static DEVICE_ATTR_RO(iface); 129 138 ··· 132 143 struct usnic_ib_dev *us_ibdev = 133 144 rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); 134 145 135 - return scnprintf(buf, PAGE_SIZE, "%u\n", 136 - kref_read(&us_ibdev->vf_cnt)); 146 + return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt)); 137 147 } 138 148 static DEVICE_ATTR_RO(max_vf); 139 149 ··· 146 158 qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], 147 159 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); 148 160 149 - return scnprintf(buf, PAGE_SIZE, 150 - "%d\n", qp_per_vf); 161 + return sysfs_emit(buf, "%d\n", qp_per_vf); 151 162 } 152 163 static DEVICE_ATTR_RO(qp_per_vf); 153 164 ··· 156 169 struct usnic_ib_dev *us_ibdev = 157 170 rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); 158 171 159 - return scnprintf(buf, PAGE_SIZE, "%d\n", 160 - us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); 172 + return sysfs_emit(buf, "%d\n", 173 + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); 161 174 } 162 175 static DEVICE_ATTR_RO(cq_per_vf); 163 176 ··· 204 217 205 218 static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) 206 219 { 207 - return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); 220 + return sysfs_emit(buf, "0x%p\n", qp_grp->ctx); 208 221 } 209 222 210 223 static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) 211 224 { 212 - int i, j, n; 213 - int left; 214 - char *ptr; 225 + int i, j; 215 226 struct usnic_vnic_res_chunk *res_chunk; 216 227 struct usnic_vnic_res *vnic_res; 228 + int len; 217 229 218 - left = PAGE_SIZE; 219 - ptr = buf; 220 - 221 - n = scnprintf(ptr, left, 222 - "QPN: %d State: (%s) PID: %u VF Idx: %hu ", 223 - qp_grp->ibqp.qp_num, 224 - usnic_ib_qp_grp_state_to_string(qp_grp->state), 225 - qp_grp->owner_pid, 226 - usnic_vnic_get_index(qp_grp->vf->vnic)); 227 - UPDATE_PTR_LEFT(n, ptr, left); 230 + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", 231 + qp_grp->ibqp.qp_num, 232 + usnic_ib_qp_grp_state_to_string(qp_grp->state), 233 + qp_grp->owner_pid, 234 + usnic_vnic_get_index(qp_grp->vf->vnic)); 228 235 229 236 for (i = 0; qp_grp->res_chunk_list[i]; i++) { 230 237 res_chunk = qp_grp->res_chunk_list[i]; 231 238 for (j = 0; j < res_chunk->cnt; j++) { 232 239 vnic_res = res_chunk->res[j]; 233 - n = scnprintf(ptr, left, "%s[%d] ", 240 + len += sysfs_emit_at( 241 + buf, len, "%s[%d] ", 234 242 usnic_vnic_res_type_to_str(vnic_res->type), 235 243 vnic_res->vnic_idx); 236 - UPDATE_PTR_LEFT(n, ptr, left); 237 244 } 
238 245 } 239 246 240 - n = scnprintf(ptr, left, "\n"); 241 - UPDATE_PTR_LEFT(n, ptr, left); 247 + len = sysfs_emit_at(buf, len, "\n"); 242 248 243 - return ptr - buf; 249 + return len; 244 250 } 245 251 246 252 static QPN_ATTR_RO(context);
+5 -2
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
··· 474 474 us_ibdev = to_usdev(pd->device); 475 475 476 476 if (init_attr->create_flags) 477 - return ERR_PTR(-EINVAL); 477 + return ERR_PTR(-EOPNOTSUPP); 478 478 479 479 err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); 480 480 if (err) { ··· 557 557 int status; 558 558 usnic_dbg("\n"); 559 559 560 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 561 + return -EOPNOTSUPP; 562 + 560 563 qp_grp = to_uqp_grp(ibqp); 561 564 562 565 mutex_lock(&qp_grp->vf->pf->usdev_lock); ··· 584 581 struct ib_udata *udata) 585 582 { 586 583 if (attr->flags) 587 - return -EINVAL; 584 + return -EOPNOTSUPP; 588 585 589 586 return 0; 590 587 }
+3
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
··· 119 119 120 120 BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); 121 121 122 + if (attr->flags) 123 + return -EOPNOTSUPP; 124 + 122 125 entries = roundup_pow_of_two(entries); 123 126 if (entries < 1 || entries > dev->dsr->caps.max_cqe) 124 127 return -EINVAL;
+3 -31
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
··· 68 68 static ssize_t hca_type_show(struct device *device, 69 69 struct device_attribute *attr, char *buf) 70 70 { 71 - return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 71 + return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 72 72 } 73 73 static DEVICE_ATTR_RO(hca_type); 74 74 75 75 static ssize_t hw_rev_show(struct device *device, 76 76 struct device_attribute *attr, char *buf) 77 77 { 78 - return sprintf(buf, "%d\n", PVRDMA_REV_ID); 78 + return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); 79 79 } 80 80 static DEVICE_ATTR_RO(hw_rev); 81 81 82 82 static ssize_t board_id_show(struct device *device, 83 83 struct device_attribute *attr, char *buf) 84 84 { 85 - return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); 85 + return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); 86 86 } 87 87 static DEVICE_ATTR_RO(board_id); 88 88 ··· 205 205 dev->flags = 0; 206 206 dev->ib_dev.num_comp_vectors = 1; 207 207 dev->ib_dev.dev.parent = &dev->pdev->dev; 208 - dev->ib_dev.uverbs_cmd_mask = 209 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 210 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 211 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 212 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 213 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 214 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 215 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 216 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 217 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 218 - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 219 - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 220 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 221 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 222 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 223 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 224 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 225 - (1ull << IB_USER_VERBS_CMD_POST_SEND) | 226 - (1ull << IB_USER_VERBS_CMD_POST_RECV) | 227 - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 228 - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); 229 208 230 209 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 231 210 dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; ··· 228 249 229 250 /* Check if SRQ is supported by backend */ 230 251 if (dev->dsr->caps.max_srq) { 231 - dev->ib_dev.uverbs_cmd_mask |= 232 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 233 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 234 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 235 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 236 - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); 237 - 238 252 ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); 239 253 240 254 dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
+4 -1
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
··· 209 209 dev_warn(&dev->pdev->dev, 210 210 "invalid create queuepair flags %#x\n", 211 211 init_attr->create_flags); 212 - return ERR_PTR(-EINVAL); 212 + return ERR_PTR(-EOPNOTSUPP); 213 213 } 214 214 215 215 if (init_attr->qp_type != IB_QPT_RC && ··· 543 543 struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp; 544 544 enum ib_qp_state cur_state, next_state; 545 545 int ret; 546 + 547 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 548 + return -EOPNOTSUPP; 546 549 547 550 /* Sanity checking. Should need lock here */ 548 551 mutex_lock(&qp->mutex);
+1 -1
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
··· 121 121 dev_warn(&dev->pdev->dev, 122 122 "shared receive queue type %d not supported\n", 123 123 init_attr->srq_type); 124 - return -EINVAL; 124 + return -EOPNOTSUPP; 125 125 } 126 126 127 127 if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr ||
-1
drivers/infiniband/sw/rdmavt/Kconfig
··· 4 4 depends on INFINIBAND_VIRT_DMA 5 5 depends on X86_64 6 6 depends on PCI 7 - select DMA_VIRT_OPS 8 7 help 9 8 This is a common software verbs provider for RDMA networks.
+1 -2
drivers/infiniband/sw/rdmavt/ah.c
··· 126 126 } 127 127 128 128 /** 129 - * rvt_destory_ah - Destory an address handle 129 + * rvt_destroy_ah - Destroy an address handle 130 130 * @ibah: address handle 131 131 * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) 132 - * 133 132 * Return: 0 on success 134 133 */ 135 134 int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
+1 -1
drivers/infiniband/sw/rdmavt/cq.c
··· 211 211 int err; 212 212 213 213 if (attr->flags) 214 - return -EINVAL; 214 + return -EOPNOTSUPP; 215 215 216 216 if (entries < 1 || entries > rdi->dparms.props.max_cqe) 217 217 return -EINVAL;
+6 -6
drivers/infiniband/sw/rdmavt/mcast.c
··· 54 54 #include "mcast.h" 55 55 56 56 /** 57 - * rvt_driver_mcast - init resources for multicast 57 + * rvt_driver_mcast_init - init resources for multicast 58 58 * @rdi: rvt dev struct 59 59 * 60 60 * This is per device that registers with rdmavt ··· 69 69 } 70 70 71 71 /** 72 - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct 72 + * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct 73 73 * @qp: the QP to link 74 74 */ 75 75 static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) ··· 98 98 } 99 99 100 100 /** 101 - * mcast_alloc - allocate the multicast GID structure 101 + * rvt_mcast_alloc - allocate the multicast GID structure 102 102 * @mgid: the multicast GID 103 103 * @lid: the muilticast LID (host order) 104 104 * ··· 181 181 EXPORT_SYMBOL(rvt_mcast_find); 182 182 183 183 /** 184 - * mcast_add - insert mcast GID into table and attach QP struct 184 + * rvt_mcast_add - insert mcast GID into table and attach QP struct 185 185 * @mcast: the mcast GID table 186 186 * @mqp: the QP to attach 187 187 * ··· 426 426 } 427 427 428 428 /** 429 - *rvt_mast_tree_empty - determine if any qps are attached to any mcast group 430 - *@rdi: rvt dev struct 429 + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group 430 + * @rdi: rvt dev struct 431 431 * 432 432 * Return: in use count 433 433 */
+2 -4
drivers/infiniband/sw/rdmavt/mr.c
··· 324 324 * @acc: access flags 325 325 * 326 326 * Return: the memory region on success, otherwise returns an errno. 327 - * Note that all DMA addresses should be created via the functions in 328 - * struct dma_virt_ops. 329 327 */ 330 328 struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) 331 329 { ··· 764 766 765 767 /* 766 768 * We use LKEY == zero for kernel virtual addresses 767 - * (see rvt_get_dma_mr() and dma_virt_ops). 769 + * (see rvt_get_dma_mr()). 768 770 */ 769 771 if (sge->lkey == 0) { 770 772 struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); ··· 875 877 876 878 /* 877 879 * We use RKEY == zero for kernel virtual addresses 878 - * (see rvt_get_dma_mr() and dma_virt_ops). 880 + * (see rvt_get_dma_mr()). 879 881 */ 880 882 rcu_read_lock(); 881 883 if (rkey == 0) {
+11 -7
drivers/infiniband/sw/rdmavt/qp.c
··· 1083 1083 if (!rdi) 1084 1084 return ERR_PTR(-EINVAL); 1085 1085 1086 + if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE) 1087 + return ERR_PTR(-EOPNOTSUPP); 1088 + 1086 1089 if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || 1087 - init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || 1088 - (init_attr->create_flags && 1089 - init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) 1090 + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr) 1090 1091 return ERR_PTR(-EINVAL); 1091 1092 1092 1093 /* Check receive queue parameters if no SRQ is specified. */ ··· 1470 1469 int pmtu = 0; /* for gcc warning only */ 1471 1470 int opa_ah; 1472 1471 1472 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 1473 + return -EOPNOTSUPP; 1474 + 1473 1475 spin_lock_irq(&qp->r_lock); 1474 1476 spin_lock(&qp->s_hlock); 1475 1477 spin_lock(&qp->s_lock); ··· 1827 1823 } 1828 1824 1829 1825 /** 1830 - * rvt_post_receive - post a receive on a QP 1826 + * rvt_post_recv - post a receive on a QP 1831 1827 * @ibqp: the QP to post the receive on 1832 1828 * @wr: the WR to post 1833 1829 * @bad_wr: the first bad WR is put here ··· 2249 2245 } 2250 2246 2251 2247 /** 2252 - * rvt_post_srq_receive - post a receive on a shared receive queue 2248 + * rvt_post_srq_recv - post a receive on a shared receive queue 2253 2249 * @ibsrq: the SRQ to post the receive on 2254 2250 * @wr: the list of work requests to post 2255 2251 * @bad_wr: A pointer to the first WR to cause a problem is put here ··· 2501 2497 EXPORT_SYMBOL(rvt_get_rwqe); 2502 2498 2503 2499 /** 2504 - * qp_comm_est - handle trap with QP established 2500 + * rvt_comm_est - handle trap with QP established 2505 2501 * @qp: the QP 2506 2502 */ 2507 2503 void rvt_comm_est(struct rvt_qp *qp) ··· 2947 2943 } 2948 2944 2949 2945 /** 2950 - * ruc_loopback - handle UC and RC loopback requests 2946 + * rvt_ruc_loopback - handle UC and RC loopback requests 2951 2947 * @sqp: the sending QP 2952 2948 * 2953 2949 * This is called from rvt_do_send() to forward a WQE addressed to the same HFI
+2 -34
drivers/infiniband/sw/rdmavt/vt.c
··· 384 384 .create_cq = rvt_create_cq, 385 385 .create_qp = rvt_create_qp, 386 386 .create_srq = rvt_create_srq, 387 + .create_user_ah = rvt_create_ah, 387 388 .dealloc_pd = rvt_dealloc_pd, 388 389 .dealloc_ucontext = rvt_dealloc_ucontext, 389 390 .dereg_mr = rvt_dereg_mr, ··· 525 524 int rvt_register_device(struct rvt_dev_info *rdi) 526 525 { 527 526 int ret = 0, i; 528 - u64 dma_mask; 529 527 530 528 if (!rdi) 531 529 return -EINVAL; ··· 579 579 /* Completion queues */ 580 580 spin_lock_init(&rdi->n_cqs_lock); 581 581 582 - /* DMA Operations */ 583 - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; 584 - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); 585 - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); 586 - if (ret) 587 - goto bail_wss; 588 - 589 582 /* Protection Domain */ 590 583 spin_lock_init(&rdi->n_pds_lock); 591 584 rdi->n_pds_allocated = 0; ··· 589 596 * exactly which functions rdmavt supports, nor do they know the ABI 590 597 * version, so we do all of this sort of stuff here. 591 598 */ 592 - rdi->ibdev.uverbs_cmd_mask = 593 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 594 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 595 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 596 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 597 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 598 - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 599 - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | 600 - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | 601 - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | 602 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 603 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 604 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 605 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 606 - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 607 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 599 + rdi->ibdev.uverbs_cmd_mask |= 608 600 (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 609 601 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 610 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 611 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 612 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 613 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 614 602 (1ull << IB_USER_VERBS_CMD_POST_SEND) | 615 603 (1ull << IB_USER_VERBS_CMD_POST_RECV) | 616 - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 617 - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 618 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 619 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 620 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 621 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 622 604 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); 623 605 rdi->ibdev.node_type = RDMA_NODE_IB_CA; 624 606 if (!rdi->ibdev.num_comp_vectors)
-1
drivers/infiniband/sw/rxe/Kconfig
··· 5 5 depends on INFINIBAND_VIRT_DMA 6 6 select NET_UDP_TUNNEL 7 7 select CRYPTO_CRC32 8 - select DMA_VIRT_OPS 9 8 help 10 9 This driver implements the InfiniBand RDMA transport over 11 10 the Linux network stack. It enables a system with a
-5
drivers/infiniband/sw/rxe/rxe_cq.c
··· 123 123 124 124 memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); 125 125 126 - /* make sure all changes to the CQ are written before we update the 127 - * producer pointer 128 - */ 129 - smp_wmb(); 130 - 131 126 advance_producer(cq->queue); 132 127 spin_unlock_irqrestore(&cq->cq_lock, flags); 133 128
-1
drivers/infiniband/sw/rxe/rxe_mr.c
··· 31 31 return 0; 32 32 33 33 case RXE_MEM_TYPE_MR: 34 - case RXE_MEM_TYPE_FMR: 35 34 if (iova < mem->iova || 36 35 length > mem->length || 37 36 iova > mem->iova + mem->length - length)
-18
drivers/infiniband/sw/rxe/rxe_net.c
··· 8 8 #include <linux/if_arp.h> 9 9 #include <linux/netdevice.h> 10 10 #include <linux/if.h> 11 - #include <linux/if_vlan.h> 12 11 #include <net/udp_tunnel.h> 13 12 #include <net/sch_generic.h> 14 13 #include <linux/netfilter.h> ··· 18 19 #include "rxe_loc.h" 19 20 20 21 static struct rxe_recv_sockets recv_sockets; 21 - 22 - struct device *rxe_dma_device(struct rxe_dev *rxe) 23 - { 24 - struct net_device *ndev; 25 - 26 - ndev = rxe->ndev; 27 - 28 - if (is_vlan_dev(ndev)) 29 - ndev = vlan_dev_real_dev(ndev); 30 - 31 - return ndev->dev.parent; 32 - } 33 22 34 23 int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) 35 24 { ··· 153 166 { 154 167 struct udphdr *udph; 155 168 struct net_device *ndev = skb->dev; 156 - struct net_device *rdev = ndev; 157 169 struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); 158 170 struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); 159 171 160 - if (!rxe && is_vlan_dev(rdev)) { 161 - rdev = vlan_dev_real_dev(ndev); 162 - rxe = rxe_get_dev_from_net(rdev); 163 - } 164 172 if (!rxe) 165 173 goto drop; 166 174
+60 -34
drivers/infiniband/sw/rxe/rxe_queue.h
··· 7 7 #ifndef RXE_QUEUE_H 8 8 #define RXE_QUEUE_H 9 9 10 + /* for definition of shared struct rxe_queue_buf */ 11 + #include <uapi/rdma/rdma_user_rxe.h> 12 + 10 13 /* implements a simple circular buffer that can optionally be 11 14 * shared between user space and the kernel and can be resized 12 - 13 15 * the requested element size is rounded up to a power of 2 14 16 * and the number of elements in the buffer is also rounded 15 17 * up to a power of 2. Since the queue is empty when the 16 18 * producer and consumer indices match the maximum capacity 17 19 * of the queue is one less than the number of element slots 18 20 */ 19 - 20 - /* this data structure is shared between user space and kernel 21 - * space for those cases where the queue is shared. It contains 22 - * the producer and consumer indices. Is also contains a copy 23 - * of the queue size parameters for user space to use but the 24 - * kernel must use the parameters in the rxe_queue struct 25 - * this MUST MATCH the corresponding librxe struct 26 - * for performance reasons arrange to have producer and consumer 27 - * pointers in separate cache lines 28 - * the kernel should always mask the indices to avoid accessing 29 - * memory outside of the data area 30 - */ 31 - struct rxe_queue_buf { 32 - __u32 log2_elem_size; 33 - __u32 index_mask; 34 - __u32 pad_1[30]; 35 - __u32 producer_index; 36 - __u32 pad_2[31]; 37 - __u32 consumer_index; 38 - __u32 pad_3[31]; 39 - __u8 data[]; 40 - }; 41 21 42 22 struct rxe_queue { 43 23 struct rxe_dev *rxe; ··· 26 46 size_t buf_size; 27 47 size_t elem_size; 28 48 unsigned int log2_elem_size; 29 - unsigned int index_mask; 49 + u32 index_mask; 30 50 }; 31 51 32 52 int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, ··· 56 76 57 77 static inline int queue_empty(struct rxe_queue *q) 58 78 { 59 - return ((q->buf->producer_index - q->buf->consumer_index) 60 - & q->index_mask) == 0; 79 + u32 prod; 80 + u32 cons; 81 + 82 + /* make sure all changes to queue complete before 83 + * testing queue empty 84 + */ 85 + prod = smp_load_acquire(&q->buf->producer_index); 86 + /* same */ 87 + cons = smp_load_acquire(&q->buf->consumer_index); 88 + 89 + return ((prod - cons) & q->index_mask) == 0; 61 90 } 62 91 63 92 static inline int queue_full(struct rxe_queue *q) 64 93 { 65 - return ((q->buf->producer_index + 1 - q->buf->consumer_index) 66 - & q->index_mask) == 0; 94 + u32 prod; 95 + u32 cons; 96 + 97 + /* make sure all changes to queue complete before 98 + * testing queue full 99 + */ 100 + prod = smp_load_acquire(&q->buf->producer_index); 101 + /* same */ 102 + cons = smp_load_acquire(&q->buf->consumer_index); 103 + 104 + return ((prod + 1 - cons) & q->index_mask) == 0; 67 105 } 68 106 69 107 static inline void advance_producer(struct rxe_queue *q) 70 108 { 71 - q->buf->producer_index = (q->buf->producer_index + 1) 72 - & q->index_mask; 109 + u32 prod; 110 + 111 + prod = (q->buf->producer_index + 1) & q->index_mask; 112 + 113 + /* make sure all changes to queue complete before 114 + * changing producer index 115 + */ 116 + smp_store_release(&q->buf->producer_index, prod); 73 117 } 74 118 75 119 static inline void advance_consumer(struct rxe_queue *q) 76 120 { 77 - q->buf->consumer_index = (q->buf->consumer_index + 1) 78 - & q->index_mask; 121 + u32 cons; 122 + 123 + cons = (q->buf->consumer_index + 1) & q->index_mask; 124 + 125 + /* make sure all changes to queue complete before 126 + * changing consumer index 127 + */ 128 + smp_store_release(&q->buf->consumer_index, cons); 79 129 } 80 130 81 
131 static inline void *producer_addr(struct rxe_queue *q) ··· 122 112 123 113 static inline unsigned int producer_index(struct rxe_queue *q) 124 114 { 125 - return q->buf->producer_index; 115 + u32 index; 116 + 117 + /* make sure all changes to queue 118 + * complete before getting producer index 119 + */ 120 + index = smp_load_acquire(&q->buf->producer_index); 121 + index &= q->index_mask; 122 + 123 + return index; 126 124 } 127 125 128 126 static inline unsigned int consumer_index(struct rxe_queue *q) 129 127 { 130 - return q->buf->consumer_index; 128 + u32 index; 129 + 130 + /* make sure all changes to queue 131 + * complete before getting consumer index 132 + */ 133 + index = smp_load_acquire(&q->buf->consumer_index); 134 + index &= q->index_mask; 135 + 136 + return index; 131 137 } 132 138 133 139 static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
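The rxe_queue.h hunk above (together with the smp_wmb() removals in rxe_cq.c above and rxe_verbs.c below) moves the memory ordering into the index accessors themselves. A minimal sketch of the same acquire/release pattern for a power-of-two ring shared between a producer and a consumer; the struct and helper names here are illustrative, not the rxe ones:

    #include <linux/types.h>
    #include <asm/barrier.h>

    struct spsc_ring {
            u32 producer_index;
            u32 consumer_index;
            u32 index_mask;         /* slot count is a power of two, mask = count - 1 */
    };

    /* Producer: make the slot contents visible before publishing the index. */
    static inline void spsc_advance_producer(struct spsc_ring *r)
    {
            u32 prod = (r->producer_index + 1) & r->index_mask;

            smp_store_release(&r->producer_index, prod);
    }

    /* Consumer: read the producer index before touching the slot it covers. */
    static inline bool spsc_is_empty(struct spsc_ring *r)
    {
            u32 prod = smp_load_acquire(&r->producer_index);

            return ((prod - r->consumer_index) & r->index_mask) == 0;
    }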
+2 -1
drivers/infiniband/sw/rxe/rxe_req.c
··· 634 634 } 635 635 636 636 if (unlikely(qp_type(qp) == IB_QPT_RC && 637 - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { 637 + psn_compare(qp->req.psn, (qp->comp.psn + 638 + RXE_MAX_UNACKED_PSNS)) > 0)) { 638 639 qp->req.wait_psn = 1; 639 640 goto exit; 640 641 }
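The rxe_req.c fix above replaces a plain '>' on PSNs with psn_compare(), since PSNs are 24-bit serial numbers that wrap around. The helper itself is not shown in this hunk; a hedged sketch of how such a comparison is commonly written (shift the 24-bit difference up so two's-complement sign handles wraparound):

    #include <linux/types.h>

    /*
     * Illustrative 24-bit serial-number compare: positive if a is "after" b,
     * negative if before, zero if equal, assuming the two values are within
     * half the 24-bit space of each other.
     */
    static inline int psn_24bit_cmp(u32 a, u32 b)
    {
            return (s32)((a - b) << 8);
    }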
-5
drivers/infiniband/sw/rxe/rxe_resp.c
··· 872 872 else 873 873 wc->network_hdr_type = RDMA_NETWORK_IPV6; 874 874 875 - if (is_vlan_dev(skb->dev)) { 876 - wc->wc_flags |= IB_WC_WITH_VLAN; 877 - wc->vlan_id = vlan_dev_vlan_id(skb->dev); 878 - } 879 - 880 875 if (pkt->mask & RXE_IMMDT_MASK) { 881 876 wc->wc_flags |= IB_WC_WITH_IMM; 882 877 wc->ex.imm_data = immdt_imm(pkt);
+14 -53
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 244 244 recv_wqe->dma.cur_sge = 0; 245 245 recv_wqe->dma.sge_offset = 0; 246 246 247 - /* make sure all changes to the work queue are written before we 248 - * update the producer pointer 249 - */ 250 - smp_wmb(); 251 - 252 247 advance_producer(rq->queue); 253 248 return 0; 254 249 ··· 259 264 struct rxe_pd *pd = to_rpd(ibsrq->pd); 260 265 struct rxe_srq *srq = to_rsrq(ibsrq); 261 266 struct rxe_create_srq_resp __user *uresp = NULL; 267 + 268 + if (init->srq_type != IB_SRQT_BASIC) 269 + return -EOPNOTSUPP; 262 270 263 271 if (udata) { 264 272 if (udata->outlen < sizeof(*uresp)) ··· 390 392 uresp = udata->outbuf; 391 393 } 392 394 395 + if (init->create_flags) 396 + return ERR_PTR(-EOPNOTSUPP); 397 + 393 398 err = rxe_qp_chk_init(rxe, init); 394 399 if (err) 395 400 goto err1; ··· 433 432 int err; 434 433 struct rxe_dev *rxe = to_rdev(ibqp->device); 435 434 struct rxe_qp *qp = to_rqp(ibqp); 435 + 436 + if (mask & ~IB_QP_ATTR_STANDARD_BITS) 437 + return -EOPNOTSUPP; 436 438 437 439 err = rxe_qp_chk_attr(rxe, qp, attr, mask); 438 440 if (err) ··· 628 624 if (unlikely(err)) 629 625 goto err1; 630 626 631 - /* 632 - * make sure all changes to the work queue are 633 - * written before we update the producer pointer 634 - */ 635 - smp_wmb(); 636 - 637 627 advance_producer(sq->queue); 638 628 spin_unlock_irqrestore(&qp->sq.sq_lock, flags); 639 629 ··· 763 765 } 764 766 765 767 if (attr->flags) 766 - return -EINVAL; 768 + return -EOPNOTSUPP; 767 769 768 770 err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); 769 771 if (err) ··· 1031 1033 struct rxe_dev *rxe = 1032 1034 rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); 1033 1035 1034 - return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); 1036 + return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1)); 1035 1037 } 1036 1038 1037 1039 static DEVICE_ATTR_RO(parent); ··· 1068 1070 .create_cq = rxe_create_cq, 1069 1071 .create_qp = rxe_create_qp, 1070 1072 .create_srq = rxe_create_srq, 1073 + .create_user_ah = rxe_create_ah, 1071 1074 .dealloc_driver = rxe_dealloc, 1072 1075 .dealloc_pd = rxe_dealloc_pd, 1073 1076 .dealloc_ucontext = rxe_dealloc_ucontext, ··· 1117 1118 int err; 1118 1119 struct ib_device *dev = &rxe->ib_dev; 1119 1120 struct crypto_shash *tfm; 1120 - u64 dma_mask; 1121 1121 1122 1122 strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); 1123 1123 1124 1124 dev->node_type = RDMA_NODE_IB_CA; 1125 1125 dev->phys_port_cnt = 1; 1126 1126 dev->num_comp_vectors = num_possible_cpus(); 1127 - dev->dev.parent = rxe_dma_device(rxe); 1128 1127 dev->local_dma_lkey = 0; 1129 1128 addrconf_addr_eui48((unsigned char *)&dev->node_guid, 1130 1129 rxe->ndev->dev_addr); 1131 - dev->dev.dma_parms = &rxe->dma_parms; 1132 - dma_set_max_seg_size(&dev->dev, UINT_MAX); 1133 - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); 1134 - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); 1135 - if (err) 1136 - return err; 1137 1130 1138 - dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) 1139 - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) 1140 - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) 1141 - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) 1142 - | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) 1143 - | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) 1144 - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) 1145 - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) 1146 - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) 1147 - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) 1148 - | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) 1149 - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) 1150 - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) 1151 - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) 1152 - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) 1153 - | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) 1154 - | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) 1155 - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) 1156 - | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) 1157 - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) 1158 - | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) 1159 - | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) 1160 - | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) 1161 - | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) 1162 - | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) 1163 - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) 1164 - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) 1165 - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) 1166 - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) 1167 - | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) 1168 - | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) 1169 - ; 1131 + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | 1132 + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); 1170 1133 1171 1134 ib_set_device_ops(dev, &rxe_dev_ops); 1172 1135 err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
-2
drivers/infiniband/sw/rxe/rxe_verbs.h
··· 273 273 RXE_MEM_TYPE_NONE, 274 274 RXE_MEM_TYPE_DMA, 275 275 RXE_MEM_TYPE_MR, 276 - RXE_MEM_TYPE_FMR, 277 276 RXE_MEM_TYPE_MW, 278 277 }; 279 278 ··· 351 352 struct rxe_dev { 352 353 struct ib_device ib_dev; 353 354 struct ib_device_attr attr; 354 - struct device_dma_parameters dma_parms; 355 355 int max_ucontext; 356 356 int max_inline_data; 357 357 struct mutex usdev_lock;
-1
drivers/infiniband/sw/siw/Kconfig
··· 2 2 tristate "Software RDMA over TCP/IP (iWARP) driver" 3 3 depends on INET && INFINIBAND && LIBCRC32C 4 4 depends on INFINIBAND_VIRT_DMA 5 - select DMA_VIRT_OPS 6 5 help 7 6 This driver implements the iWARP RDMA transport over 8 7 the Linux TCP/IP network stack. It enables a system with a
-1
drivers/infiniband/sw/siw/siw.h
··· 69 69 70 70 struct siw_device { 71 71 struct ib_device base_dev; 72 - struct device_dma_parameters dma_parms; 73 72 struct net_device *netdev; 74 73 struct siw_dev_cap attrs; 75 74
+1 -1
drivers/infiniband/sw/siw/siw_cm.c
··· 1047 1047 cep->state); 1048 1048 } 1049 1049 } 1050 - if (rv && rv != EAGAIN) 1050 + if (rv && rv != -EAGAIN) 1051 1051 release_cep = 1; 1052 1052 break; 1053 1053
+3 -49
drivers/infiniband/sw/siw/siw_main.c
··· 305 305 { 306 306 struct siw_device *sdev = NULL; 307 307 struct ib_device *base_dev; 308 - struct device *parent = netdev->dev.parent; 309 - u64 dma_mask; 310 308 int rv; 311 309 312 - if (!parent) { 313 - /* 314 - * The loopback device has no parent device, 315 - * so it appears as a top-level device. To support 316 - * loopback device connectivity, take this device 317 - * as the parent device. Skip all other devices 318 - * w/o parent device. 319 - */ 320 - if (netdev->type != ARPHRD_LOOPBACK) { 321 - pr_warn("siw: device %s error: no parent device\n", 322 - netdev->name); 323 - return NULL; 324 - } 325 - parent = &netdev->dev; 326 - } 327 310 sdev = ib_alloc_device(siw_device, base_dev); 328 311 if (!sdev) 329 312 return NULL; ··· 330 347 addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, 331 348 addr); 332 349 } 333 - base_dev->uverbs_cmd_mask = 334 - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 335 - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 336 - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 337 - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 338 - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 339 - (1ull << IB_USER_VERBS_CMD_REG_MR) | 340 - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 341 - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 342 - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 343 - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 344 - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 345 - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 346 - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 347 - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 348 - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 349 - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 350 - (1ull << IB_USER_VERBS_CMD_POST_SEND) | 351 - (1ull << IB_USER_VERBS_CMD_POST_RECV) | 352 - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 353 - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | 354 - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 355 - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 356 - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); 350 + 351 + base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); 357 352 358 353 base_dev->node_type = RDMA_NODE_RNIC; 359 354 memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, ··· 343 382 * per physical port. 344 383 */ 345 384 base_dev->phys_port_cnt = 1; 346 - base_dev->dev.parent = parent; 347 - base_dev->dev.dma_parms = &sdev->dma_parms; 348 - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); 349 - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); 350 - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) 351 - goto error; 352 - 353 385 base_dev->num_comp_vectors = num_possible_cpus(); 354 386 355 387 xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); ··· 384 430 atomic_set(&sdev->num_mr, 0); 385 431 atomic_set(&sdev->num_pd, 0); 386 432 387 - sdev->numa_node = dev_to_node(parent); 433 + sdev->numa_node = dev_to_node(&netdev->dev); 388 434 spin_lock_init(&sdev->lock); 389 435 390 436 return sdev;
+12
drivers/infiniband/sw/siw/siw_verbs.c
··· 307 307 308 308 siw_dbg(base_dev, "create new QP\n"); 309 309 310 + if (attrs->create_flags) 311 + return ERR_PTR(-EOPNOTSUPP); 312 + 310 313 if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { 311 314 siw_dbg(base_dev, "too many QP's\n"); 312 315 rv = -ENOMEM; ··· 546 543 547 544 if (!attr_mask) 548 545 return 0; 546 + 547 + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 548 + return -EOPNOTSUPP; 549 549 550 550 memset(&new_attrs, 0, sizeof(new_attrs)); 551 551 ··· 1100 1094 struct siw_cq *cq = to_siw_cq(base_cq); 1101 1095 int rv, size = attr->cqe; 1102 1096 1097 + if (attr->flags) 1098 + return -EOPNOTSUPP; 1099 + 1103 1100 if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { 1104 1101 siw_dbg(base_cq->device, "too many CQ's\n"); 1105 1102 rv = -ENOMEM; ··· 1563 1554 rdma_udata_to_drv_context(udata, struct siw_ucontext, 1564 1555 base_ucontext); 1565 1556 int rv; 1557 + 1558 + if (init_attrs->srq_type != IB_SRQT_BASIC) 1559 + return -EOPNOTSUPP; 1566 1560 1567 1561 if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { 1568 1562 siw_dbg_pd(base_srq->pd, "too many SRQ's\n");
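The rxe and siw hunks above adopt the same convention: fail early with -EOPNOTSUPP when userspace passes creation flags or attribute bits the driver does not implement, instead of silently ignoring them. A minimal sketch of that check with an illustrative flags word:

    #include <linux/errno.h>

    #define DEMO_SUPPORTED_FLAGS    0x3     /* illustrative: only bits 0-1 handled */

    static int demo_check_flags(unsigned int flags)
    {
            if (flags & ~DEMO_SUPPORTED_FLAGS)
                    return -EOPNOTSUPP;
            return 0;
    }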
+2 -2
drivers/infiniband/ulp/ipoib/ipoib_cm.c
··· 1514 1514 struct ipoib_dev_priv *priv = ipoib_priv(dev); 1515 1515 1516 1516 if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) 1517 - return sprintf(buf, "connected\n"); 1517 + return sysfs_emit(buf, "connected\n"); 1518 1518 else 1519 - return sprintf(buf, "datagram\n"); 1519 + return sysfs_emit(buf, "datagram\n"); 1520 1520 } 1521 1521 1522 1522 static ssize_t set_mode(struct device *d, struct device_attribute *attr,
+4
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
··· 166 166 return SPEED_14000; 167 167 case IB_SPEED_EDR: 168 168 return SPEED_25000; 169 + case IB_SPEED_HDR: 170 + return SPEED_50000; 171 + case IB_SPEED_NDR: 172 + return SPEED_100000; 169 173 } 170 174 171 175 return SPEED_UNKNOWN;
+4 -3
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 2266 2266 struct net_device *ndev = to_net_dev(dev); 2267 2267 struct ipoib_dev_priv *priv = ipoib_priv(ndev); 2268 2268 2269 - return sprintf(buf, "0x%04x\n", priv->pkey); 2269 + return sysfs_emit(buf, "0x%04x\n", priv->pkey); 2270 2270 } 2271 2271 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 2272 2272 ··· 2276 2276 struct net_device *ndev = to_net_dev(dev); 2277 2277 struct ipoib_dev_priv *priv = ipoib_priv(ndev); 2278 2278 2279 - return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); 2279 + return sysfs_emit(buf, "%d\n", 2280 + test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); 2280 2281 } 2281 2282 2282 2283 void ipoib_set_umcast(struct net_device *ndev, int umcast_val) ··· 2447 2446 "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", 2448 2447 current->comm); 2449 2448 2450 - return sprintf(buf, "%#x\n", ndev->dev_id); 2449 + return sysfs_emit(buf, "%#x\n", ndev->dev_id); 2451 2450 } 2452 2451 static DEVICE_ATTR_RO(dev_id); 2453 2452
+2 -2
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
··· 158 158 159 159 int ret, size, req_vec; 160 160 int i; 161 + static atomic_t counter; 161 162 162 163 size = ipoib_recvq_size + 1; 163 164 ret = ipoib_cm_dev_init(dev); ··· 172 171 if (ret != -EOPNOTSUPP) 173 172 return ret; 174 173 175 - req_vec = (priv->port - 1) * 2; 176 - 174 + req_vec = atomic_inc_return(&counter) * 2; 177 175 cq_attr.cqe = size; 178 176 cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors; 179 177 priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL,
+1 -1
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
··· 46 46 struct net_device *dev = to_net_dev(d); 47 47 struct ipoib_dev_priv *priv = ipoib_priv(dev); 48 48 49 - return sprintf(buf, "%s\n", priv->parent->name); 49 + return sysfs_emit(buf, "%s\n", priv->parent->name); 50 50 } 51 51 static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); 52 52
+6 -18
drivers/infiniband/ulp/iser/iscsi_iser.c
··· 187 187 struct iser_device *device = iser_conn->ib_conn.device; 188 188 struct iscsi_iser_task *iser_task = task->dd_data; 189 189 u64 dma_addr; 190 - const bool mgmt_task = !task->sc && !in_interrupt(); 191 - int ret = 0; 192 190 193 - if (unlikely(mgmt_task)) 194 - mutex_lock(&iser_conn->state_mutex); 195 - 196 - if (unlikely(iser_conn->state != ISER_CONN_UP)) { 197 - ret = -ENODEV; 198 - goto out; 199 - } 191 + if (unlikely(iser_conn->state != ISER_CONN_UP)) 192 + return -ENODEV; 200 193 201 194 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, 202 195 ISER_HEADERS_LEN, DMA_TO_DEVICE); 203 - if (ib_dma_mapping_error(device->ib_device, dma_addr)) { 204 - ret = -ENOMEM; 205 - goto out; 206 - } 196 + if (ib_dma_mapping_error(device->ib_device, dma_addr)) 197 + return -ENOMEM; 207 198 208 199 tx_desc->inv_wr.next = NULL; 209 200 tx_desc->reg_wr.wr.next = NULL; ··· 205 214 tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; 206 215 207 216 iser_task->iser_conn = iser_conn; 208 - out: 209 - if (unlikely(mgmt_task)) 210 - mutex_unlock(&iser_conn->state_mutex); 211 217 212 - return ret; 218 + return 0; 213 219 } 214 220 215 221 /** ··· 727 739 } 728 740 729 741 /** 730 - * iscsi_iser_set_param() - set class connection parameter 742 + * iscsi_iser_conn_get_stats() - get iscsi connection statistics 731 743 * @cls_conn: iscsi class connection 732 744 * @stats: iscsi stats to output 733 745 *
+27 -2
drivers/infiniband/ulp/isert/ib_isert.c
··· 28 28 module_param_named(debug_level, isert_debug_level, int, 0644); 29 29 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); 30 30 31 + static int isert_sg_tablesize_set(const char *val, 32 + const struct kernel_param *kp); 33 + static const struct kernel_param_ops sg_tablesize_ops = { 34 + .set = isert_sg_tablesize_set, 35 + .get = param_get_int, 36 + }; 37 + 38 + static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE; 39 + module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644); 40 + MODULE_PARM_DESC(sg_tablesize, 41 + "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)"); 42 + 31 43 static DEFINE_MUTEX(device_list_mutex); 32 44 static LIST_HEAD(device_list); 33 45 static struct workqueue_struct *isert_comp_wq; ··· 58 46 static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc); 59 47 static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc); 60 48 static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc); 49 + 50 + static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp) 51 + { 52 + int n = 0, ret; 53 + 54 + ret = kstrtoint(val, 10, &n); 55 + if (ret != 0 || n < ISCSI_ISER_MIN_SG_TABLESIZE || 56 + n > ISCSI_ISER_MAX_SG_TABLESIZE) 57 + return -EINVAL; 58 + 59 + return param_set_int(val, kp); 60 + } 61 + 61 62 62 63 static inline bool 63 64 isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) ··· 126 101 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; 127 102 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 128 103 factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, 129 - ISCSI_ISER_MAX_SG_TABLESIZE); 104 + isert_sg_tablesize); 130 105 attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; 131 106 attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; 132 107 attr.cap.max_recv_sge = 1; ··· 1101 1076 sequence_cmd: 1102 1077 rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); 1103 1078 1104 - if (!rc && dump_payload == false && unsol_data) 1079 + if (!rc && !dump_payload && unsol_data) 1105 1080 iscsit_set_unsolicited_dataout(cmd); 1106 1081 else if (dump_payload && imm_data) 1107 1082 target_put_sess_cmd(&cmd->se_cmd);
+6
drivers/infiniband/ulp/isert/ib_isert.h
··· 65 65 */ 66 66 #define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) 67 67 68 + /* Default I/O size is 1MB */ 69 + #define ISCSI_ISER_DEF_SG_TABLESIZE 256 70 + 71 + /* Minimum I/O size is 512KB */ 72 + #define ISCSI_ISER_MIN_SG_TABLESIZE 128 73 + 68 74 /* Maximum support is 16MB I/O size */ 69 75 #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 70 76
+1 -1
drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
··· 437 437 } __packed; 438 438 439 439 /** 440 - * struct opa_vnic_iface_macs_entry - single entry in the mac list 440 + * struct opa_vnic_iface_mac_entry - single entry in the mac list 441 441 * @mac_addr: MAC address 442 442 */ 443 443 struct opa_vnic_iface_mac_entry {
+1 -1
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
··· 74 74 } 75 75 76 76 /** 77 - * opa_vnic_get_error_counters - get summary counters 77 + * opa_vnic_get_summary_counters - get summary counters 78 78 * @adapter: vnic port adapter 79 79 * @cntrs: pointer to destination summary counters structure 80 80 *
+31 -31
drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
··· 52 52 { 53 53 struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); 54 54 55 - return sprintf(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt)); 55 + return sysfs_emit(page, "%d\n", 56 + rtrs_clt_get_max_reconnect_attempts(clt)); 56 57 } 57 58 58 59 static ssize_t max_reconnect_attempts_store(struct device *dev, ··· 96 95 97 96 switch (clt->mp_policy) { 98 97 case MP_POLICY_RR: 99 - return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy); 98 + return sysfs_emit(page, "round-robin (RR: %d)\n", 99 + clt->mp_policy); 100 100 case MP_POLICY_MIN_INFLIGHT: 101 - return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy); 101 + return sysfs_emit(page, "min-inflight (MI: %d)\n", 102 + clt->mp_policy); 102 103 default: 103 - return sprintf(page, "Unknown (%d)\n", clt->mp_policy); 104 + return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy); 104 105 } 105 106 } 106 107 ··· 141 138 static ssize_t add_path_show(struct device *dev, 142 139 struct device_attribute *attr, char *page) 143 140 { 144 - return scnprintf(page, PAGE_SIZE, 145 - "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", 146 - attr->attr.name); 141 + return sysfs_emit( 142 + page, 143 + "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", 144 + attr->attr.name); 147 145 } 148 146 149 147 static ssize_t add_path_store(struct device *dev, ··· 188 184 189 185 sess = container_of(kobj, struct rtrs_clt_sess, kobj); 190 186 if (sess->state == RTRS_CLT_CONNECTED) 191 - return sprintf(page, "connected\n"); 187 + return sysfs_emit(page, "connected\n"); 192 188 193 - return sprintf(page, "disconnected\n"); 189 + return sysfs_emit(page, "disconnected\n"); 194 190 } 195 191 196 192 static struct kobj_attribute rtrs_clt_state_attr = 197 193 __ATTR(state, 0444, rtrs_clt_state_show, NULL); 198 194 199 195 static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj, 200 - struct kobj_attribute *attr, 201 - char *page) 196 + struct kobj_attribute *attr, char *buf) 202 197 { 203 - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", 204 - attr->attr.name); 198 + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); 205 199 } 206 200 207 201 static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, ··· 227 225 rtrs_clt_reconnect_store); 228 226 229 227 static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj, 230 - struct kobj_attribute *attr, 231 - char *page) 228 + struct kobj_attribute *attr, char *buf) 232 229 { 233 - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", 234 - attr->attr.name); 230 + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); 235 231 } 236 232 237 233 static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, ··· 257 257 rtrs_clt_disconnect_store); 258 258 259 259 static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj, 260 - struct kobj_attribute *attr, 261 - char *page) 260 + struct kobj_attribute *attr, char *buf) 262 261 { 263 - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", 264 - attr->attr.name); 262 + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); 265 263 } 266 264 267 265 static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj, ··· 322 324 323 325 sess = container_of(kobj, typeof(*sess), kobj); 324 326 325 - return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port); 327 + return sysfs_emit(page, "%u\n", sess->hca_port); 326 328 } 327 329 328 330 static struct kobj_attribute rtrs_clt_hca_port_attr = ··· 
336 338 337 339 sess = container_of(kobj, struct rtrs_clt_sess, kobj); 338 340 339 - return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name); 341 + return sysfs_emit(page, "%s\n", sess->hca_name); 340 342 } 341 343 342 344 static struct kobj_attribute rtrs_clt_hca_name_attr = ··· 347 349 char *page) 348 350 { 349 351 struct rtrs_clt_sess *sess; 350 - int cnt; 352 + int len; 351 353 352 354 sess = container_of(kobj, struct rtrs_clt_sess, kobj); 353 - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, 354 - page, PAGE_SIZE); 355 - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); 355 + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, 356 + PAGE_SIZE); 357 + len += sysfs_emit_at(page, len, "\n"); 358 + return len; 356 359 } 357 360 358 361 static struct kobj_attribute rtrs_clt_src_addr_attr = ··· 364 365 char *page) 365 366 { 366 367 struct rtrs_clt_sess *sess; 367 - int cnt; 368 + int len; 368 369 369 370 sess = container_of(kobj, struct rtrs_clt_sess, kobj); 370 - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, 371 - page, PAGE_SIZE); 372 - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); 371 + len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, 372 + PAGE_SIZE); 373 + len += sysfs_emit_at(page, len, "\n"); 374 + return len; 373 375 } 374 376 375 377 static struct kobj_attribute rtrs_clt_dst_addr_attr =
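For show functions built in stages (an address string followed by a newline in the hunk above), the series uses sysfs_emit_at(), which appends at a given offset and returns the number of bytes added. A small illustrative sketch, not taken from rtrs:

    #include <linux/kobject.h>
    #include <linux/sysfs.h>

    static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
                                char *buf)
    {
            int len;

            len = sysfs_emit(buf, "state: %s", "connected");
            len += sysfs_emit_at(buf, len, "\n");   /* append at the current offset */
            return len;
    }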
+36 -38
drivers/infiniband/ulp/rtrs/rtrs-clt.c
··· 1236 1236 if (req->mr) 1237 1237 ib_dereg_mr(req->mr); 1238 1238 kfree(req->sge); 1239 - rtrs_iu_free(req->iu, DMA_TO_DEVICE, 1240 - sess->s.dev->ib_dev, 1); 1239 + rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); 1241 1240 } 1242 1241 kfree(sess->reqs); 1243 1242 sess->reqs = NULL; ··· 1498 1499 con->c.cid = cid; 1499 1500 con->c.sess = &sess->s; 1500 1501 atomic_set(&con->io_cnt, 0); 1502 + mutex_init(&con->con_mutex); 1501 1503 1502 1504 sess->s.con[cid] = &con->c; 1503 1505 ··· 1510 1510 struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); 1511 1511 1512 1512 sess->s.con[con->c.cid] = NULL; 1513 + mutex_destroy(&con->con_mutex); 1513 1514 kfree(con); 1514 1515 } 1515 1516 ··· 1521 1520 int err, cq_vector; 1522 1521 struct rtrs_msg_rkey_rsp *rsp; 1523 1522 1524 - /* 1525 - * This function can fail, but still destroy_con_cq_qp() should 1526 - * be called, this is because create_con_cq_qp() is called on cm 1527 - * event path, thus caller/waiter never knows: have we failed before 1528 - * create_con_cq_qp() or after. To solve this dilemma without 1529 - * creating any additional flags just allow destroy_con_cq_qp() be 1530 - * called many times. 1531 - */ 1532 - 1523 + lockdep_assert_held(&con->con_mutex); 1533 1524 if (con->c.cid == 0) { 1534 1525 /* 1535 1526 * One completion for each receive and two for each send ··· 1595 1602 * Be careful here: destroy_con_cq_qp() can be called even 1596 1603 * create_con_cq_qp() failed, see comments there. 1597 1604 */ 1598 - 1605 + lockdep_assert_held(&con->con_mutex); 1599 1606 rtrs_cq_qp_destroy(&con->c); 1600 1607 if (con->rsp_ius) { 1601 - rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, 1602 - sess->s.dev->ib_dev, con->queue_size); 1608 + rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); 1603 1609 con->rsp_ius = NULL; 1604 1610 con->queue_size = 0; 1605 1611 } ··· 1626 1634 struct rtrs_sess *s = con->c.sess; 1627 1635 int err; 1628 1636 1637 + mutex_lock(&con->con_mutex); 1629 1638 err = create_con_cq_qp(con); 1639 + mutex_unlock(&con->con_mutex); 1630 1640 if (err) { 1631 1641 rtrs_err(s, "create_con_cq_qp(), err: %d\n", err); 1632 1642 return err; 1633 1643 } 1634 1644 err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); 1635 - if (err) { 1645 + if (err) 1636 1646 rtrs_err(s, "Resolving route failed, err: %d\n", err); 1637 - destroy_con_cq_qp(con); 1638 - } 1639 1647 1640 1648 return err; 1641 1649 } ··· 1829 1837 cm_err = rtrs_rdma_route_resolved(con); 1830 1838 break; 1831 1839 case RDMA_CM_EVENT_ESTABLISHED: 1832 - con->cm_err = rtrs_rdma_conn_established(con, ev); 1833 - if (likely(!con->cm_err)) { 1840 + cm_err = rtrs_rdma_conn_established(con, ev); 1841 + if (likely(!cm_err)) { 1834 1842 /* 1835 1843 * Report success and wake up. Here we abuse state_wq, 1836 1844 * i.e. wake up without state change, but we set cm_err. 
··· 1843 1851 case RDMA_CM_EVENT_REJECTED: 1844 1852 cm_err = rtrs_rdma_conn_rejected(con, ev); 1845 1853 break; 1854 + case RDMA_CM_EVENT_DISCONNECTED: 1855 + /* No message for disconnecting */ 1856 + cm_err = -ECONNRESET; 1857 + break; 1846 1858 case RDMA_CM_EVENT_CONNECT_ERROR: 1847 1859 case RDMA_CM_EVENT_UNREACHABLE: 1860 + case RDMA_CM_EVENT_ADDR_CHANGE: 1861 + case RDMA_CM_EVENT_TIMEWAIT_EXIT: 1848 1862 rtrs_wrn(s, "CM error event %d\n", ev->event); 1849 1863 cm_err = -ECONNRESET; 1850 1864 break; 1851 1865 case RDMA_CM_EVENT_ADDR_ERROR: 1852 1866 case RDMA_CM_EVENT_ROUTE_ERROR: 1867 + rtrs_wrn(s, "CM error event %d\n", ev->event); 1853 1868 cm_err = -EHOSTUNREACH; 1854 - break; 1855 - case RDMA_CM_EVENT_DISCONNECTED: 1856 - case RDMA_CM_EVENT_ADDR_CHANGE: 1857 - case RDMA_CM_EVENT_TIMEWAIT_EXIT: 1858 - cm_err = -ECONNRESET; 1859 1869 break; 1860 1870 case RDMA_CM_EVENT_DEVICE_REMOVAL: 1861 1871 /* ··· 1943 1949 1944 1950 errr: 1945 1951 stop_cm(con); 1946 - /* Is safe to call destroy if cq_qp is not inited */ 1952 + mutex_lock(&con->con_mutex); 1947 1953 destroy_con_cq_qp(con); 1954 + mutex_unlock(&con->con_mutex); 1948 1955 destroy_cm: 1949 1956 destroy_cm(con); 1950 1957 ··· 2052 2057 if (!sess->s.con[cid]) 2053 2058 break; 2054 2059 con = to_clt_con(sess->s.con[cid]); 2060 + mutex_lock(&con->con_mutex); 2055 2061 destroy_con_cq_qp(con); 2062 + mutex_unlock(&con->con_mutex); 2056 2063 destroy_cm(con); 2057 2064 destroy_con(con); 2058 2065 } ··· 2161 2164 mutex_unlock(&clt->paths_mutex); 2162 2165 } 2163 2166 2164 - static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, 2165 - struct rtrs_addr *addr) 2167 + static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) 2166 2168 { 2167 2169 struct rtrs_clt *clt = sess->clt; 2168 2170 ··· 2220 2224 struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); 2221 2225 2222 2226 stop_cm(con); 2227 + 2228 + mutex_lock(&con->con_mutex); 2223 2229 destroy_con_cq_qp(con); 2230 + mutex_unlock(&con->con_mutex); 2224 2231 destroy_cm(con); 2225 2232 destroy_con(con); 2226 2233 } ··· 2244 2245 struct rtrs_iu *iu; 2245 2246 2246 2247 iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); 2247 - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); 2248 + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); 2248 2249 2249 2250 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2250 2251 rtrs_err(sess->clt, "Sess info request send failed: %s\n", ··· 2263 2264 int i, sgi; 2264 2265 2265 2266 sg_cnt = le16_to_cpu(msg->sg_cnt); 2266 - if (unlikely(!sg_cnt)) 2267 + if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { 2268 + rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", 2269 + sg_cnt); 2267 2270 return -EINVAL; 2271 + } 2272 + 2268 2273 /* 2269 2274 * Check if IB immediate data size is enough to hold the mem_id and 2270 2275 * the offset inside the memory chunk. 
··· 2279 2276 rtrs_err(sess->clt, 2280 2277 "RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n", 2281 2278 MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); 2282 - return -EINVAL; 2283 - } 2284 - if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { 2285 - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", 2286 - sg_cnt); 2287 2279 return -EINVAL; 2288 2280 } 2289 2281 total_len = 0; ··· 2372 2374 2373 2375 out: 2374 2376 rtrs_clt_update_wc_stats(con); 2375 - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); 2377 + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); 2376 2378 rtrs_clt_change_state(sess, state); 2377 2379 } 2378 2380 ··· 2434 2436 2435 2437 out: 2436 2438 if (tx_iu) 2437 - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); 2439 + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); 2438 2440 if (rx_iu) 2439 - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); 2441 + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); 2440 2442 if (unlikely(err)) 2441 2443 /* If we've never taken async path because of malloc problems */ 2442 2444 rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); ··· 2936 2938 * IO will never grab it. Also it is very important to add 2937 2939 * path before init, since init fires LINK_CONNECTED event. 2938 2940 */ 2939 - rtrs_clt_add_path_to_arr(sess, addr); 2941 + rtrs_clt_add_path_to_arr(sess); 2940 2942 2941 2943 err = init_sess(sess); 2942 2944 if (err)
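The new con_mutex serializes per-connection QP/CQ creation and teardown, and lockdep_assert_held() both documents and, with lockdep enabled, enforces the locking contract inside the helpers. A minimal sketch of that convention using illustrative names:

    #include <linux/mutex.h>
    #include <linux/lockdep.h>

    struct demo_con {
            struct mutex con_mutex;
            bool qp_ready;
    };

    /* Must be called with con->con_mutex held. */
    static void demo_destroy_qp(struct demo_con *con)
    {
            lockdep_assert_held(&con->con_mutex);
            con->qp_ready = false;
    }

    static void demo_teardown(struct demo_con *con)
    {
            mutex_lock(&con->con_mutex);
            demo_destroy_qp(con);
            mutex_unlock(&con->con_mutex);
    }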
+1
drivers/infiniband/ulp/rtrs/rtrs-clt.h
··· 72 72 struct rtrs_iu *rsp_ius; 73 73 u32 queue_size; 74 74 unsigned int cpu; 75 + struct mutex con_mutex; 75 76 atomic_t io_cnt; 76 77 int cm_err; 77 78 };
+1 -2
drivers/infiniband/ulp/rtrs/rtrs-pri.h
··· 287 287 struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, 288 288 struct ib_device *dev, enum dma_data_direction, 289 289 void (*done)(struct ib_cq *cq, struct ib_wc *wc)); 290 - void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir, 291 - struct ib_device *dev, u32 queue_size); 290 + void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); 292 291 int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); 293 292 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, 294 293 struct ib_send_wr *head);
+9 -12
drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
··· 27 27 }; 28 28 29 29 static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj, 30 - struct kobj_attribute *attr, 31 - char *page) 30 + struct kobj_attribute *attr, char *buf) 32 31 { 33 - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", 34 - attr->attr.name); 32 + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); 35 33 } 36 34 37 35 static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, ··· 70 72 sess = container_of(kobj, typeof(*sess), kobj); 71 73 usr_con = sess->s.con[0]; 72 74 73 - return scnprintf(page, PAGE_SIZE, "%u\n", 74 - usr_con->cm_id->port_num); 75 + return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num); 75 76 } 76 77 77 78 static struct kobj_attribute rtrs_srv_hca_port_attr = ··· 84 87 85 88 sess = container_of(kobj, struct rtrs_srv_sess, kobj); 86 89 87 - return scnprintf(page, PAGE_SIZE, "%s\n", 88 - sess->s.dev->ib_dev->name); 90 + return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name); 89 91 } 90 92 91 93 static struct kobj_attribute rtrs_srv_hca_name_attr = ··· 111 115 char *page) 112 116 { 113 117 struct rtrs_srv_sess *sess; 114 - int cnt; 118 + int len; 115 119 116 120 sess = container_of(kobj, struct rtrs_srv_sess, kobj); 117 - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, 118 - page, PAGE_SIZE); 119 - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); 121 + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, 122 + PAGE_SIZE); 123 + len += sysfs_emit_at(page, len, "\n"); 124 + return len; 120 125 } 121 126 122 127 static struct kobj_attribute rtrs_srv_dst_addr_attr =
+60 -87
drivers/infiniband/ulp/rtrs/rtrs-srv.c
··· 113 113 return changed; 114 114 } 115 115 116 - static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, 117 - enum rtrs_srv_state new_state, 118 - enum rtrs_srv_state *old_state) 116 + static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, 117 + enum rtrs_srv_state new_state) 119 118 { 120 119 bool changed; 121 120 122 121 spin_lock_irq(&sess->state_lock); 123 - *old_state = sess->state; 124 122 changed = __rtrs_srv_change_state(sess, new_state); 125 123 spin_unlock_irq(&sess->state_lock); 126 124 127 125 return changed; 128 - } 129 - 130 - static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, 131 - enum rtrs_srv_state new_state) 132 - { 133 - enum rtrs_srv_state old_state; 134 - 135 - return rtrs_srv_change_state_get_old(sess, new_state, &old_state); 136 126 } 137 127 138 128 static void free_id(struct rtrs_srv_op *id) ··· 461 471 462 472 void close_sess(struct rtrs_srv_sess *sess) 463 473 { 464 - enum rtrs_srv_state old_state; 465 - 466 - if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, 467 - &old_state)) 474 + if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) 468 475 queue_work(rtrs_wq, &sess->close_work); 469 476 WARN_ON(sess->state != RTRS_SRV_CLOSING); 470 477 } ··· 564 577 struct rtrs_srv_mr *srv_mr; 565 578 566 579 srv_mr = &sess->mrs[i]; 567 - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, 568 - sess->s.dev->ib_dev, 1); 580 + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); 569 581 ib_dereg_mr(srv_mr->mr); 570 582 ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, 571 583 srv_mr->sgt.nents, DMA_BIDIRECTIONAL); ··· 668 682 sgt = &srv_mr->sgt; 669 683 mr = srv_mr->mr; 670 684 free_iu: 671 - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, 672 - sess->s.dev->ib_dev, 1); 685 + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); 673 686 dereg_mr: 674 687 ib_dereg_mr(mr); 675 688 unmap_sg: ··· 720 735 struct rtrs_iu *iu; 721 736 722 737 iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); 723 - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); 738 + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); 724 739 725 740 if (unlikely(wc->status != IB_WC_SUCCESS)) { 726 741 rtrs_err(s, "Sess info response send failed: %s\n", ··· 846 861 if (unlikely(err)) { 847 862 rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); 848 863 iu_free: 849 - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); 864 + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); 850 865 } 851 866 rwr_free: 852 867 kfree(rwr); ··· 891 906 goto close; 892 907 893 908 out: 894 - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); 909 + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); 895 910 return; 896 911 close: 897 912 close_sess(sess); ··· 914 929 err = rtrs_iu_post_recv(&con->c, rx_iu); 915 930 if (unlikely(err)) { 916 931 rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); 917 - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); 932 + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); 918 933 return err; 919 934 } 920 935 ··· 1313 1328 kfree(srv); 1314 1329 } 1315 1330 1316 - static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, 1317 - const uuid_t *paths_uuid) 1318 - { 1319 - struct rtrs_srv *srv; 1320 - int i; 1321 - 1322 - srv = kzalloc(sizeof(*srv), GFP_KERNEL); 1323 - if (!srv) 1324 - return NULL; 1325 - 1326 - refcount_set(&srv->refcount, 1); 1327 - INIT_LIST_HEAD(&srv->paths_list); 1328 - mutex_init(&srv->paths_mutex); 1329 - mutex_init(&srv->paths_ev_mutex); 1330 - uuid_copy(&srv->paths_uuid, paths_uuid); 1331 - srv->queue_depth = sess_queue_depth; 1332 - srv->ctx = ctx; 1333 
- device_initialize(&srv->dev); 1334 - srv->dev.release = rtrs_srv_dev_release; 1335 - 1336 - srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), 1337 - GFP_KERNEL); 1338 - if (!srv->chunks) 1339 - goto err_free_srv; 1340 - 1341 - for (i = 0; i < srv->queue_depth; i++) { 1342 - srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL); 1343 - if (!srv->chunks[i]) 1344 - goto err_free_chunks; 1345 - } 1346 - list_add(&srv->ctx_list, &ctx->srv_list); 1347 - 1348 - return srv; 1349 - 1350 - err_free_chunks: 1351 - while (i--) 1352 - mempool_free(srv->chunks[i], chunk_pool); 1353 - kfree(srv->chunks); 1354 - 1355 - err_free_srv: 1356 - kfree(srv); 1357 - 1358 - return NULL; 1359 - } 1360 - 1361 1331 static void free_srv(struct rtrs_srv *srv) 1362 1332 { 1363 1333 int i; ··· 1327 1387 put_device(&srv->dev); 1328 1388 } 1329 1389 1330 - static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, 1331 - const uuid_t *paths_uuid) 1332 - { 1333 - struct rtrs_srv *srv; 1334 - 1335 - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { 1336 - if (uuid_equal(&srv->paths_uuid, paths_uuid) && 1337 - refcount_inc_not_zero(&srv->refcount)) 1338 - return srv; 1339 - } 1340 - 1341 - return NULL; 1342 - } 1343 - 1344 1390 static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, 1345 1391 const uuid_t *paths_uuid) 1346 1392 { 1347 1393 struct rtrs_srv *srv; 1394 + int i; 1348 1395 1349 1396 mutex_lock(&ctx->srv_mutex); 1350 - srv = __find_srv_and_get(ctx, paths_uuid); 1351 - if (!srv) 1352 - srv = __alloc_srv(ctx, paths_uuid); 1397 + list_for_each_entry(srv, &ctx->srv_list, ctx_list) { 1398 + if (uuid_equal(&srv->paths_uuid, paths_uuid) && 1399 + refcount_inc_not_zero(&srv->refcount)) { 1400 + mutex_unlock(&ctx->srv_mutex); 1401 + return srv; 1402 + } 1403 + } 1404 + 1405 + /* need to allocate a new srv */ 1406 + srv = kzalloc(sizeof(*srv), GFP_KERNEL); 1407 + if (!srv) { 1408 + mutex_unlock(&ctx->srv_mutex); 1409 + return NULL; 1410 + } 1411 + 1412 + INIT_LIST_HEAD(&srv->paths_list); 1413 + mutex_init(&srv->paths_mutex); 1414 + mutex_init(&srv->paths_ev_mutex); 1415 + uuid_copy(&srv->paths_uuid, paths_uuid); 1416 + srv->queue_depth = sess_queue_depth; 1417 + srv->ctx = ctx; 1418 + device_initialize(&srv->dev); 1419 + srv->dev.release = rtrs_srv_dev_release; 1420 + list_add(&srv->ctx_list, &ctx->srv_list); 1353 1421 mutex_unlock(&ctx->srv_mutex); 1354 1422 1423 + srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), 1424 + GFP_KERNEL); 1425 + if (!srv->chunks) 1426 + goto err_free_srv; 1427 + 1428 + for (i = 0; i < srv->queue_depth; i++) { 1429 + srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL); 1430 + if (!srv->chunks[i]) 1431 + goto err_free_chunks; 1432 + } 1433 + refcount_set(&srv->refcount, 1); 1434 + 1355 1435 return srv; 1436 + 1437 + err_free_chunks: 1438 + while (i--) 1439 + mempool_free(srv->chunks[i], chunk_pool); 1440 + kfree(srv->chunks); 1441 + 1442 + err_free_srv: 1443 + kfree(srv); 1444 + return NULL; 1356 1445 } 1357 1446 1358 1447 static void put_srv(struct rtrs_srv *srv) ··· 1782 1813 } 1783 1814 recon_cnt = le16_to_cpu(msg->recon_cnt); 1784 1815 srv = get_or_create_srv(ctx, &msg->paths_uuid); 1785 - if (!srv) { 1816 + /* 1817 + * "refcount == 0" happens if a previous thread calls get_or_create_srv 1818 + * allocate srv, but chunks of srv are not allocated yet. 1819 + */ 1820 + if (!srv || refcount_read(&srv->refcount) == 0) { 1786 1821 err = -ENOMEM; 1787 1822 goto reject_w_err; 1788 1823 }
+1 -1
drivers/infiniband/ulp/rtrs/rtrs-srv.h
··· 62 62 63 63 /* 64 64 * server side memory region context, when always_invalidate=Y, we need 65 - * queue_depth of memory regrion to invalidate each memory region. 65 + * queue_depth of memory region to invalidate each memory region. 66 66 */ 67 67 struct rtrs_srv_mr { 68 68 struct ib_mr *mr;
+23 -38
drivers/infiniband/ulp/rtrs/rtrs.c
··· 31 31 return NULL; 32 32 for (i = 0; i < queue_size; i++) { 33 33 iu = &ius[i]; 34 + iu->direction = dir; 34 35 iu->buf = kzalloc(size, gfp_mask); 35 36 if (!iu->buf) 36 37 goto err; ··· 42 41 43 42 iu->cqe.done = done; 44 43 iu->size = size; 45 - iu->direction = dir; 46 44 } 47 45 return ius; 48 46 err: 49 - rtrs_iu_free(ius, dir, dma_dev, i); 47 + rtrs_iu_free(ius, dma_dev, i); 50 48 return NULL; 51 49 } 52 50 EXPORT_SYMBOL_GPL(rtrs_iu_alloc); 53 51 54 - void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, 55 - struct ib_device *ibdev, u32 queue_size) 52 + void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) 56 53 { 57 54 struct rtrs_iu *iu; 58 55 int i; ··· 60 61 61 62 for (i = 0; i < queue_size; i++) { 62 63 iu = &ius[i]; 63 - ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir); 64 + ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction); 64 65 kfree(iu->buf); 65 66 } 66 67 kfree(ius); ··· 104 105 } 105 106 EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); 106 107 108 + static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, 109 + struct ib_send_wr *wr) 110 + { 111 + if (head) { 112 + struct ib_send_wr *tail = head; 113 + 114 + while (tail->next) 115 + tail = tail->next; 116 + tail->next = wr; 117 + } else { 118 + head = wr; 119 + } 120 + 121 + return ib_post_send(qp, head, NULL); 122 + } 123 + 107 124 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, 108 125 struct ib_send_wr *head) 109 126 { ··· 142 127 .send_flags = IB_SEND_SIGNALED, 143 128 }; 144 129 145 - if (head) { 146 - struct ib_send_wr *tail = head; 147 - 148 - while (tail->next) 149 - tail = tail->next; 150 - tail->next = &wr; 151 - } else { 152 - head = &wr; 153 - } 154 - 155 - return ib_post_send(con->qp, head, NULL); 130 + return rtrs_post_send(con->qp, head, &wr); 156 131 } 157 132 EXPORT_SYMBOL_GPL(rtrs_iu_post_send); 158 133 ··· 174 169 if (WARN_ON(sge[i].length == 0)) 175 170 return -EINVAL; 176 171 177 - if (head) { 178 - struct ib_send_wr *tail = head; 179 - 180 - while (tail->next) 181 - tail = tail->next; 182 - tail->next = &wr.wr; 183 - } else { 184 - head = &wr.wr; 185 - } 186 - 187 - return ib_post_send(con->qp, head, NULL); 172 + return rtrs_post_send(con->qp, head, &wr.wr); 188 173 } 189 174 EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); 190 175 ··· 191 196 .ex.imm_data = cpu_to_be32(imm_data), 192 197 }; 193 198 194 - if (head) { 195 - struct ib_send_wr *tail = head; 196 - 197 - while (tail->next) 198 - tail = tail->next; 199 - tail->next = &wr; 200 - } else { 201 - head = &wr; 202 - } 203 - 204 - return ib_post_send(con->qp, head, NULL); 199 + return rtrs_post_send(con->qp, head, &wr); 205 200 } 206 201 EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); 207 202
+26 -22
drivers/infiniband/ulp/srp/ib_srp.c
··· 169 169 int tmo = *(int *)kp->arg; 170 170 171 171 if (tmo >= 0) 172 - return sprintf(buffer, "%d\n", tmo); 172 + return sysfs_emit(buffer, "%d\n", tmo); 173 173 else 174 - return sprintf(buffer, "off\n"); 174 + return sysfs_emit(buffer, "off\n"); 175 175 } 176 176 177 177 static int srp_tmo_set(const char *val, const struct kernel_param *kp) ··· 2896 2896 { 2897 2897 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2898 2898 2899 - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 2899 + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 2900 2900 } 2901 2901 2902 2902 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, ··· 2904 2904 { 2905 2905 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2906 2906 2907 - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 2907 + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 2908 2908 } 2909 2909 2910 2910 static ssize_t show_service_id(struct device *dev, ··· 2914 2914 2915 2915 if (target->using_rdma_cm) 2916 2916 return -ENOENT; 2917 - return sprintf(buf, "0x%016llx\n", 2918 - be64_to_cpu(target->ib_cm.service_id)); 2917 + return sysfs_emit(buf, "0x%016llx\n", 2918 + be64_to_cpu(target->ib_cm.service_id)); 2919 2919 } 2920 2920 2921 2921 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, ··· 2925 2925 2926 2926 if (target->using_rdma_cm) 2927 2927 return -ENOENT; 2928 - return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); 2928 + 2929 + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); 2929 2930 } 2930 2931 2931 2932 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, ··· 2934 2933 { 2935 2934 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2936 2935 2937 - return sprintf(buf, "%pI6\n", target->sgid.raw); 2936 + return sysfs_emit(buf, "%pI6\n", target->sgid.raw); 2938 2937 } 2939 2938 2940 2939 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, ··· 2945 2944 2946 2945 if (target->using_rdma_cm) 2947 2946 return -ENOENT; 2948 - return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); 2947 + 2948 + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); 2949 2949 } 2950 2950 2951 2951 static ssize_t show_orig_dgid(struct device *dev, ··· 2956 2954 2957 2955 if (target->using_rdma_cm) 2958 2956 return -ENOENT; 2959 - return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); 2957 + 2958 + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); 2960 2959 } 2961 2960 2962 2961 static ssize_t show_req_lim(struct device *dev, ··· 2971 2968 ch = &target->ch[i]; 2972 2969 req_lim = min(req_lim, ch->req_lim); 2973 2970 } 2974 - return sprintf(buf, "%d\n", req_lim); 2971 + 2972 + return sysfs_emit(buf, "%d\n", req_lim); 2975 2973 } 2976 2974 2977 2975 static ssize_t show_zero_req_lim(struct device *dev, ··· 2980 2976 { 2981 2977 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2982 2978 2983 - return sprintf(buf, "%d\n", target->zero_req_lim); 2979 + return sysfs_emit(buf, "%d\n", target->zero_req_lim); 2984 2980 } 2985 2981 2986 2982 static ssize_t show_local_ib_port(struct device *dev, ··· 2988 2984 { 2989 2985 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2990 2986 2991 - return sprintf(buf, "%d\n", target->srp_host->port); 2987 + return sysfs_emit(buf, "%d\n", target->srp_host->port); 2992 2988 } 2993 2989 2994 2990 static ssize_t 
show_local_ib_device(struct device *dev, ··· 2996 2992 { 2997 2993 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2998 2994 2999 - return sprintf(buf, "%s\n", 3000 - dev_name(&target->srp_host->srp_dev->dev->dev)); 2995 + return sysfs_emit(buf, "%s\n", 2996 + dev_name(&target->srp_host->srp_dev->dev->dev)); 3001 2997 } 3002 2998 3003 2999 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, ··· 3005 3001 { 3006 3002 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3007 3003 3008 - return sprintf(buf, "%d\n", target->ch_count); 3004 + return sysfs_emit(buf, "%d\n", target->ch_count); 3009 3005 } 3010 3006 3011 3007 static ssize_t show_comp_vector(struct device *dev, ··· 3013 3009 { 3014 3010 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3015 3011 3016 - return sprintf(buf, "%d\n", target->comp_vector); 3012 + return sysfs_emit(buf, "%d\n", target->comp_vector); 3017 3013 } 3018 3014 3019 3015 static ssize_t show_tl_retry_count(struct device *dev, ··· 3021 3017 { 3022 3018 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3023 3019 3024 - return sprintf(buf, "%d\n", target->tl_retry_count); 3020 + return sysfs_emit(buf, "%d\n", target->tl_retry_count); 3025 3021 } 3026 3022 3027 3023 static ssize_t show_cmd_sg_entries(struct device *dev, ··· 3029 3025 { 3030 3026 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3031 3027 3032 - return sprintf(buf, "%u\n", target->cmd_sg_cnt); 3028 + return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); 3033 3029 } 3034 3030 3035 3031 static ssize_t show_allow_ext_sg(struct device *dev, ··· 3037 3033 { 3038 3034 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3039 3035 3040 - return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); 3036 + return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); 3041 3037 } 3042 3038 3043 3039 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); ··· 3897 3893 { 3898 3894 struct srp_host *host = container_of(dev, struct srp_host, dev); 3899 3895 3900 - return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); 3896 + return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); 3901 3897 } 3902 3898 3903 3899 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); ··· 3907 3903 { 3908 3904 struct srp_host *host = container_of(dev, struct srp_host, dev); 3909 3905 3910 - return sprintf(buf, "%d\n", host->port); 3906 + return sysfs_emit(buf, "%d\n", host->port); 3911 3907 } 3912 3908 3913 3909 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+7 -7
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 3448 3448 struct se_portal_group *se_tpg = attrib_to_tpg(item); 3449 3449 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); 3450 3450 3451 - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); 3451 + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size); 3452 3452 } 3453 3453 3454 3454 static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, ··· 3485 3485 struct se_portal_group *se_tpg = attrib_to_tpg(item); 3486 3486 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); 3487 3487 3488 - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); 3488 + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size); 3489 3489 } 3490 3490 3491 3491 static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, ··· 3522 3522 struct se_portal_group *se_tpg = attrib_to_tpg(item); 3523 3523 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); 3524 3524 3525 - return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); 3525 + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size); 3526 3526 } 3527 3527 3528 3528 static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, ··· 3559 3559 struct se_portal_group *se_tpg = attrib_to_tpg(item); 3560 3560 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); 3561 3561 3562 - return sprintf(page, "%d\n", sport->port_attrib.use_srq); 3562 + return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq); 3563 3563 } 3564 3564 3565 3565 static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, ··· 3649 3649 3650 3650 static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) 3651 3651 { 3652 - return sprintf(page, "%d\n", rdma_cm_port); 3652 + return sysfs_emit(page, "%d\n", rdma_cm_port); 3653 3653 } 3654 3654 3655 3655 static ssize_t srpt_rdma_cm_port_store(struct config_item *item, ··· 3705 3705 struct se_portal_group *se_tpg = to_tpg(item); 3706 3706 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); 3707 3707 3708 - return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); 3708 + return sysfs_emit(page, "%d\n", sport->enabled); 3709 3709 } 3710 3710 3711 3711 static ssize_t srpt_tpg_enable_store(struct config_item *item, ··· 3812 3812 3813 3813 static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) 3814 3814 { 3815 - return scnprintf(buf, PAGE_SIZE, "\n"); 3815 + return sysfs_emit(buf, "\n"); 3816 3816 } 3817 3817 3818 3818 CONFIGFS_ATTR_RO(srpt_wwn_, version);
+1 -1
drivers/infiniband/ulp/srpt/ib_srpt.h
··· 347 347 }; 348 348 349 349 /** 350 - * struct srpt_port_attib - attributes for SRPT port 350 + * struct srpt_port_attrib - attributes for SRPT port 351 351 * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. 352 352 * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. 353 353 * @srp_sq_size: Shared receive queue (SRQ) size.
+1 -1
drivers/nvme/host/rdma.c
··· 853 853 return error; 854 854 855 855 ctrl->device = ctrl->queues[0].device; 856 - ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); 856 + ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); 857 857 858 858 /* T10-PI support */ 859 859 if (ctrl->device->dev->attrs.device_cap_flags &
+2 -1
drivers/nvme/target/rdma.c
··· 414 414 if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) 415 415 goto out_free_rsp; 416 416 417 - r->req.p2p_client = &ndev->device->dev; 417 + if (!ib_uses_virt_dma(ndev->device)) 418 + r->req.p2p_client = &ndev->device->dev; 418 419 r->send_sge.length = sizeof(*r->req.cqe); 419 420 r->send_sge.lkey = ndev->pd->local_dma_lkey; 420 421
+1 -24
drivers/pci/p2pdma.c
··· 556 556 return -1; 557 557 558 558 for (i = 0; i < num_clients; i++) { 559 - #ifdef CONFIG_DMA_VIRT_OPS 560 - if (clients[i]->dma_ops == &dma_virt_ops) { 561 - if (verbose) 562 - dev_warn(clients[i], 563 - "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n"); 564 - return -1; 565 - } 566 - #endif 567 - 568 559 pci_client = find_parent_pci_dev(clients[i]); 569 560 if (!pci_client) { 570 561 if (verbose) ··· 825 834 struct device *dev, struct scatterlist *sg, int nents) 826 835 { 827 836 struct scatterlist *s; 828 - phys_addr_t paddr; 829 837 int i; 830 838 831 - /* 832 - * p2pdma mappings are not compatible with devices that use 833 - * dma_virt_ops. If the upper layers do the right thing 834 - * this should never happen because it will be prevented 835 - * by the check in pci_p2pdma_distance_many() 836 - */ 837 - #ifdef CONFIG_DMA_VIRT_OPS 838 - if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops)) 839 - return 0; 840 - #endif 841 - 842 839 for_each_sg(sg, s, nents, i) { 843 - paddr = sg_phys(s); 844 - 845 - s->dma_address = paddr - p2p_pgmap->bus_offset; 840 + s->dma_address = sg_phys(s) - p2p_pgmap->bus_offset; 846 841 sg_dma_len(s) = s->length; 847 842 } 848 843
-2
include/linux/dma-mapping.h
··· 565 565 int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, 566 566 dma_addr_t dma_start, u64 size); 567 567 568 - extern const struct dma_map_ops dma_virt_ops; 569 - 570 568 #endif /* _LINUX_DMA_MAPPING_H */
+42
include/rdma/ib_umem.h
··· 34 34 return umem->address & ~PAGE_MASK; 35 35 } 36 36 37 + static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, 38 + unsigned long pgsz) 39 + { 40 + return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & 41 + (pgsz - 1); 42 + } 43 + 37 44 static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, 38 45 unsigned long pgsz) 39 46 { ··· 86 79 unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, 87 80 unsigned long pgsz_bitmap, 88 81 unsigned long virt); 82 + /** 83 + * ib_umem_find_best_pgoff - Find best HW page size 84 + * 85 + * @umem: umem struct 86 + * @pgsz_bitmap: bitmap of HW supported page sizes 87 + * @pgoff_bitmask: Mask of bits that can be represented with an offset 88 + * 89 + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting 90 + * an IOVA it accepts a bitmask specifying what address bits can be represented 91 + * with a page offset. 92 + * 93 + * For instance if the HW has multiple page sizes, requires 64 byte alignment, 94 + * and can support aligned offsets up to 4032 then pgoff_bitmask would be 95 + * "111111000000". 96 + * 97 + * If the pgoff_bitmask requires either alignment in the low bit or an 98 + * unavailable page size for the high bits, this function returns 0. 99 + */ 100 + static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, 101 + unsigned long pgsz_bitmap, 102 + u64 pgoff_bitmask) 103 + { 104 + struct scatterlist *sg = umem->sg_head.sgl; 105 + dma_addr_t dma_addr; 106 + 107 + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); 108 + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, 109 + dma_addr & pgoff_bitmask); 110 + } 89 111 90 112 #else /* CONFIG_INFINIBAND_USER_MEM */ 91 113 ··· 134 98 static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, 135 99 unsigned long pgsz_bitmap, 136 100 unsigned long virt) 101 + { 102 + return 0; 103 + } 104 + static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, 105 + unsigned long pgsz_bitmap, 106 + u64 pgoff_bitmask) 137 107 { 138 108 return 0; 139 109 }
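A sketch of how a driver might call the new helper; the page-size bitmap and offset mask below mirror the kernel-doc example above, and demo_pick_mr_page_size() is purely illustrative:

#include <linux/sizes.h>
#include <rdma/ib_umem.h>

static int demo_pick_mr_page_size(struct ib_umem *umem, unsigned long *pgsz)
{
        /* HW supports 4K/64K/1M pages and 64-byte aligned offsets up to 4032 */
        unsigned long pgsz_bitmap = SZ_4K | SZ_64K | SZ_1M;
        u64 pgoff_bitmask = 0xfc0;              /* "111111000000" */

        *pgsz = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
        if (!*pgsz)
                return -EINVAL; /* no supported page size can map this umem */
        return 0;
}
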
+97 -101
include/rdma/ib_verbs.h
··· 1235 1235 IB_QP_RESERVED3 = (1<<23), 1236 1236 IB_QP_RESERVED4 = (1<<24), 1237 1237 IB_QP_RATE_LIMIT = (1<<25), 1238 + 1239 + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), 1238 1240 }; 1239 1241 1240 1242 enum ib_qp_state { ··· 1472 1470 RDMA_REMOVE_DRIVER_REMOVE, 1473 1471 /* uobj is being cleaned-up before being committed */ 1474 1472 RDMA_REMOVE_ABORT, 1473 + /* The driver failed to destroy the uobject and is being disconnected */ 1474 + RDMA_REMOVE_DRIVER_FAILURE, 1475 1475 }; 1476 1476 1477 1477 struct ib_rdmacg_object { ··· 1485 1481 struct ib_ucontext { 1486 1482 struct ib_device *device; 1487 1483 struct ib_uverbs_file *ufile; 1488 - 1489 - bool cleanup_retryable; 1490 1484 1491 1485 struct ib_rdmacg_object cg_obj; 1492 1486 /* ··· 2404 2402 int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); 2405 2403 int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, 2406 2404 struct ib_udata *udata); 2405 + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, 2406 + struct ib_udata *udata); 2407 2407 int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 2408 2408 int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 2409 2409 int (*destroy_ah)(struct ib_ah *ah, u32 flags); ··· 2434 2430 struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, 2435 2431 u64 virt_addr, int mr_access_flags, 2436 2432 struct ib_udata *udata); 2437 - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, 2438 - u64 virt_addr, int mr_access_flags, 2439 - struct ib_pd *pd, struct ib_udata *udata); 2433 + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, 2434 + u64 length, u64 virt_addr, 2435 + int mr_access_flags, struct ib_pd *pd, 2436 + struct ib_udata *udata); 2440 2437 int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); 2441 2438 struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, 2442 2439 u32 max_num_sg); ··· 2671 2666 const struct attribute_group *groups[3]; 2672 2667 2673 2668 u64 uverbs_cmd_mask; 2674 - u64 uverbs_ex_cmd_mask; 2675 2669 2676 2670 char node_desc[IB_DEVICE_NODE_DESC_MAX]; 2677 2671 __be64 node_guid; ··· 2903 2899 size_t len) 2904 2900 { 2905 2901 return ib_is_buffer_cleared(udata->inbuf + offset, len); 2906 - } 2907 - 2908 - /** 2909 - * ib_is_destroy_retryable - Check whether the uobject destruction 2910 - * is retryable. 2911 - * @ret: The initial destruction return code 2912 - * @why: remove reason 2913 - * @uobj: The uobject that is destroyed 2914 - * 2915 - * This function is a helper function that IB layer and low-level drivers 2916 - * can use to consider whether the destruction of the given uobject is 2917 - * retry-able. 2918 - * It checks the original return code, if it wasn't success the destruction 2919 - * is retryable according to the ucontext state (i.e. cleanup_retryable) and 2920 - * the remove reason. (i.e. why). 2921 - * Must be called with the object locked for destroy. 2922 - */ 2923 - static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, 2924 - struct ib_uobject *uobj) 2925 - { 2926 - return ret && (why == RDMA_REMOVE_DESTROY || 2927 - uobj->context->cleanup_retryable); 2928 - } 2929 - 2930 - /** 2931 - * ib_destroy_usecnt - Called during destruction to check the usecnt 2932 - * @usecnt: The usecnt atomic 2933 - * @why: remove reason 2934 - * @uobj: The uobject that is destroyed 2935 - * 2936 - * Non-zero usecnts will block destruction unless destruction was triggered by 2937 - * a ucontext cleanup. 
2938 - */ 2939 - static inline int ib_destroy_usecnt(atomic_t *usecnt, 2940 - enum rdma_remove_reason why, 2941 - struct ib_uobject *uobj) 2942 - { 2943 - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) 2944 - return -EBUSY; 2945 - return 0; 2946 2902 } 2947 2903 2948 2904 /** ··· 3395 3431 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, 3396 3432 const char *caller); 3397 3433 3434 + /** 3435 + * ib_alloc_pd - Allocates an unused protection domain. 3436 + * @device: The device on which to allocate the protection domain. 3437 + * @flags: protection domain flags 3438 + * 3439 + * A protection domain object provides an association between QPs, shared 3440 + * receive queues, address handles, memory regions, and memory windows. 3441 + * 3442 + * Every PD has a local_dma_lkey which can be used as the lkey value for local 3443 + * memory operations. 3444 + */ 3398 3445 #define ib_alloc_pd(device, flags) \ 3399 3446 __ib_alloc_pd((device), (flags), KBUILD_MODNAME) 3400 3447 ··· 3631 3656 bad_recv_wr ? : &dummy); 3632 3657 } 3633 3658 3634 - struct ib_qp *ib_create_qp(struct ib_pd *pd, 3635 - struct ib_qp_init_attr *qp_init_attr); 3659 + struct ib_qp *ib_create_named_qp(struct ib_pd *pd, 3660 + struct ib_qp_init_attr *qp_init_attr, 3661 + const char *caller); 3662 + static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, 3663 + struct ib_qp_init_attr *init_attr) 3664 + { 3665 + return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME); 3666 + } 3636 3667 3637 3668 /** 3638 3669 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. ··· 3925 3944 -ENOSYS; 3926 3945 } 3927 3946 3947 + /* 3948 + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to 3949 + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual 3950 + * address into the dma address. 
3951 + */ 3952 + static inline bool ib_uses_virt_dma(struct ib_device *dev) 3953 + { 3954 + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; 3955 + } 3956 + 3928 3957 /** 3929 3958 * ib_dma_mapping_error - check a DMA addr for error 3930 3959 * @dev: The device for which the dma_addr was created ··· 3942 3951 */ 3943 3952 static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) 3944 3953 { 3954 + if (ib_uses_virt_dma(dev)) 3955 + return 0; 3945 3956 return dma_mapping_error(dev->dma_device, dma_addr); 3946 3957 } 3947 3958 ··· 3958 3965 void *cpu_addr, size_t size, 3959 3966 enum dma_data_direction direction) 3960 3967 { 3968 + if (ib_uses_virt_dma(dev)) 3969 + return (uintptr_t)cpu_addr; 3961 3970 return dma_map_single(dev->dma_device, cpu_addr, size, direction); 3962 3971 } 3963 3972 ··· 3974 3979 u64 addr, size_t size, 3975 3980 enum dma_data_direction direction) 3976 3981 { 3977 - dma_unmap_single(dev->dma_device, addr, size, direction); 3982 + if (!ib_uses_virt_dma(dev)) 3983 + dma_unmap_single(dev->dma_device, addr, size, direction); 3978 3984 } 3979 3985 3980 3986 /** ··· 3992 3996 size_t size, 3993 3997 enum dma_data_direction direction) 3994 3998 { 3999 + if (ib_uses_virt_dma(dev)) 4000 + return (uintptr_t)(page_address(page) + offset); 3995 4001 return dma_map_page(dev->dma_device, page, offset, size, direction); 3996 4002 } 3997 4003 ··· 4008 4010 u64 addr, size_t size, 4009 4011 enum dma_data_direction direction) 4010 4012 { 4011 - dma_unmap_page(dev->dma_device, addr, size, direction); 4013 + if (!ib_uses_virt_dma(dev)) 4014 + dma_unmap_page(dev->dma_device, addr, size, direction); 4015 + } 4016 + 4017 + int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); 4018 + static inline int ib_dma_map_sg_attrs(struct ib_device *dev, 4019 + struct scatterlist *sg, int nents, 4020 + enum dma_data_direction direction, 4021 + unsigned long dma_attrs) 4022 + { 4023 + if (ib_uses_virt_dma(dev)) 4024 + return ib_dma_virt_map_sg(dev, sg, nents); 4025 + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, 4026 + dma_attrs); 4027 + } 4028 + 4029 + static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, 4030 + struct scatterlist *sg, int nents, 4031 + enum dma_data_direction direction, 4032 + unsigned long dma_attrs) 4033 + { 4034 + if (!ib_uses_virt_dma(dev)) 4035 + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, 4036 + dma_attrs); 4012 4037 } 4013 4038 4014 4039 /** ··· 4045 4024 struct scatterlist *sg, int nents, 4046 4025 enum dma_data_direction direction) 4047 4026 { 4048 - return dma_map_sg(dev->dma_device, sg, nents, direction); 4027 + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); 4049 4028 } 4050 4029 4051 4030 /** ··· 4059 4038 struct scatterlist *sg, int nents, 4060 4039 enum dma_data_direction direction) 4061 4040 { 4062 - dma_unmap_sg(dev->dma_device, sg, nents, direction); 4063 - } 4064 - 4065 - static inline int ib_dma_map_sg_attrs(struct ib_device *dev, 4066 - struct scatterlist *sg, int nents, 4067 - enum dma_data_direction direction, 4068 - unsigned long dma_attrs) 4069 - { 4070 - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, 4071 - dma_attrs); 4072 - } 4073 - 4074 - static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, 4075 - struct scatterlist *sg, int nents, 4076 - enum dma_data_direction direction, 4077 - unsigned long dma_attrs) 4078 - { 4079 - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); 4041 + 
ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); 4080 4042 } 4081 4043 4082 4044 /** ··· 4070 4066 */ 4071 4067 static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) 4072 4068 { 4069 + if (ib_uses_virt_dma(dev)) 4070 + return UINT_MAX; 4073 4071 return dma_get_max_seg_size(dev->dma_device); 4074 4072 } 4075 4073 ··· 4087 4081 size_t size, 4088 4082 enum dma_data_direction dir) 4089 4083 { 4090 - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); 4084 + if (!ib_uses_virt_dma(dev)) 4085 + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); 4091 4086 } 4092 4087 4093 4088 /** ··· 4103 4096 size_t size, 4104 4097 enum dma_data_direction dir) 4105 4098 { 4106 - dma_sync_single_for_device(dev->dma_device, addr, size, dir); 4107 - } 4108 - 4109 - /** 4110 - * ib_dma_alloc_coherent - Allocate memory and map it for DMA 4111 - * @dev: The device for which the DMA address is requested 4112 - * @size: The size of the region to allocate in bytes 4113 - * @dma_handle: A pointer for returning the DMA address of the region 4114 - * @flag: memory allocator flags 4115 - */ 4116 - static inline void *ib_dma_alloc_coherent(struct ib_device *dev, 4117 - size_t size, 4118 - dma_addr_t *dma_handle, 4119 - gfp_t flag) 4120 - { 4121 - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); 4122 - } 4123 - 4124 - /** 4125 - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() 4126 - * @dev: The device for which the DMA addresses were allocated 4127 - * @size: The size of the region 4128 - * @cpu_addr: the address returned by ib_dma_alloc_coherent() 4129 - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() 4130 - */ 4131 - static inline void ib_dma_free_coherent(struct ib_device *dev, 4132 - size_t size, void *cpu_addr, 4133 - dma_addr_t dma_handle) 4134 - { 4135 - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); 4099 + if (!ib_uses_virt_dma(dev)) 4100 + dma_sync_single_for_device(dev->dma_device, addr, size, dir); 4136 4101 } 4137 4102 4138 4103 /* ib_reg_user_mr - register a memory region for virtual addresses from kernel ··· 4196 4217 struct inode *inode, struct ib_udata *udata); 4197 4218 int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); 4198 4219 4199 - static inline int ib_check_mr_access(int flags) 4220 + static inline int ib_check_mr_access(struct ib_device *ib_dev, 4221 + unsigned int flags) 4200 4222 { 4201 4223 /* 4202 4224 * Local write permission is required if remote write or ··· 4210 4230 if (flags & ~IB_ACCESS_SUPPORTED) 4211 4231 return -EINVAL; 4212 4232 4233 + if (flags & IB_ACCESS_ON_DEMAND && 4234 + !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) 4235 + return -EINVAL; 4213 4236 return 0; 4214 4237 } 4215 4238 ··· 4597 4614 container_of(device, struct ib_core_device, dev); 4598 4615 4599 4616 return coredev->owner; 4617 + } 4618 + 4619 + /** 4620 + * ibdev_to_node - return the NUMA node for a given ib_device 4621 + * @dev: device to get the NUMA node for. 4622 + */ 4623 + static inline int ibdev_to_node(struct ib_device *ibdev) 4624 + { 4625 + struct device *parent = ibdev->dev.parent; 4626 + 4627 + if (!parent) 4628 + return NUMA_NO_NODE; 4629 + return dev_to_node(parent); 4600 4630 } 4601 4631 4602 4632 /**
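With dma_virt_ops gone, ULPs must always go through the ib_dma_* wrappers rather than calling dma_map_*() on dev->dma_device directly; for software devices (where ib_uses_virt_dma() is true) the wrappers simply stash the kernel virtual address. A minimal sketch of that mapping discipline, with placeholder function names:

#include <rdma/ib_verbs.h>

static int demo_map_hdr(struct ib_device *dev, void *hdr, size_t len,
                        u64 *dma_addr)
{
        *dma_addr = ib_dma_map_single(dev, hdr, len, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(dev, *dma_addr))
                return -ENOMEM;
        return 0;
}

static void demo_unmap_hdr(struct ib_device *dev, u64 dma_addr, size_t len)
{
        /* no-op for ib_uses_virt_dma() devices, a real unmap otherwise */
        ib_dma_unmap_single(dev, dma_addr, len, DMA_TO_DEVICE);
}
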
+24
include/rdma/restrack.h
··· 68 68 * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI 69 69 */ 70 70 bool valid; 71 + /** 72 + * @no_track: don't add this entry to restrack DB 73 + * 74 + * This field is used to mark an entry that doesn't need to be added to 75 + * internal restrack DB and presented later to the users at the nldev 76 + * query stage. 77 + */ 78 + u8 no_track : 1; 71 79 /* 72 80 * @kref: Protect destroy of the resource 73 81 */ ··· 153 145 struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, 154 146 enum rdma_restrack_type type, 155 147 u32 id); 148 + 149 + /** 150 + * rdma_restrack_no_track() - don't add resource to the DB 151 + * @res: resource entry 152 + * 153 + * Every user of this API should be cross-examined. 154 + * Probably you don't need to use this function. 155 + */ 156 + static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) 157 + { 158 + res->no_track = true; 159 + } 160 + static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) 161 + { 162 + return !res->no_track; 163 + } 156 164 #endif /* _RDMA_RESTRACK_H_ */
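A sketch of the intended (rare) use of rdma_restrack_no_track(): a driver-internal resource, such as the MLX5_IB_QPT_HW_GSI QP mentioned above, opts out of restrack/nldev accounting before it is added. The wrapper name is hypothetical:

#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>

static void demo_hide_internal_qp(struct ib_qp *qp)
{
        /* set before the QP is committed so it never shows up in nldev dumps */
        rdma_restrack_no_track(&qp->res);
}
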
+19 -6
include/rdma/uverbs_ioctl.h
··· 647 647 * 'ucontext'. 648 648 * 649 649 */ 650 - #define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ 651 - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ 652 - driver_udata) \ 653 - ->context, \ 654 - drv_dev_struct, member) : \ 655 - (drv_dev_struct *)NULL) 650 + static inline struct uverbs_attr_bundle * 651 + rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) 652 + { 653 + return container_of(udata, struct uverbs_attr_bundle, driver_udata); 654 + } 655 + 656 + #define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ 657 + (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ 658 + drv_dev_struct, member) : (drv_dev_struct *)NULL) 656 659 657 660 #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) 658 661 ··· 864 861 size_t size) 865 862 { 866 863 return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); 864 + } 865 + 866 + static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, 867 + size_t n, size_t size) 868 + { 869 + size_t bytes; 870 + 871 + if (unlikely(check_mul_overflow(n, size, &bytes))) 872 + return ERR_PTR(-EOVERFLOW); 873 + return uverbs_zalloc(bundle, bytes); 867 874 } 868 875 int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, 869 876 size_t idx, s64 lower_bound, u64 upper_bound,
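uverbs_kcalloc() pairs the bundle-lifetime allocator with an explicit multiply-overflow check. A sketch of how a method handler might use it for a user-sized array; the handler and its num_sge parameter are illustrative:

#include <rdma/uverbs_ioctl.h>

static int demo_handler(struct uverbs_attr_bundle *attrs, u32 num_sge)
{
        struct ib_sge *sg_list;

        sg_list = uverbs_kcalloc(attrs, num_sge, sizeof(*sg_list));
        if (IS_ERR(sg_list))
                return PTR_ERR(sg_list);        /* -EOVERFLOW or -ENOMEM */

        /* ... fill and use sg_list; it is freed together with the bundle ... */
        return 0;
}
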
+7 -2
include/rdma/uverbs_types.h
··· 71 71 enum rdma_remove_reason why, 72 72 struct uverbs_attr_bundle *attrs); 73 73 void (*remove_handle)(struct ib_uobject *uobj); 74 + void (*swap_uobjects)(struct ib_uobject *obj_old, 75 + struct ib_uobject *obj_new); 74 76 }; 75 77 76 78 struct uverbs_obj_type { ··· 118 116 bool hw_obj_valid); 119 117 void rdma_alloc_commit_uobject(struct ib_uobject *uobj, 120 118 struct uverbs_attr_bundle *attrs); 119 + void rdma_assign_uobject(struct ib_uobject *to_uobj, 120 + struct ib_uobject *new_uobj, 121 + struct uverbs_attr_bundle *attrs); 121 122 122 123 /* 123 124 * uverbs_uobject_get is called in order to increase the reference count on ··· 143 138 * because the driver is removed or the FD is closed. 144 139 */ 145 140 struct uverbs_obj_type type; 146 - int (*destroy_object)(struct ib_uobject *uobj, 147 - enum rdma_remove_reason why); 141 + void (*destroy_object)(struct ib_uobject *uobj, 142 + enum rdma_remove_reason why); 148 143 const struct file_operations *fops; 149 144 const char *name; 150 145 int flags;
+10
include/uapi/rdma/hns-abi.h
··· 43 43 __u32 reserved; 44 44 }; 45 45 46 + enum hns_roce_cq_cap_flags { 47 + HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, 48 + }; 49 + 46 50 struct hns_roce_ib_create_cq_resp { 47 51 __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ 48 52 __aligned_u64 cap_flags; ··· 71 67 __u8 sq_no_prefetch; 72 68 __u8 reserved[5]; 73 69 __aligned_u64 sdb_addr; 70 + }; 71 + 72 + enum hns_roce_qp_cap_flags { 73 + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, 74 + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, 75 + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, 74 76 }; 75 77 76 78 struct hns_roce_ib_create_qp_resp {
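These enums name the bits the kernel already reports in the cap_flags words of the create_cq/create_qp responses, so user space can test them symbolically. A sketch of a provider-side check, assuming the QP response struct carries a cap_flags field like the CQ response above:

#include <stdbool.h>
#include <rdma/hns-abi.h>

static void demo_parse_qp_caps(const struct hns_roce_ib_create_qp_resp *resp,
                               bool *rq_record_db, bool *sq_record_db)
{
        *rq_record_db = !!(resp->cap_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB);
        *sq_record_db = !!(resp->cap_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB);
}
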
-14
include/uapi/rdma/ib_user_verbs.h
··· 596 596 IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, 597 597 }; 598 598 599 - enum { 600 - /* 601 - * This value is equal to IB_QP_DEST_QPN. 602 - */ 603 - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, 604 - }; 605 - 606 - enum { 607 - /* 608 - * This value is equal to IB_QP_RATE_LIMIT. 609 - */ 610 - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, 611 - }; 612 - 613 599 struct ib_uverbs_ex_create_qp { 614 600 __aligned_u64 user_handle; 615 601 __u32 pd_handle;
+21
include/uapi/rdma/rdma_user_rxe.h
··· 181 181 __aligned_u64 mmap_info_addr; 182 182 }; 183 183 184 + /* This data structure is stored at the base of work and 185 + * completion queues shared between user space and kernel space. 186 + * It contains the producer and consumer indices. It also 187 + * contains a copy of the queue size parameters for user space 188 + * to use, but the kernel must use the parameters in the 189 + * rxe_queue struct. For performance reasons the producer and 190 + * consumer indices are placed in separate cache lines. 191 + * The kernel should always mask the indices to avoid accessing 192 + * memory outside of the data area. 193 + */ 194 + struct rxe_queue_buf { 195 + __u32 log2_elem_size; 196 + __u32 index_mask; 197 + __u32 pad_1[30]; 198 + __u32 producer_index; 199 + __u32 pad_2[31]; 200 + __u32 consumer_index; 201 + __u32 pad_3[31]; 202 + __u8 data[]; 203 + }; 204 + 184 205 #endif /* RDMA_USER_RXE_H */
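A sketch of how either side of this shared ring addresses a slot, following the masking rule in the comment; the helper names are illustrative:

#include <stddef.h>
#include <rdma/rdma_user_rxe.h>

static void *demo_producer_slot(struct rxe_queue_buf *q)
{
        __u32 index = q->producer_index & q->index_mask;

        return q->data + ((size_t)index << q->log2_elem_size);
}

static int demo_queue_is_empty(struct rxe_queue_buf *q)
{
        return ((q->producer_index - q->consumer_index) & q->index_mask) == 0;
}
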
-5
kernel/dma/Kconfig
··· 75 75 config ARCH_HAS_FORCE_DMA_UNENCRYPTED 76 76 bool 77 77 78 - config DMA_VIRT_OPS 79 - bool 80 - depends on HAS_DMA 81 - select DMA_OPS 82 - 83 78 config SWIOTLB 84 79 bool 85 80 select NEED_DMA_MAP_STATE
-1
kernel/dma/Makefile
··· 5 5 obj-$(CONFIG_DMA_OPS) += dummy.o 6 6 obj-$(CONFIG_DMA_CMA) += contiguous.o 7 7 obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o 8 - obj-$(CONFIG_DMA_VIRT_OPS) += virt.o 9 8 obj-$(CONFIG_DMA_API_DEBUG) += debug.o 10 9 obj-$(CONFIG_SWIOTLB) += swiotlb.o 11 10 obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
-61
kernel/dma/virt.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * DMA operations that map to virtual addresses without flushing memory. 4 - */ 5 - #include <linux/export.h> 6 - #include <linux/mm.h> 7 - #include <linux/dma-map-ops.h> 8 - #include <linux/scatterlist.h> 9 - 10 - static void *dma_virt_alloc(struct device *dev, size_t size, 11 - dma_addr_t *dma_handle, gfp_t gfp, 12 - unsigned long attrs) 13 - { 14 - void *ret; 15 - 16 - ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); 17 - if (ret) 18 - *dma_handle = (uintptr_t)ret; 19 - return ret; 20 - } 21 - 22 - static void dma_virt_free(struct device *dev, size_t size, 23 - void *cpu_addr, dma_addr_t dma_addr, 24 - unsigned long attrs) 25 - { 26 - free_pages((unsigned long)cpu_addr, get_order(size)); 27 - } 28 - 29 - static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, 30 - unsigned long offset, size_t size, 31 - enum dma_data_direction dir, 32 - unsigned long attrs) 33 - { 34 - return (uintptr_t)(page_address(page) + offset); 35 - } 36 - 37 - static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, 38 - int nents, enum dma_data_direction dir, 39 - unsigned long attrs) 40 - { 41 - int i; 42 - struct scatterlist *sg; 43 - 44 - for_each_sg(sgl, sg, nents, i) { 45 - BUG_ON(!sg_page(sg)); 46 - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); 47 - sg_dma_len(sg) = sg->length; 48 - } 49 - 50 - return nents; 51 - } 52 - 53 - const struct dma_map_ops dma_virt_ops = { 54 - .alloc = dma_virt_alloc, 55 - .free = dma_virt_free, 56 - .map_page = dma_virt_map_page, 57 - .map_sg = dma_virt_map_sg, 58 - .alloc_pages = dma_common_alloc_pages, 59 - .free_pages = dma_common_free_pages, 60 - }; 61 - EXPORT_SYMBOL(dma_virt_ops);
-10
net/rds/ib.c
··· 30 30 * SOFTWARE. 31 31 * 32 32 */ 33 - #include <linux/dmapool.h> 34 33 #include <linux/kernel.h> 35 34 #include <linux/in.h> 36 35 #include <linux/if.h> ··· 107 108 rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); 108 109 if (rds_ibdev->pd) 109 110 ib_dealloc_pd(rds_ibdev->pd); 110 - dma_pool_destroy(rds_ibdev->rid_hdrs_pool); 111 111 112 112 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 113 113 list_del(&i_ipaddr->list); ··· 187 189 if (IS_ERR(rds_ibdev->pd)) { 188 190 ret = PTR_ERR(rds_ibdev->pd); 189 191 rds_ibdev->pd = NULL; 190 - goto put_dev; 191 - } 192 - rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, 193 - device->dma_device, 194 - sizeof(struct rds_header), 195 - L1_CACHE_BYTES, 0); 196 - if (!rds_ibdev->rid_hdrs_pool) { 197 - ret = -ENOMEM; 198 192 goto put_dev; 199 193 } 200 194
-13
net/rds/ib.h
··· 246 246 struct list_head conn_list; 247 247 struct ib_device *dev; 248 248 struct ib_pd *pd; 249 - struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ 250 249 u8 odp_capable:1; 251 250 252 251 unsigned int max_mrs; ··· 263 264 int *vector_load; 264 265 }; 265 266 266 - static inline int ibdev_to_node(struct ib_device *ibdev) 267 - { 268 - struct device *parent; 269 - 270 - parent = ibdev->dev.parent; 271 - return parent ? dev_to_node(parent) : NUMA_NO_NODE; 272 - } 273 267 #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) 274 268 275 269 /* bits for i_ack_flags */ ··· 379 387 int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); 380 388 void rds_ib_cm_connect_complete(struct rds_connection *conn, 381 389 struct rdma_cm_event *event); 382 - struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, 383 - struct dma_pool *pool, 384 - dma_addr_t **dma_addrs, u32 num_hdrs); 385 - void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, 386 - dma_addr_t *dma_addrs, u32 num_hdrs); 387 390 388 391 #define rds_ib_conn_error(conn, fmt...) \ 389 392 __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
+78 -50
net/rds/ib_cm.c
··· 30 30 * SOFTWARE. 31 31 * 32 32 */ 33 - #include <linux/dmapool.h> 34 33 #include <linux/kernel.h> 35 34 #include <linux/in.h> 36 35 #include <linux/slab.h> ··· 440 441 rds_ibdev->vector_load[index]--; 441 442 } 442 443 444 + static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, 445 + dma_addr_t dma_addr, enum dma_data_direction dir) 446 + { 447 + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); 448 + kfree(hdr); 449 + } 450 + 451 + static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, 452 + dma_addr_t *dma_addr, enum dma_data_direction dir) 453 + { 454 + struct rds_header *hdr; 455 + 456 + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); 457 + if (!hdr) 458 + return NULL; 459 + 460 + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), 461 + DMA_BIDIRECTIONAL); 462 + if (ib_dma_mapping_error(dev, *dma_addr)) { 463 + kfree(hdr); 464 + return NULL; 465 + } 466 + 467 + return hdr; 468 + } 469 + 470 + /* Free the DMA memory used to store struct rds_header. 471 + * 472 + * @dev: the RDS IB device 473 + * @hdrs: pointer to the array storing DMA memory pointers 474 + * @dma_addrs: pointer to the array storing DMA addresses 475 + * @num_hdars: number of headers to free. 476 + */ 477 + static void rds_dma_hdrs_free(struct rds_ib_device *dev, 478 + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, 479 + enum dma_data_direction dir) 480 + { 481 + u32 i; 482 + 483 + for (i = 0; i < num_hdrs; i++) 484 + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); 485 + kvfree(hdrs); 486 + kvfree(dma_addrs); 487 + } 488 + 489 + 443 490 /* Allocate DMA coherent memory to be used to store struct rds_header for 444 491 * sending/receiving packets. The pointers to the DMA memory and the 445 492 * associated DMA addresses are stored in two arrays. 446 493 * 447 - * @ibdev: the IB device 448 - * @pool: the DMA memory pool 494 + * @dev: the RDS IB device 449 495 * @dma_addrs: pointer to the array for storing DMA addresses 450 496 * @num_hdrs: number of headers to allocate 451 497 * 452 498 * It returns the pointer to the array storing the DMA memory pointers. On 453 499 * error, NULL pointer is returned. 454 500 */ 455 - struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, 456 - struct dma_pool *pool, 457 - dma_addr_t **dma_addrs, u32 num_hdrs) 501 + static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, 502 + dma_addr_t **dma_addrs, u32 num_hdrs, 503 + enum dma_data_direction dir) 458 504 { 459 505 struct rds_header **hdrs; 460 506 dma_addr_t *hdr_daddrs; 461 507 u32 i; 462 508 463 509 hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, 464 - ibdev_to_node(ibdev)); 510 + ibdev_to_node(dev->dev)); 465 511 if (!hdrs) 466 512 return NULL; 467 513 468 514 hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, 469 - ibdev_to_node(ibdev)); 515 + ibdev_to_node(dev->dev)); 470 516 if (!hdr_daddrs) { 471 517 kvfree(hdrs); 472 518 return NULL; 473 519 } 474 520 475 521 for (i = 0; i < num_hdrs; i++) { 476 - hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); 522 + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); 477 523 if (!hdrs[i]) { 478 - rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); 524 + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); 479 525 return NULL; 480 526 } 481 527 } 482 528 483 529 *dma_addrs = hdr_daddrs; 484 530 return hdrs; 485 - } 486 - 487 - /* Free the DMA memory used to store struct rds_header. 
488 - * 489 - * @pool: the DMA memory pool 490 - * @hdrs: pointer to the array storing DMA memory pointers 491 - * @dma_addrs: pointer to the array storing DMA addresses 492 - * @num_hdars: number of headers to free. 493 - */ 494 - void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, 495 - dma_addr_t *dma_addrs, u32 num_hdrs) 496 - { 497 - u32 i; 498 - 499 - for (i = 0; i < num_hdrs; i++) 500 - dma_pool_free(pool, hdrs[i], dma_addrs[i]); 501 - kvfree(hdrs); 502 - kvfree(dma_addrs); 503 531 } 504 532 505 533 /* ··· 542 516 struct rds_ib_device *rds_ibdev; 543 517 unsigned long max_wrs; 544 518 int ret, fr_queue_space; 545 - struct dma_pool *pool; 546 519 547 520 /* 548 521 * It's normal to see a null device if an incoming connection races ··· 637 612 goto recv_cq_out; 638 613 } 639 614 640 - pool = rds_ibdev->rid_hdrs_pool; 641 - ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, 642 - ic->i_send_ring.w_nr); 615 + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, 616 + ic->i_send_ring.w_nr, 617 + DMA_TO_DEVICE); 643 618 if (!ic->i_send_hdrs) { 644 619 ret = -ENOMEM; 645 620 rdsdebug("DMA send hdrs alloc failed\n"); 646 621 goto qp_out; 647 622 } 648 623 649 - ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, 650 - ic->i_recv_ring.w_nr); 624 + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, 625 + ic->i_recv_ring.w_nr, 626 + DMA_FROM_DEVICE); 651 627 if (!ic->i_recv_hdrs) { 652 628 ret = -ENOMEM; 653 629 rdsdebug("DMA recv hdrs alloc failed\n"); 654 630 goto send_hdrs_dma_out; 655 631 } 656 632 657 - ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, 658 - &ic->i_ack_dma); 633 + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, 634 + DMA_TO_DEVICE); 659 635 if (!ic->i_ack) { 660 636 ret = -ENOMEM; 661 637 rdsdebug("DMA ack header alloc failed\n"); ··· 692 666 vfree(ic->i_sends); 693 667 694 668 ack_dma_out: 695 - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); 669 + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, 670 + DMA_TO_DEVICE); 696 671 ic->i_ack = NULL; 697 672 698 673 recv_hdrs_dma_out: 699 - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, 700 - ic->i_recv_ring.w_nr); 674 + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, 675 + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); 701 676 ic->i_recv_hdrs = NULL; 702 677 ic->i_recv_hdrs_dma = NULL; 703 678 704 679 send_hdrs_dma_out: 705 - rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, 706 - ic->i_send_ring.w_nr); 680 + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, 681 + ic->i_send_ring.w_nr, DMA_TO_DEVICE); 707 682 ic->i_send_hdrs = NULL; 708 683 ic->i_send_hdrs_dma = NULL; 709 684 ··· 1137 1110 } 1138 1111 1139 1112 if (ic->rds_ibdev) { 1140 - struct dma_pool *pool; 1141 - 1142 - pool = ic->rds_ibdev->rid_hdrs_pool; 1143 - 1144 1113 /* then free the resources that ib callbacks use */ 1145 1114 if (ic->i_send_hdrs) { 1146 - rds_dma_hdrs_free(pool, ic->i_send_hdrs, 1115 + rds_dma_hdrs_free(ic->rds_ibdev, 1116 + ic->i_send_hdrs, 1147 1117 ic->i_send_hdrs_dma, 1148 - ic->i_send_ring.w_nr); 1118 + ic->i_send_ring.w_nr, 1119 + DMA_TO_DEVICE); 1149 1120 ic->i_send_hdrs = NULL; 1150 1121 ic->i_send_hdrs_dma = NULL; 1151 1122 } 1152 1123 1153 1124 if (ic->i_recv_hdrs) { 1154 - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, 1125 + rds_dma_hdrs_free(ic->rds_ibdev, 1126 + ic->i_recv_hdrs, 1155 1127 ic->i_recv_hdrs_dma, 1156 - ic->i_recv_ring.w_nr); 1128 + ic->i_recv_ring.w_nr, 1129 + 
DMA_FROM_DEVICE); 1157 1130 ic->i_recv_hdrs = NULL; 1158 1131 ic->i_recv_hdrs_dma = NULL; 1159 1132 } 1160 1133 1161 1134 if (ic->i_ack) { 1162 - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); 1135 + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, 1136 + ic->i_ack_dma, DMA_TO_DEVICE); 1163 1137 ic->i_ack = NULL; 1164 1138 } 1165 1139 } else {
+15 -3
net/rds/ib_recv.c
··· 662 662 seq = rds_ib_get_ack(ic); 663 663 664 664 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); 665 + 666 + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, 667 + sizeof(*hdr), DMA_TO_DEVICE); 665 668 rds_message_populate_header(hdr, 0, 0, 0); 666 669 hdr->h_ack = cpu_to_be64(seq); 667 670 hdr->h_credit = adv_credits; 668 671 rds_message_make_checksum(hdr); 672 + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, 673 + sizeof(*hdr), DMA_TO_DEVICE); 674 + 669 675 ic->i_ack_queued = jiffies; 670 676 671 677 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); ··· 851 845 struct rds_ib_connection *ic = conn->c_transport_data; 852 846 struct rds_ib_incoming *ibinc = ic->i_ibinc; 853 847 struct rds_header *ihdr, *hdr; 848 + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; 854 849 855 850 /* XXX shut down the connection if port 0,0 are seen? */ 856 851 ··· 870 863 871 864 ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; 872 865 866 + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, 867 + sizeof(*ihdr), DMA_FROM_DEVICE); 873 868 /* Validate the checksum. */ 874 869 if (!rds_message_verify_checksum(ihdr)) { 875 870 rds_ib_conn_error(conn, "incoming message " ··· 879 870 "forcing a reconnect\n", 880 871 &conn->c_faddr); 881 872 rds_stats_inc(s_recv_drop_bad_checksum); 882 - return; 873 + goto done; 883 874 } 884 875 885 876 /* Process the ACK sequence which comes with every packet */ ··· 908 899 */ 909 900 rds_ib_frag_free(ic, recv->r_frag); 910 901 recv->r_frag = NULL; 911 - return; 902 + goto done; 912 903 } 913 904 914 905 /* ··· 942 933 hdr->h_dport != ihdr->h_dport) { 943 934 rds_ib_conn_error(conn, 944 935 "fragment header mismatch; forcing reconnect\n"); 945 - return; 936 + goto done; 946 937 } 947 938 } 948 939 ··· 974 965 975 966 rds_inc_put(&ibinc->ii_inc); 976 967 } 968 + done: 969 + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, 970 + sizeof(*ihdr), DMA_FROM_DEVICE); 977 971 } 978 972 979 973 void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
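Because the headers are now streaming-DMA mappings instead of coherent pool memory, the CPU has to bracket every access with the sync helpers, as the hunk above does for the ACK and receive paths. A condensed sketch of the receive-side rule (the wrapper name is a placeholder; it assumes the net/rds types used above):

static void demo_read_recv_hdr(struct rds_ib_connection *ic,
                               struct rds_header *ihdr, dma_addr_t dma_addr)
{
        ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr,
                                   sizeof(*ihdr), DMA_FROM_DEVICE);

        /* ... the CPU may now safely parse *ihdr ... */

        ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr,
                                      sizeof(*ihdr), DMA_FROM_DEVICE);
}
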
+8
net/rds/ib_send.c
··· 638 638 send->s_sge[0].length = sizeof(struct rds_header); 639 639 send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; 640 640 641 + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, 642 + ic->i_send_hdrs_dma[pos], 643 + sizeof(struct rds_header), 644 + DMA_TO_DEVICE); 641 645 memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, 642 646 sizeof(struct rds_header)); 643 647 ··· 692 688 adv_credits = 0; 693 689 rds_ib_stats_inc(s_ib_tx_credit_updates); 694 690 } 691 + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, 692 + ic->i_send_hdrs_dma[pos], 693 + sizeof(struct rds_header), 694 + DMA_TO_DEVICE); 695 695 696 696 if (prev) 697 697 prev->s_wr.next = &send->s_wr;
+41 -23
tools/testing/scatterlist/main.c
··· 9 9 int alloc_ret; 10 10 unsigned num_pages; 11 11 unsigned *pfn; 12 + unsigned *pfn_app; 12 13 unsigned size; 13 14 unsigned int max_seg; 14 15 unsigned int expected_segments; ··· 53 52 { 54 53 const unsigned int sgmax = UINT_MAX; 55 54 struct test *test, tests[] = { 56 - { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, 57 - { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, 58 - { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, 59 - { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, 60 - { 0, 1, pfn(0), 1, sgmax, 1 }, 61 - { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, 62 - { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 }, 63 - { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 }, 64 - { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 }, 65 - { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 }, 66 - { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 }, 67 - { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 }, 68 - { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 }, 69 - { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 }, 70 - { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 }, 71 - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 }, 72 - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, 73 - { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, 74 - { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 }, 75 - { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, 76 - { 0, 0, NULL, 0, 0, 0 }, 55 + { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, 56 + { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, 57 + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, 58 + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, 59 + { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, 60 + { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, 61 + { 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 }, 62 + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, 63 + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, 64 + { 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 }, 65 + { 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 }, 66 + { 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 }, 67 + { 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 }, 68 + { 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, 69 + { 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, 70 + { 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 }, 71 + { 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 }, 72 + { 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 }, 73 + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 }, 74 + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE, 75 + 3 }, 76 + { 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE, 77 + 2 * PAGE_SIZE, 3 }, 78 + { 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE, 79 + 2 * PAGE_SIZE, 4 }, 80 + { 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12), 81 + 6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 }, 82 + { 0, 0, NULL, NULL, 0, 0, 0 }, 77 83 }; 78 84 unsigned int i; 79 85 80 86 for (i = 0, test = tests; test->expected_segments; test++, i++) { 87 + int left_pages = test->pfn_app ? 
test->num_pages : 0; 81 88 struct page *pages[MAX_PAGES]; 82 89 struct sg_table st; 83 90 struct scatterlist *sg; ··· 93 84 set_pages(pages, test->pfn, test->num_pages); 94 85 95 86 sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, 96 - test->size, test->max_seg, NULL, 0, GFP_KERNEL); 87 + test->size, test->max_seg, NULL, left_pages, GFP_KERNEL); 97 88 assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); 98 89 99 90 if (test->alloc_ret) 100 91 continue; 101 92 93 + if (test->pfn_app) { 94 + set_pages(pages, test->pfn_app, test->num_pages); 95 + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, 96 + test->size, test->max_seg, sg, 0, GFP_KERNEL); 97 + 98 + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); 99 + } 100 + 102 101 VALIDATE(st.nents == test->expected_segments, &st, test); 103 - VALIDATE(st.orig_nents == test->expected_segments, &st, test); 102 + if (!test->pfn_app) 103 + VALIDATE(st.orig_nents == test->expected_segments, &st, test); 104 104 105 105 sg_free_table(&st); 106 106 }
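The new pfn_app column exercises appending to an existing table: the first __sg_alloc_table_from_pages() call declares how many pages will still follow via left_pages, and the second call passes back the returned scatterlist to continue from it. A rough sketch of that two-step usage (buffer names and sizes are illustrative):

#include <linux/scatterlist.h>

static int demo_build_sgt(struct sg_table *sgt, struct page **pages_a,
                          struct page **pages_b, unsigned int n)
{
        struct scatterlist *sg;

        sg = __sg_alloc_table_from_pages(sgt, pages_a, n, 0, n * PAGE_SIZE,
                                         UINT_MAX, NULL, n /* left_pages */,
                                         GFP_KERNEL);
        if (IS_ERR(sg))
                return PTR_ERR(sg);

        sg = __sg_alloc_table_from_pages(sgt, pages_b, n, 0, n * PAGE_SIZE,
                                         UINT_MAX, sg, 0, GFP_KERNEL);
        return PTR_ERR_OR_ZERO(sg);
}
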