Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Doug Ledford:
"Here's our second -rc pull request. Nothing particularly special in
this one. The client removal deadlock fix is kinda tricky, but we had
multiple eyes on it and no one could find a fault in it. A couple
Spectre V1 fixes too. Otherwise, all just normal -rc fodder:

- A couple Spectre V1 fixes (umad, hfi1)

- Fix a tricky deadlock in the rdma core code with refcounting
instead of locks (client removal patches)

- Build errors (hns)

- Fix a scheduling while atomic issue (mlx5)

- Use after free fix (mad)

- Fix error path return code (hns)

- Null deref fix (siw_crypto_hash)

- A few other misc. minor fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
RDMA/hns: Fix error return code in hns_roce_v1_rsv_lp_qp()
RDMA/mlx5: Release locks during notifier unregister
IB/hfi1: Fix Spectre v1 vulnerability
IB/mad: Fix use-after-free in ib mad completion handling
RDMA/restrack: Track driver QP types in resource tracker
IB/mlx5: Fix MR registration flow to use UMR properly
RDMA/devices: Remove the lock around remove_client_context
RDMA/devices: Do not deadlock during client removal
IB/core: Add mitigation for Spectre V1
Do not dereference 'siw_crypto_shash' before checking
RDMA/qedr: Fix the hca_type and hca_rev returned in device attributes
RDMA/hns: Fix build error

+125 -84
+4 -1
drivers/infiniband/core/core_priv.h
··· 302 302 struct ib_udata *udata, 303 303 struct ib_uobject *uobj) 304 304 { 305 + enum ib_qp_type qp_type = attr->qp_type; 305 306 struct ib_qp *qp; 307 + bool is_xrc; 306 308 307 309 if (!dev->ops.create_qp) 308 310 return ERR_PTR(-EOPNOTSUPP); ··· 322 320 * and more importantly they are created internaly by driver, 323 321 * see mlx5 create_dev_resources() as an example. 324 322 */ 325 - if (attr->qp_type < IB_QPT_XRC_INI) { 323 + is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; 324 + if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { 326 325 qp->res.type = RDMA_RESTRACK_QP; 327 326 if (uobj) 328 327 rdma_restrack_uadd(&qp->res);
+69 -35
drivers/infiniband/core/device.c
··· 94 94 static DECLARE_RWSEM(devices_rwsem); 95 95 #define DEVICE_REGISTERED XA_MARK_1 96 96 97 - static LIST_HEAD(client_list); 97 + static u32 highest_client_id; 98 98 #define CLIENT_REGISTERED XA_MARK_1 99 99 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); 100 100 static DECLARE_RWSEM(clients_rwsem); 101 + 102 + static void ib_client_put(struct ib_client *client) 103 + { 104 + if (refcount_dec_and_test(&client->uses)) 105 + complete(&client->uses_zero); 106 + } 101 107 102 108 /* 103 109 * If client_data is registered then the corresponding client must also still ··· 667 661 668 662 down_write(&device->client_data_rwsem); 669 663 /* 664 + * So long as the client is registered hold both the client and device 665 + * unregistration locks. 666 + */ 667 + if (!refcount_inc_not_zero(&client->uses)) 668 + goto out_unlock; 669 + refcount_inc(&device->refcount); 670 + 671 + /* 670 672 * Another caller to add_client_context got here first and has already 671 673 * completely initialized context. 672 674 */ ··· 697 683 return 0; 698 684 699 685 out: 686 + ib_device_put(device); 687 + ib_client_put(client); 688 + out_unlock: 700 689 up_write(&device->client_data_rwsem); 701 690 return ret; 702 691 } ··· 719 702 client_data = xa_load(&device->client_data, client_id); 720 703 xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED); 721 704 client = xa_load(&clients, client_id); 722 - downgrade_write(&device->client_data_rwsem); 705 + up_write(&device->client_data_rwsem); 723 706 724 707 /* 725 708 * Notice we cannot be holding any exclusive locks when calling the ··· 729 712 * 730 713 * For this reason clients and drivers should not call the 731 714 * unregistration functions will holdling any locks. 732 - * 733 - * It tempting to drop the client_data_rwsem too, but this is required 734 - * to ensure that unregister_client does not return until all clients 735 - * are completely unregistered, which is required to avoid module 736 - * unloading races. 
737 715 */ 738 716 if (client->remove) 739 717 client->remove(device, client_data); 740 718 741 719 xa_erase(&device->client_data, client_id); 742 - up_read(&device->client_data_rwsem); 720 + ib_device_put(device); 721 + ib_client_put(client); 743 722 } 744 723 745 724 static int alloc_port_data(struct ib_device *device) ··· 1237 1224 1238 1225 static void disable_device(struct ib_device *device) 1239 1226 { 1240 - struct ib_client *client; 1227 + u32 cid; 1241 1228 1242 1229 WARN_ON(!refcount_read(&device->refcount)); 1243 1230 ··· 1245 1232 xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); 1246 1233 up_write(&devices_rwsem); 1247 1234 1235 + /* 1236 + * Remove clients in LIFO order, see assign_client_id. This could be 1237 + * more efficient if xarray learns to reverse iterate. Since no new 1238 + * clients can be added to this ib_device past this point we only need 1239 + * the maximum possible client_id value here. 1240 + */ 1248 1241 down_read(&clients_rwsem); 1249 - list_for_each_entry_reverse(client, &client_list, list) 1250 - remove_client_context(device, client->client_id); 1242 + cid = highest_client_id; 1251 1243 up_read(&clients_rwsem); 1244 + while (cid) { 1245 + cid--; 1246 + remove_client_context(device, cid); 1247 + } 1252 1248 1253 1249 /* Pairs with refcount_set in enable_device */ 1254 1250 ib_device_put(device); ··· 1684 1662 /* 1685 1663 * The add/remove callbacks must be called in FIFO/LIFO order. To 1686 1664 * achieve this we assign client_ids so they are sorted in 1687 - * registration order, and retain a linked list we can reverse iterate 1688 - * to get the LIFO order. The extra linked list can go away if xarray 1689 - * learns to reverse iterate. 1665 + * registration order. 
1690 1666 */ 1691 - if (list_empty(&client_list)) { 1692 - client->client_id = 0; 1693 - } else { 1694 - struct ib_client *last; 1695 - 1696 - last = list_last_entry(&client_list, struct ib_client, list); 1697 - client->client_id = last->client_id + 1; 1698 - } 1667 + client->client_id = highest_client_id; 1699 1668 ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); 1700 1669 if (ret) 1701 1670 goto out; 1702 1671 1672 + highest_client_id++; 1703 1673 xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); 1704 - list_add_tail(&client->list, &client_list); 1705 1674 1706 1675 out: 1707 1676 up_write(&clients_rwsem); 1708 1677 return ret; 1678 + } 1679 + 1680 + static void remove_client_id(struct ib_client *client) 1681 + { 1682 + down_write(&clients_rwsem); 1683 + xa_erase(&clients, client->client_id); 1684 + for (; highest_client_id; highest_client_id--) 1685 + if (xa_load(&clients, highest_client_id - 1)) 1686 + break; 1687 + up_write(&clients_rwsem); 1709 1688 } 1710 1689 1711 1690 /** ··· 1728 1705 unsigned long index; 1729 1706 int ret; 1730 1707 1708 + refcount_set(&client->uses, 1); 1709 + init_completion(&client->uses_zero); 1731 1710 ret = assign_client_id(client); 1732 1711 if (ret) 1733 1712 return ret; ··· 1765 1740 unsigned long index; 1766 1741 1767 1742 down_write(&clients_rwsem); 1743 + ib_client_put(client); 1768 1744 xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED); 1769 1745 up_write(&clients_rwsem); 1770 - /* 1771 - * Every device still known must be serialized to make sure we are 1772 - * done with the client callbacks before we return. 
1773 - */ 1774 - down_read(&devices_rwsem); 1775 - xa_for_each (&devices, index, device) 1776 - remove_client_context(device, client->client_id); 1777 - up_read(&devices_rwsem); 1778 1746 1779 - down_write(&clients_rwsem); 1780 - list_del(&client->list); 1781 - xa_erase(&clients, client->client_id); 1782 - up_write(&clients_rwsem); 1747 + /* We do not want to have locks while calling client->remove() */ 1748 + rcu_read_lock(); 1749 + xa_for_each (&devices, index, device) { 1750 + if (!ib_device_try_get(device)) 1751 + continue; 1752 + rcu_read_unlock(); 1753 + 1754 + remove_client_context(device, client->client_id); 1755 + 1756 + ib_device_put(device); 1757 + rcu_read_lock(); 1758 + } 1759 + rcu_read_unlock(); 1760 + 1761 + /* 1762 + * remove_client_context() is not a fence, it can return even though a 1763 + * removal is ongoing. Wait until all removals are completed. 1764 + */ 1765 + wait_for_completion(&client->uses_zero); 1766 + remove_client_id(client); 1783 1767 } 1784 1768 EXPORT_SYMBOL(ib_unregister_client); 1785 1769
+10 -10
drivers/infiniband/core/mad.c
··· 3224 3224 if (has_smi) 3225 3225 cq_size *= 2; 3226 3226 3227 + port_priv->pd = ib_alloc_pd(device, 0); 3228 + if (IS_ERR(port_priv->pd)) { 3229 + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); 3230 + ret = PTR_ERR(port_priv->pd); 3231 + goto error3; 3232 + } 3233 + 3227 3234 port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, 3228 3235 IB_POLL_UNBOUND_WORKQUEUE); 3229 3236 if (IS_ERR(port_priv->cq)) { 3230 3237 dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); 3231 3238 ret = PTR_ERR(port_priv->cq); 3232 - goto error3; 3233 - } 3234 - 3235 - port_priv->pd = ib_alloc_pd(device, 0); 3236 - if (IS_ERR(port_priv->pd)) { 3237 - dev_err(&device->dev, "Couldn't create ib_mad PD\n"); 3238 - ret = PTR_ERR(port_priv->pd); 3239 3239 goto error4; 3240 3240 } 3241 3241 ··· 3278 3278 error7: 3279 3279 destroy_mad_qp(&port_priv->qp_info[0]); 3280 3280 error6: 3281 - ib_dealloc_pd(port_priv->pd); 3282 - error4: 3283 3281 ib_free_cq(port_priv->cq); 3284 3282 cleanup_recv_queue(&port_priv->qp_info[1]); 3285 3283 cleanup_recv_queue(&port_priv->qp_info[0]); 3284 + error4: 3285 + ib_dealloc_pd(port_priv->pd); 3286 3286 error3: 3287 3287 kfree(port_priv); 3288 3288 ··· 3312 3312 destroy_workqueue(port_priv->wq); 3313 3313 destroy_mad_qp(&port_priv->qp_info[1]); 3314 3314 destroy_mad_qp(&port_priv->qp_info[0]); 3315 - ib_dealloc_pd(port_priv->pd); 3316 3315 ib_free_cq(port_priv->cq); 3316 + ib_dealloc_pd(port_priv->pd); 3317 3317 cleanup_recv_queue(&port_priv->qp_info[1]); 3318 3318 cleanup_recv_queue(&port_priv->qp_info[0]); 3319 3319 /* XXX: Handle deallocation of MAD registration tables */
+5 -1
drivers/infiniband/core/user_mad.c
··· 49 49 #include <linux/sched.h> 50 50 #include <linux/semaphore.h> 51 51 #include <linux/slab.h> 52 + #include <linux/nospec.h> 52 53 53 54 #include <linux/uaccess.h> 54 55 ··· 885 884 886 885 if (get_user(id, arg)) 887 886 return -EFAULT; 887 + if (id >= IB_UMAD_MAX_AGENTS) 888 + return -EINVAL; 888 889 889 890 mutex_lock(&file->port->file_mutex); 890 891 mutex_lock(&file->mutex); 891 892 892 - if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { 893 + id = array_index_nospec(id, IB_UMAD_MAX_AGENTS); 894 + if (!__get_agent(file, id)) { 893 895 ret = -EINVAL; 894 896 goto out; 895 897 }
+2
drivers/infiniband/hw/hfi1/verbs.c
··· 54 54 #include <linux/mm.h> 55 55 #include <linux/vmalloc.h> 56 56 #include <rdma/opa_addr.h> 57 + #include <linux/nospec.h> 57 58 58 59 #include "hfi.h" 59 60 #include "common.h" ··· 1537 1536 sl = rdma_ah_get_sl(ah_attr); 1538 1537 if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) 1539 1538 return -EINVAL; 1539 + sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc)); 1540 1540 1541 1541 sc5 = ibp->sl_to_sc[sl]; 1542 1542 if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
+3 -3
drivers/infiniband/hw/hns/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 config INFINIBAND_HNS 3 - tristate "HNS RoCE Driver" 3 + bool "HNS RoCE Driver" 4 4 depends on NET_VENDOR_HISILICON 5 5 depends on ARM64 || (COMPILE_TEST && 64BIT) 6 6 ---help--- ··· 11 11 To compile HIP06 or HIP08 driver as module, choose M here. 12 12 13 13 config INFINIBAND_HNS_HIP06 14 - bool "Hisilicon Hip06 Family RoCE support" 14 + tristate "Hisilicon Hip06 Family RoCE support" 15 15 depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET 16 16 ---help--- 17 17 RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and ··· 21 21 module will be called hns-roce-hw-v1 22 22 23 23 config INFINIBAND_HNS_HIP08 24 - bool "Hisilicon Hip08 Family RoCE support" 24 + tristate "Hisilicon Hip08 Family RoCE support" 25 25 depends on INFINIBAND_HNS && PCI && HNS3 26 26 ---help--- 27 27 RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
+2 -6
drivers/infiniband/hw/hns/Makefile
··· 9 9 hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ 10 10 hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o 11 11 12 - ifdef CONFIG_INFINIBAND_HNS_HIP06 13 12 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) 14 - obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o 15 - endif 13 + obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o 16 14 17 - ifdef CONFIG_INFINIBAND_HNS_HIP08 18 15 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) 19 - obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o 20 - endif 16 + obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
+3 -1
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
··· 750 750 atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); 751 751 752 752 pd = rdma_zalloc_drv_obj(ibdev, ib_pd); 753 - if (!pd) 753 + if (!pd) { 754 + ret = -ENOMEM; 754 755 goto alloc_mem_failed; 756 + } 755 757 756 758 pd->device = ibdev; 757 759 ret = hns_roce_alloc_pd(pd, NULL);
+3 -4
drivers/infiniband/hw/mlx5/main.c
··· 5802 5802 return; 5803 5803 } 5804 5804 5805 - if (mpi->mdev_events.notifier_call) 5806 - mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); 5807 - mpi->mdev_events.notifier_call = NULL; 5808 - 5809 5805 mpi->ibdev = NULL; 5810 5806 5811 5807 spin_unlock(&port->mp.mpi_lock); 5808 + if (mpi->mdev_events.notifier_call) 5809 + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); 5810 + mpi->mdev_events.notifier_call = NULL; 5812 5811 mlx5_remove_netdev_notifier(ibdev, port_num); 5813 5812 spin_lock(&port->mp.mpi_lock); 5814 5813
+9 -18
drivers/infiniband/hw/mlx5/mr.c
··· 51 51 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 52 52 static int mr_cache_max_order(struct mlx5_ib_dev *dev); 53 53 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 54 - static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev) 55 - { 56 - return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled); 57 - } 58 54 59 55 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) 60 56 { 61 57 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); 62 - } 63 - 64 - static bool use_umr(struct mlx5_ib_dev *dev, int order) 65 - { 66 - return order <= mr_cache_max_order(dev) && 67 - umr_can_modify_entity_size(dev); 68 58 } 69 59 70 60 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) ··· 1261 1271 { 1262 1272 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1263 1273 struct mlx5_ib_mr *mr = NULL; 1264 - bool populate_mtts = false; 1274 + bool use_umr; 1265 1275 struct ib_umem *umem; 1266 1276 int page_shift; 1267 1277 int npages; ··· 1293 1303 if (err < 0) 1294 1304 return ERR_PTR(err); 1295 1305 1296 - if (use_umr(dev, order)) { 1306 + use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) && 1307 + (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) || 1308 + !MLX5_CAP_GEN(dev->mdev, atomic)); 1309 + 1310 + if (order <= mr_cache_max_order(dev) && use_umr) { 1297 1311 mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, 1298 1312 page_shift, order, access_flags); 1299 1313 if (PTR_ERR(mr) == -EAGAIN) { 1300 1314 mlx5_ib_dbg(dev, "cache empty for order %d\n", order); 1301 1315 mr = NULL; 1302 1316 } 1303 - populate_mtts = false; 1304 1317 } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { 1305 1318 if (access_flags & IB_ACCESS_ON_DEMAND) { 1306 1319 err = -EINVAL; 1307 1320 pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); 1308 1321 goto error; 1309 1322 } 1310 - populate_mtts = true; 1323 + use_umr = 
false; 1311 1324 } 1312 1325 1313 1326 if (!mr) { 1314 - if (!umr_can_modify_entity_size(dev)) 1315 - populate_mtts = true; 1316 1327 mutex_lock(&dev->slow_path_mutex); 1317 1328 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, 1318 - page_shift, access_flags, populate_mtts); 1329 + page_shift, access_flags, !use_umr); 1319 1330 mutex_unlock(&dev->slow_path_mutex); 1320 1331 } 1321 1332 ··· 1332 1341 1333 1342 update_odp_mr(mr); 1334 1343 1335 - if (!populate_mtts) { 1344 + if (use_umr) { 1336 1345 int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; 1337 1346 1338 1347 if (access_flags & IB_ACCESS_ON_DEMAND)
+8 -2
drivers/infiniband/hw/qedr/main.c
··· 125 125 struct qedr_dev *dev = 126 126 rdma_device_to_drv_device(device, struct qedr_dev, ibdev); 127 127 128 - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); 128 + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); 129 129 } 130 130 static DEVICE_ATTR_RO(hw_rev); 131 131 132 132 static ssize_t hca_type_show(struct device *device, 133 133 struct device_attribute *attr, char *buf) 134 134 { 135 - return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); 135 + struct qedr_dev *dev = 136 + rdma_device_to_drv_device(device, struct qedr_dev, ibdev); 137 + 138 + return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", 139 + dev->pdev->device, 140 + rdma_protocol_iwarp(&dev->ibdev, 1) ? 141 + "iWARP" : "RoCE"); 136 142 } 137 143 static DEVICE_ATTR_RO(hca_type); 138 144
+4 -2
drivers/infiniband/sw/siw/siw_qp.c
··· 220 220 { 221 221 struct siw_rx_stream *c_rx = &qp->rx_stream; 222 222 struct siw_iwarp_tx *c_tx = &qp->tx_ctx; 223 - int size = crypto_shash_descsize(siw_crypto_shash) + 224 - sizeof(struct shash_desc); 223 + int size; 225 224 226 225 if (siw_crypto_shash == NULL) 227 226 return -ENOENT; 227 + 228 + size = crypto_shash_descsize(siw_crypto_shash) + 229 + sizeof(struct shash_desc); 228 230 229 231 c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); 230 232 c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
+3 -1
include/rdma/ib_verbs.h
··· 2647 2647 const union ib_gid *gid, 2648 2648 const struct sockaddr *addr, 2649 2649 void *client_data); 2650 - struct list_head list; 2650 + 2651 + refcount_t uses; 2652 + struct completion uses_zero; 2651 2653 u32 client_id; 2652 2654 2653 2655 /* kverbs are not required by the client */