Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma update from Doug Ledford:
"This includes two bugs against the newly added opa vnic that were
found by turning on the debug kernel options:

- sleeping while holding a lock, so a one line fix where they
switched it from GFP_KERNEL allocation to a GFP_ATOMIC allocation

- a case where they had an isolated caller of their code that could
call them in an atomic context so they had to switch their use of a
mutex to a spinlock to be safe, so this was considerably more lines
of diff because all uses of that lock had to be switched

In addition, there is the fix for the bug that was already discussed with
you, an out of bounds array access in ib_uverbs_modify_qp and
ib_uverbs_create_ah; it is only seven lines of diff.

And finally, one fix to an earlier fix in the -rc cycle that broke
hfi1 and qib in regards to IPoIB (this one is, unfortunately, larger
than I would like for a -rc7 submission, but fixing the problem
required that we not treat all devices as though they had allocated a
netdev universally because it isn't true, and it took 70 lines of diff
to resolve the issue, but the final patch has been vetted by Intel and
Mellanox and they've both given their approval to the fix).

Summary:

- Two fixes for OPA found by debug kernel
- Fix for user supplied input causing kernel problems
- Fix for the IPoIB fixes submitted around -rc4"

[ Doug sent this having not noticed the 4.12 release, so I guess I'll be
getting another rdma pull request with the actual merge window
updates and not just fixes.

Oh well - it would have been nice if this small update had been the
merge window one. - Linus ]

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
IB/core, opa_vnic, hfi1, mlx5: Properly free rdma_netdev
RDMA/uverbs: Check port number supplied by user verbs cmds
IB/opa_vnic: Use spinlock instead of mutex for stats_lock
IB/opa_vnic: Use GFP_ATOMIC while sending trap

+58 -44
+8
drivers/infiniband/core/uverbs_cmd.c
··· 1935 1935 goto out; 1936 1936 } 1937 1937 1938 + if (!rdma_is_port_valid(qp->device, cmd->base.port_num)) { 1939 + ret = -EINVAL; 1940 + goto release_qp; 1941 + } 1942 + 1938 1943 attr->qp_state = cmd->base.qp_state; 1939 1944 attr->cur_qp_state = cmd->base.cur_qp_state; 1940 1945 attr->path_mtu = cmd->base.path_mtu; ··· 2552 2547 2553 2548 if (copy_from_user(&cmd, buf, sizeof cmd)) 2554 2549 return -EFAULT; 2550 + 2551 + if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) 2552 + return -EINVAL; 2555 2553 2556 2554 INIT_UDATA(&udata, buf + sizeof(cmd), 2557 2555 (unsigned long)cmd.response + sizeof(resp),
-1
drivers/infiniband/hw/hfi1/verbs.c
··· 1779 1779 ibdev->alloc_hw_stats = alloc_hw_stats; 1780 1780 ibdev->get_hw_stats = get_hw_stats; 1781 1781 ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; 1782 - ibdev->free_rdma_netdev = hfi1_vnic_free_rn; 1783 1782 1784 1783 /* keep process mad in the driver */ 1785 1784 ibdev->process_mad = hfi1_process_mad;
-1
drivers/infiniband/hw/hfi1/vnic.h
··· 176 176 const char *name, 177 177 unsigned char name_assign_type, 178 178 void (*setup)(struct net_device *)); 179 - void hfi1_vnic_free_rn(struct net_device *netdev); 180 179 int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, 181 180 struct hfi1_vnic_vport_info *vinfo, 182 181 struct sk_buff *skb, u64 pbc, u8 plen);
+10 -9
drivers/infiniband/hw/hfi1/vnic_main.c
··· 833 833 .ndo_get_stats64 = hfi1_vnic_get_stats64, 834 834 }; 835 835 836 + static void hfi1_vnic_free_rn(struct net_device *netdev) 837 + { 838 + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 839 + 840 + hfi1_vnic_deinit(vinfo); 841 + mutex_destroy(&vinfo->lock); 842 + free_netdev(netdev); 843 + } 844 + 836 845 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, 837 846 u8 port_num, 838 847 enum rdma_netdev_t type, ··· 873 864 vinfo->num_tx_q = dd->chip_sdma_engines; 874 865 vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; 875 866 vinfo->netdev = netdev; 867 + rn->free_rdma_netdev = hfi1_vnic_free_rn; 876 868 rn->set_id = hfi1_vnic_set_vesw_id; 877 869 878 870 netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; ··· 901 891 mutex_destroy(&vinfo->lock); 902 892 free_netdev(netdev); 903 893 return ERR_PTR(rc); 904 - } 905 - 906 - void hfi1_vnic_free_rn(struct net_device *netdev) 907 - { 908 - struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 909 - 910 - hfi1_vnic_deinit(vinfo); 911 - mutex_destroy(&vinfo->lock); 912 - free_netdev(netdev); 913 894 }
+17 -10
drivers/infiniband/hw/mlx5/main.c
··· 3528 3528 return num_counters; 3529 3529 } 3530 3530 3531 + static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) 3532 + { 3533 + return mlx5_rdma_netdev_free(netdev); 3534 + } 3535 + 3531 3536 static struct net_device* 3532 3537 mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, 3533 3538 u8 port_num, ··· 3541 3536 unsigned char name_assign_type, 3542 3537 void (*setup)(struct net_device *)) 3543 3538 { 3539 + struct net_device *netdev; 3540 + struct rdma_netdev *rn; 3541 + 3544 3542 if (type != RDMA_NETDEV_IPOIB) 3545 3543 return ERR_PTR(-EOPNOTSUPP); 3546 3544 3547 - return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, 3548 - name, setup); 3549 - } 3550 - 3551 - static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) 3552 - { 3553 - return mlx5_rdma_netdev_free(netdev); 3545 + netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, 3546 + name, setup); 3547 + if (likely(!IS_ERR_OR_NULL(netdev))) { 3548 + rn = netdev_priv(netdev); 3549 + rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev; 3550 + } 3551 + return netdev; 3554 3552 } 3555 3553 3556 3554 static void *mlx5_ib_add(struct mlx5_core_dev *mdev) ··· 3686 3678 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; 3687 3679 dev->ib_dev.get_port_immutable = mlx5_port_immutable; 3688 3680 dev->ib_dev.get_dev_fw_str = get_dev_fw_str; 3689 - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) { 3681 + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) 3690 3682 dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; 3691 - dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev; 3692 - } 3683 + 3693 3684 if (mlx5_core_is_pf(mdev)) { 3694 3685 dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; 3695 3686 dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
+4 -4
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 1893 1893 rn->send = ipoib_send; 1894 1894 rn->attach_mcast = ipoib_mcast_attach; 1895 1895 rn->detach_mcast = ipoib_mcast_detach; 1896 + rn->free_rdma_netdev = free_netdev; 1896 1897 rn->hca = hca; 1897 1898 1898 1899 dev->netdev_ops = &ipoib_netdev_default_pf; ··· 2289 2288 return; 2290 2289 2291 2290 list_for_each_entry_safe(priv, tmp, dev_list, list) { 2291 + struct rdma_netdev *rn = netdev_priv(priv->dev); 2292 + 2292 2293 ib_unregister_event_handler(&priv->event_handler); 2293 2294 flush_workqueue(ipoib_workqueue); 2294 2295 ··· 2307 2304 flush_workqueue(priv->wq); 2308 2305 2309 2306 unregister_netdev(priv->dev); 2310 - if (device->free_rdma_netdev) 2311 - device->free_rdma_netdev(priv->dev); 2312 - else 2313 - free_netdev(priv->dev); 2307 + rn->free_rdma_netdev(priv->dev); 2314 2308 2315 2309 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) 2316 2310 kfree(cpriv);
+2 -2
drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
··· 146 146 int i; 147 147 148 148 memset(&vstats, 0, sizeof(vstats)); 149 - mutex_lock(&adapter->stats_lock); 149 + spin_lock(&adapter->stats_lock); 150 150 adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats); 151 + spin_unlock(&adapter->stats_lock); 151 152 for (i = 0; i < VNIC_STATS_LEN; i++) { 152 153 char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset; 153 154 154 155 data[i] = (vnic_gstrings_stats[i].sizeof_stat == 155 156 sizeof(u64)) ? *(u64 *)p : *(u32 *)p; 156 157 } 157 - mutex_unlock(&adapter->stats_lock); 158 158 } 159 159 160 160 /* vnic_get_strings - get strings */
+1 -1
drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
··· 214 214 struct mutex mactbl_lock; 215 215 216 216 /* Lock used to protect access to vnic counters */ 217 - struct mutex stats_lock; 217 + spinlock_t stats_lock; 218 218 219 219 u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE]; 220 220
+7 -9
drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
··· 69 69 struct opa_vnic_stats vstats; 70 70 71 71 memset(&vstats, 0, sizeof(vstats)); 72 - mutex_lock(&adapter->stats_lock); 72 + spin_lock(&adapter->stats_lock); 73 73 adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats); 74 - mutex_unlock(&adapter->stats_lock); 74 + spin_unlock(&adapter->stats_lock); 75 75 memcpy(stats, &vstats.netstats, sizeof(*stats)); 76 76 } 77 77 ··· 323 323 else if (IS_ERR(netdev)) 324 324 return ERR_CAST(netdev); 325 325 326 + rn = netdev_priv(netdev); 326 327 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); 327 328 if (!adapter) { 328 329 rc = -ENOMEM; 329 330 goto adapter_err; 330 331 } 331 332 332 - rn = netdev_priv(netdev); 333 333 rn->clnt_priv = adapter; 334 334 rn->hca = ibdev; 335 335 rn->port_num = port_num; ··· 344 344 netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM; 345 345 mutex_init(&adapter->lock); 346 346 mutex_init(&adapter->mactbl_lock); 347 - mutex_init(&adapter->stats_lock); 347 + spin_lock_init(&adapter->stats_lock); 348 348 349 349 SET_NETDEV_DEV(netdev, ibdev->dev.parent); 350 350 ··· 364 364 netdev_err: 365 365 mutex_destroy(&adapter->lock); 366 366 mutex_destroy(&adapter->mactbl_lock); 367 - mutex_destroy(&adapter->stats_lock); 368 367 kfree(adapter); 369 368 adapter_err: 370 - ibdev->free_rdma_netdev(netdev); 369 + rn->free_rdma_netdev(netdev); 371 370 372 371 return ERR_PTR(rc); 373 372 } ··· 375 376 void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter) 376 377 { 377 378 struct net_device *netdev = adapter->netdev; 378 - struct ib_device *ibdev = adapter->ibdev; 379 + struct rdma_netdev *rn = netdev_priv(netdev); 379 380 380 381 v_info("removing\n"); 381 382 unregister_netdev(netdev); 382 383 opa_vnic_release_mac_tbl(adapter); 383 384 mutex_destroy(&adapter->lock); 384 385 mutex_destroy(&adapter->mactbl_lock); 385 - mutex_destroy(&adapter->stats_lock); 386 386 kfree(adapter); 387 - ibdev->free_rdma_netdev(netdev); 387 + rn->free_rdma_netdev(netdev); 388 388 }
+1 -1
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
··· 794 794 795 795 send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0, 796 796 IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA, 797 - GFP_KERNEL, OPA_MGMT_BASE_VERSION); 797 + GFP_ATOMIC, OPA_MGMT_BASE_VERSION); 798 798 if (IS_ERR(send_buf)) { 799 799 c_err("%s:Couldn't allocate send buf\n", __func__); 800 800 goto err_sndbuf;
+4 -4
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
··· 89 89 u64 *src; 90 90 91 91 memset(&vstats, 0, sizeof(vstats)); 92 - mutex_lock(&adapter->stats_lock); 92 + spin_lock(&adapter->stats_lock); 93 93 adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats); 94 - mutex_unlock(&adapter->stats_lock); 94 + spin_unlock(&adapter->stats_lock); 95 95 96 96 cntrs->vp_instance = cpu_to_be16(adapter->vport_num); 97 97 cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id); ··· 128 128 struct opa_vnic_stats vstats; 129 129 130 130 memset(&vstats, 0, sizeof(vstats)); 131 - mutex_lock(&adapter->stats_lock); 131 + spin_lock(&adapter->stats_lock); 132 132 adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats); 133 - mutex_unlock(&adapter->stats_lock); 133 + spin_unlock(&adapter->stats_lock); 134 134 135 135 cntrs->vp_instance = cpu_to_be16(adapter->vport_num); 136 136 cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+4 -2
include/rdma/ib_verbs.h
··· 1968 1968 struct ib_device *hca; 1969 1969 u8 port_num; 1970 1970 1971 + /* cleanup function must be specified */ 1972 + void (*free_rdma_netdev)(struct net_device *netdev); 1973 + 1971 1974 /* control functions */ 1972 1975 void (*set_id)(struct net_device *netdev, int id); 1973 1976 /* send packet */ ··· 2246 2243 struct ib_udata *udata); 2247 2244 int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); 2248 2245 /** 2249 - * rdma netdev operations 2246 + * rdma netdev operation 2250 2247 * 2251 2248 * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it 2252 2249 * doesn't support the specified rdma netdev type. ··· 2258 2255 const char *name, 2259 2256 unsigned char name_assign_type, 2260 2257 void (*setup)(struct net_device *)); 2261 - void (*free_rdma_netdev)(struct net_device *netdev); 2262 2258 2263 2259 struct module *owner; 2264 2260 struct device dev;