Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/cma: Add support for network namespaces

Add support for network namespaces in the ib_cma module. This is
accomplished by:

1. Adding a network namespace parameter to rdma_create_id. This parameter is
used to populate the network namespace field of the new rdma_id_private
(id.route.addr.dev_addr.net). rdma_create_id takes a reference on the
network namespace with get_net().
2. Using the network namespace from the rdma_id instead of init_net inside
ib_cma when listening on an ID and when looking up an ID for an incoming
request.
3. Dropping the reference on the ID's network namespace with put_net() when
rdma_destroy_id is called.

In order to preserve the current behavior, init_net is passed when
rdma_create_id is called from other modules.

Signed-off-by: Guy Shapiro <guysh@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Yotam Kenneth <yotamke@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

Authored by Guy Shapiro and committed by Doug Ledford
fa20105e 4be74b42

+52 -34
+28 -18
drivers/infiniband/core/cma.c
··· 600 600 return 0; 601 601 } 602 602 603 - struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, 603 + struct rdma_cm_id *rdma_create_id(struct net *net, 604 + rdma_cm_event_handler event_handler, 604 605 void *context, enum rdma_port_space ps, 605 606 enum ib_qp_type qp_type) 606 607 { ··· 625 624 INIT_LIST_HEAD(&id_priv->listen_list); 626 625 INIT_LIST_HEAD(&id_priv->mc_list); 627 626 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 628 - id_priv->id.route.addr.dev_addr.net = &init_net; 627 + id_priv->id.route.addr.dev_addr.net = get_net(net); 629 628 630 629 return &id_priv->id; 631 630 } ··· 1279 1278 cma_protocol_roce(&id_priv->id); 1280 1279 1281 1280 return !addr->dev_addr.bound_dev_if || 1282 - (net_eq(dev_net(net_dev), &init_net) && 1281 + (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1283 1282 addr->dev_addr.bound_dev_if == net_dev->ifindex); 1284 1283 } 1285 1284 ··· 1340 1339 } 1341 1340 } 1342 1341 1343 - bind_list = cma_ps_find(&init_net, 1342 + bind_list = cma_ps_find(*net_dev ? 
dev_net(*net_dev) : &init_net, 1344 1343 rdma_ps_from_service_id(req.service_id), 1345 1344 cma_port_from_service_id(req.service_id)); 1346 1345 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); ··· 1412 1411 static void cma_release_port(struct rdma_id_private *id_priv) 1413 1412 { 1414 1413 struct rdma_bind_list *bind_list = id_priv->bind_list; 1414 + struct net *net = id_priv->id.route.addr.dev_addr.net; 1415 1415 1416 1416 if (!bind_list) 1417 1417 return; ··· 1420 1418 mutex_lock(&lock); 1421 1419 hlist_del(&id_priv->node); 1422 1420 if (hlist_empty(&bind_list->owners)) { 1423 - cma_ps_remove(&init_net, bind_list->ps, bind_list->port); 1421 + cma_ps_remove(net, bind_list->ps, bind_list->port); 1424 1422 kfree(bind_list); 1425 1423 } 1426 1424 mutex_unlock(&lock); ··· 1479 1477 cma_deref_id(id_priv->id.context); 1480 1478 1481 1479 kfree(id_priv->id.route.path_rec); 1480 + put_net(id_priv->id.route.addr.dev_addr.net); 1482 1481 kfree(id_priv); 1483 1482 } 1484 1483 EXPORT_SYMBOL(rdma_destroy_id); ··· 1610 1607 ib_event->param.req_rcvd.primary_path->service_id; 1611 1608 int ret; 1612 1609 1613 - id = rdma_create_id(listen_id->event_handler, listen_id->context, 1610 + id = rdma_create_id(listen_id->route.addr.dev_addr.net, 1611 + listen_id->event_handler, listen_id->context, 1614 1612 listen_id->ps, ib_event->param.req_rcvd.qp_type); 1615 1613 if (IS_ERR(id)) 1616 1614 return NULL; ··· 1666 1662 struct rdma_id_private *id_priv; 1667 1663 struct rdma_cm_id *id; 1668 1664 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1665 + struct net *net = listen_id->route.addr.dev_addr.net; 1669 1666 int ret; 1670 1667 1671 - id = rdma_create_id(listen_id->event_handler, listen_id->context, 1668 + id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 1672 1669 listen_id->ps, IB_QPT_UD); 1673 1670 if (IS_ERR(id)) 1674 1671 return NULL; ··· 1906 1901 return -ECONNABORTED; 1907 1902 1908 1903 /* Create a new RDMA id 
for the new IW CM ID */ 1909 - new_cm_id = rdma_create_id(listen_id->id.event_handler, 1904 + new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 1905 + listen_id->id.event_handler, 1910 1906 listen_id->id.context, 1911 1907 RDMA_PS_TCP, IB_QPT_RC); 1912 1908 if (IS_ERR(new_cm_id)) { ··· 2035 2029 { 2036 2030 struct rdma_id_private *dev_id_priv; 2037 2031 struct rdma_cm_id *id; 2032 + struct net *net = id_priv->id.route.addr.dev_addr.net; 2038 2033 int ret; 2039 2034 2040 2035 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2041 2036 return; 2042 2037 2043 - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, 2038 + id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2044 2039 id_priv->id.qp_type); 2045 2040 if (IS_ERR(id)) 2046 2041 return; ··· 2715 2708 if (!bind_list) 2716 2709 return -ENOMEM; 2717 2710 2718 - ret = cma_ps_alloc(&init_net, ps, bind_list, snum); 2711 + ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 2712 + snum); 2719 2713 if (ret < 0) 2720 2714 goto err; 2721 2715 ··· 2735 2727 static unsigned int last_used_port; 2736 2728 int low, high, remaining; 2737 2729 unsigned int rover; 2730 + struct net *net = id_priv->id.route.addr.dev_addr.net; 2738 2731 2739 - inet_get_local_port_range(&init_net, &low, &high); 2732 + inet_get_local_port_range(net, &low, &high); 2740 2733 remaining = (high - low) + 1; 2741 2734 rover = prandom_u32() % remaining + low; 2742 2735 retry: 2743 2736 if (last_used_port != rover && 2744 - !cma_ps_find(&init_net, ps, (unsigned short)rover)) { 2737 + !cma_ps_find(net, ps, (unsigned short)rover)) { 2745 2738 int ret = cma_alloc_port(ps, id_priv, rover); 2746 2739 /* 2747 2740 * Remember previously used port number in order to avoid ··· 2808 2799 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 2809 2800 return -EACCES; 2810 2801 2811 - bind_list = cma_ps_find(&init_net, ps, snum); 2802 + bind_list = 
cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 2812 2803 if (!bind_list) { 2813 2804 ret = cma_alloc_port(ps, id_priv, snum); 2814 2805 } else { ··· 3000 2991 if (addr->sa_family == AF_INET) 3001 2992 id_priv->afonly = 1; 3002 2993 #if IS_ENABLED(CONFIG_IPV6) 3003 - else if (addr->sa_family == AF_INET6) 3004 - id_priv->afonly = init_net.ipv6.sysctl.bindv6only; 2994 + else if (addr->sa_family == AF_INET6) { 2995 + struct net *net = id_priv->id.route.addr.dev_addr.net; 2996 + 2997 + id_priv->afonly = net->ipv6.sysctl.bindv6only; 2998 + } 3005 2999 #endif 3006 3000 } 3007 3001 ret = cma_get_port(id_priv); ··· 3809 3797 dev_addr = &id_priv->id.route.addr.dev_addr; 3810 3798 3811 3799 if ((dev_addr->bound_dev_if == ndev->ifindex) && 3800 + (net_eq(dev_net(ndev), dev_addr->net)) && 3812 3801 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { 3813 3802 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", 3814 3803 ndev->name, &id_priv->id); ··· 3834 3821 struct cma_device *cma_dev; 3835 3822 struct rdma_id_private *id_priv; 3836 3823 int ret = NOTIFY_DONE; 3837 - 3838 - if (dev_net(ndev) != &init_net) 3839 - return NOTIFY_DONE; 3840 3824 3841 3825 if (event != NETDEV_BONDING_FAILOVER) 3842 3826 return NOTIFY_DONE;
+2 -1
drivers/infiniband/core/ucma.c
··· 472 472 return -ENOMEM; 473 473 474 474 ctx->uid = cmd.uid; 475 - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); 475 + ctx->cm_id = rdma_create_id(&init_net, ucma_event_handler, ctx, cmd.ps, 476 + qp_type); 476 477 if (IS_ERR(ctx->cm_id)) { 477 478 ret = PTR_ERR(ctx->cm_id); 478 479 goto err1;
+1 -1
drivers/infiniband/ulp/iser/iser_verbs.c
··· 1017 1017 ib_conn->beacon.wr_id = ISER_BEACON_WRID; 1018 1018 ib_conn->beacon.opcode = IB_WR_SEND; 1019 1019 1020 - ib_conn->cma_id = rdma_create_id(iser_cma_handler, 1020 + ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, 1021 1021 (void *)iser_conn, 1022 1022 RDMA_PS_TCP, IB_QPT_RC); 1023 1023 if (IS_ERR(ib_conn->cma_id)) {
+1 -1
drivers/infiniband/ulp/isert/ib_isert.c
··· 3096 3096 sa = (struct sockaddr *)&np->np_sockaddr; 3097 3097 isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); 3098 3098 3099 - id = rdma_create_id(isert_cma_handler, isert_np, 3099 + id = rdma_create_id(&init_net, isert_cma_handler, isert_np, 3100 3100 RDMA_PS_TCP, IB_QPT_RC); 3101 3101 if (IS_ERR(id)) { 3102 3102 isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
+3 -1
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
··· 128 128 IBLND_CREDIT_HIGHWATER_V1 : \ 129 129 *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ 130 130 131 - #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt) 131 + #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \ 132 + cb, dev, \ 133 + ps, qpt) 132 134 133 135 static inline int 134 136 kiblnd_concurrent_sends_v1(void)
+5 -1
include/rdma/rdma_cm.h
··· 160 160 /** 161 161 * rdma_create_id - Create an RDMA identifier. 162 162 * 163 + * @net: The network namespace in which to create the new id. 163 164 * @event_handler: User callback invoked to report events associated with the 164 165 * returned rdma_id. 165 166 * @context: User specified context associated with the id. 166 167 * @ps: RDMA port space. 167 168 * @qp_type: type of queue pair associated with the id. 169 + * 170 + * The id holds a reference on the network namespace until it is destroyed. 168 171 */ 169 - struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, 172 + struct rdma_cm_id *rdma_create_id(struct net *net, 173 + rdma_cm_event_handler event_handler, 170 174 void *context, enum rdma_port_space ps, 171 175 enum ib_qp_type qp_type); 172 176
+2 -2
net/9p/trans_rdma.c
··· 655 655 return -ENOMEM; 656 656 657 657 /* Create the RDMA CM ID */ 658 - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, 659 - IB_QPT_RC); 658 + rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, 659 + RDMA_PS_TCP, IB_QPT_RC); 660 660 if (IS_ERR(rdma->cm_id)) 661 661 goto error; 662 662
+1 -1
net/rds/ib.c
··· 317 317 /* Create a CMA ID and try to bind it. This catches both 318 318 * IB and iWARP capable NICs. 319 319 */ 320 - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 320 + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 321 321 if (IS_ERR(cm_id)) 322 322 return PTR_ERR(cm_id); 323 323
+1 -1
net/rds/ib_cm.c
··· 565 565 566 566 /* XXX I wonder what affect the port space has */ 567 567 /* delegate cm event handler to rdma_transport */ 568 - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 568 + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, 569 569 RDMA_PS_TCP, IB_QPT_RC); 570 570 if (IS_ERR(ic->i_cm_id)) { 571 571 ret = PTR_ERR(ic->i_cm_id);
+1 -1
net/rds/iw.c
··· 223 223 /* Create a CMA ID and try to bind it. This catches both 224 224 * IB and iWARP capable NICs. 225 225 */ 226 - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 226 + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 227 227 if (IS_ERR(cm_id)) 228 228 return PTR_ERR(cm_id); 229 229
+1 -1
net/rds/iw_cm.c
··· 524 524 525 525 /* XXX I wonder what affect the port space has */ 526 526 /* delegate cm event handler to rdma_transport */ 527 - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 527 + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, 528 528 RDMA_PS_TCP, IB_QPT_RC); 529 529 if (IS_ERR(ic->i_cm_id)) { 530 530 ret = PTR_ERR(ic->i_cm_id);
+2 -2
net/rds/rdma_transport.c
··· 142 142 struct rdma_cm_id *cm_id; 143 143 int ret; 144 144 145 - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, 146 - IB_QPT_RC); 145 + cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, 146 + RDMA_PS_TCP, IB_QPT_RC); 147 147 if (IS_ERR(cm_id)) { 148 148 ret = PTR_ERR(cm_id); 149 149 printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
+2 -2
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 692 692 if (!cma_xprt) 693 693 return ERR_PTR(-ENOMEM); 694 694 695 - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, 696 - IB_QPT_RC); 695 + listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, 696 + RDMA_PS_TCP, IB_QPT_RC); 697 697 if (IS_ERR(listen_id)) { 698 698 ret = PTR_ERR(listen_id); 699 699 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
+2 -1
net/sunrpc/xprtrdma/verbs.c
··· 432 432 433 433 init_completion(&ia->ri_done); 434 434 435 - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); 435 + id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, 436 + IB_QPT_RC); 436 437 if (IS_ERR(id)) { 437 438 rc = PTR_ERR(id); 438 439 dprintk("RPC: %s: rdma_create_id() failed %i\n",