Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/cma: Add multicast communication support

Extend rdma_cm to support multicast communication. Multicast support
is added to the existing RDMA_PS_UDP port space, as well as a new
RDMA_PS_IPOIB port space. The latter port space allows joining the
multicast groups used by IPoIB, which enables offloading IPoIB traffic
to a separate QP. The port space determines the signature used in the
MGID when joining the group. The newly added RDMA_PS_IPOIB also
allows for unicast operations, similar to RDMA_PS_UDP.

Supporting the RDMA_PS_IPOIB port space requires changing how UD QPs are initialized,
since we can no longer assume that the qkey is constant. This requires
saving the Q_Key to use when attaching to a device, so that it is
available when creating the QP. The Q_Key information is exported to
the user through the existing rdma_init_qp_attr() interface.

Multicast support is also exported to userspace through the rdma_ucm.

Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Sean Hefty and committed by Roland Dreier.
Commit: c8f6a362 (parent: faec2f7b)

+549 -52
+315 -44
drivers/infiniband/core/cma.c
··· 71 71 static DEFINE_IDR(sdp_ps); 72 72 static DEFINE_IDR(tcp_ps); 73 73 static DEFINE_IDR(udp_ps); 74 + static DEFINE_IDR(ipoib_ps); 74 75 static int next_port; 75 76 76 77 struct cma_device { ··· 117 116 struct list_head list; 118 117 struct list_head listen_list; 119 118 struct cma_device *cma_dev; 119 + struct list_head mc_list; 120 120 121 121 enum cma_state state; 122 122 spinlock_t lock; ··· 136 134 } cm_id; 137 135 138 136 u32 seq_num; 137 + u32 qkey; 139 138 u32 qp_num; 140 139 u8 srq; 140 + }; 141 + 142 + struct cma_multicast { 143 + struct rdma_id_private *id_priv; 144 + union { 145 + struct ib_sa_multicast *ib; 146 + } multicast; 147 + struct list_head list; 148 + void *context; 149 + struct sockaddr addr; 150 + u8 pad[sizeof(struct sockaddr_in6) - 151 + sizeof(struct sockaddr)]; 141 152 }; 142 153 143 154 struct cma_work { ··· 258 243 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); 259 244 } 260 245 246 + static inline int cma_is_ud_ps(enum rdma_port_space ps) 247 + { 248 + return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); 249 + } 250 + 261 251 static void cma_attach_to_dev(struct rdma_id_private *id_priv, 262 252 struct cma_device *cma_dev) 263 253 { ··· 285 265 id_priv->cma_dev = NULL; 286 266 } 287 267 268 + static int cma_set_qkey(struct ib_device *device, u8 port_num, 269 + enum rdma_port_space ps, 270 + struct rdma_dev_addr *dev_addr, u32 *qkey) 271 + { 272 + struct ib_sa_mcmember_rec rec; 273 + int ret = 0; 274 + 275 + switch (ps) { 276 + case RDMA_PS_UDP: 277 + *qkey = RDMA_UDP_QKEY; 278 + break; 279 + case RDMA_PS_IPOIB: 280 + ib_addr_get_mgid(dev_addr, &rec.mgid); 281 + ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec); 282 + *qkey = be32_to_cpu(rec.qkey); 283 + break; 284 + default: 285 + break; 286 + } 287 + return ret; 288 + } 289 + 288 290 static int cma_acquire_dev(struct rdma_id_private *id_priv) 289 291 { 290 - enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; 292 + struct rdma_dev_addr 
*dev_addr = &id_priv->id.route.addr.dev_addr; 291 293 struct cma_device *cma_dev; 292 294 union ib_gid gid; 293 295 int ret = -ENODEV; 294 296 295 - switch (rdma_node_get_transport(dev_type)) { 297 + switch (rdma_node_get_transport(dev_addr->dev_type)) { 296 298 case RDMA_TRANSPORT_IB: 297 - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 299 + ib_addr_get_sgid(dev_addr, &gid); 298 300 break; 299 301 case RDMA_TRANSPORT_IWARP: 300 - iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 302 + iw_addr_get_sgid(dev_addr, &gid); 301 303 break; 302 304 default: 303 305 return -ENODEV; ··· 329 287 ret = ib_find_cached_gid(cma_dev->device, &gid, 330 288 &id_priv->id.port_num, NULL); 331 289 if (!ret) { 332 - cma_attach_to_dev(id_priv, cma_dev); 290 + ret = cma_set_qkey(cma_dev->device, 291 + id_priv->id.port_num, 292 + id_priv->id.ps, dev_addr, 293 + &id_priv->qkey); 294 + if (!ret) 295 + cma_attach_to_dev(id_priv, cma_dev); 333 296 break; 334 297 } 335 298 } ··· 372 325 init_waitqueue_head(&id_priv->wait_remove); 373 326 atomic_set(&id_priv->dev_remove, 0); 374 327 INIT_LIST_HEAD(&id_priv->listen_list); 328 + INIT_LIST_HEAD(&id_priv->mc_list); 375 329 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 376 330 377 331 return &id_priv->id; 378 332 } 379 333 EXPORT_SYMBOL(rdma_create_id); 380 334 381 - static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 335 + static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 382 336 { 383 337 struct ib_qp_attr qp_attr; 384 - struct rdma_dev_addr *dev_addr; 385 - int ret; 338 + int qp_attr_mask, ret; 386 339 387 - dev_addr = &id_priv->id.route.addr.dev_addr; 388 - ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 389 - ib_addr_get_pkey(dev_addr), 390 - &qp_attr.pkey_index); 340 + qp_attr.qp_state = IB_QPS_INIT; 341 + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 391 342 if (ret) 392 343 return ret; 393 344 394 - qp_attr.qp_state = 
IB_QPS_INIT; 395 - qp_attr.qp_access_flags = 0; 396 - qp_attr.port_num = id_priv->id.port_num; 397 - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS | 398 - IB_QP_PKEY_INDEX | IB_QP_PORT); 345 + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 346 + if (ret) 347 + return ret; 348 + 349 + qp_attr.qp_state = IB_QPS_RTR; 350 + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 351 + if (ret) 352 + return ret; 353 + 354 + qp_attr.qp_state = IB_QPS_RTS; 355 + qp_attr.sq_psn = 0; 356 + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 357 + 358 + return ret; 399 359 } 400 360 401 - static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 361 + static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 402 362 { 403 363 struct ib_qp_attr qp_attr; 364 + int qp_attr_mask, ret; 404 365 405 366 qp_attr.qp_state = IB_QPS_INIT; 406 - qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 367 + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 368 + if (ret) 369 + return ret; 407 370 408 - return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); 371 + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); 409 372 } 410 373 411 374 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, ··· 433 376 if (IS_ERR(qp)) 434 377 return PTR_ERR(qp); 435 378 436 - switch (rdma_node_get_transport(id->device->node_type)) { 437 - case RDMA_TRANSPORT_IB: 438 - ret = cma_init_ib_qp(id_priv, qp); 439 - break; 440 - case RDMA_TRANSPORT_IWARP: 441 - ret = cma_init_iw_qp(id_priv, qp); 442 - break; 443 - default: 444 - ret = -ENOSYS; 445 - break; 446 - } 447 - 379 + if (cma_is_ud_ps(id_priv->id.ps)) 380 + ret = cma_init_ud_qp(id_priv, qp); 381 + else 382 + ret = cma_init_conn_qp(id_priv, qp); 448 383 if (ret) 449 384 goto err; 450 385 ··· 509 460 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); 510 461 } 511 462 463 + static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 464 + struct ib_qp_attr 
*qp_attr, int *qp_attr_mask) 465 + { 466 + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 467 + int ret; 468 + 469 + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 470 + ib_addr_get_pkey(dev_addr), 471 + &qp_attr->pkey_index); 472 + if (ret) 473 + return ret; 474 + 475 + qp_attr->port_num = id_priv->id.port_num; 476 + *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 477 + 478 + if (cma_is_ud_ps(id_priv->id.ps)) { 479 + qp_attr->qkey = id_priv->qkey; 480 + *qp_attr_mask |= IB_QP_QKEY; 481 + } else { 482 + qp_attr->qp_access_flags = 0; 483 + *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 484 + } 485 + return 0; 486 + } 487 + 512 488 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 513 489 int *qp_attr_mask) 514 490 { 515 491 struct rdma_id_private *id_priv; 516 - int ret; 492 + int ret = 0; 517 493 518 494 id_priv = container_of(id, struct rdma_id_private, id); 519 495 switch (rdma_node_get_transport(id_priv->id.device->node_type)) { 520 496 case RDMA_TRANSPORT_IB: 521 - ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 522 - qp_attr_mask); 497 + if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) 498 + ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 499 + else 500 + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 501 + qp_attr_mask); 523 502 if (qp_attr->qp_state == IB_QPS_RTR) 524 503 qp_attr->rq_psn = id_priv->seq_num; 525 504 break; 526 505 case RDMA_TRANSPORT_IWARP: 527 - ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 528 - qp_attr_mask); 506 + if (!id_priv->cm_id.iw) { 507 + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; 508 + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 509 + } else 510 + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 511 + qp_attr_mask); 529 512 break; 530 513 default: 531 514 ret = -ENOSYS; ··· 779 698 mutex_unlock(&lock); 780 699 } 781 700 701 + static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 702 + { 703 + struct 
cma_multicast *mc; 704 + 705 + while (!list_empty(&id_priv->mc_list)) { 706 + mc = container_of(id_priv->mc_list.next, 707 + struct cma_multicast, list); 708 + list_del(&mc->list); 709 + ib_sa_free_multicast(mc->multicast.ib); 710 + kfree(mc); 711 + } 712 + } 713 + 782 714 void rdma_destroy_id(struct rdma_cm_id *id) 783 715 { 784 716 struct rdma_id_private *id_priv; ··· 816 722 default: 817 723 break; 818 724 } 725 + cma_leave_mc_groups(id_priv); 819 726 mutex_lock(&lock); 820 727 cma_detach_from_dev(id_priv); 821 728 } ··· 1067 972 memset(&event, 0, sizeof event); 1068 973 offset = cma_user_data_offset(listen_id->id.ps); 1069 974 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1070 - if (listen_id->id.ps == RDMA_PS_UDP) { 975 + if (cma_is_ud_ps(listen_id->id.ps)) { 1071 976 conn_id = cma_new_udp_id(&listen_id->id, ib_event); 1072 977 event.param.ud.private_data = ib_event->private_data + offset; 1073 978 event.param.ud.private_data_len = ··· 1820 1725 struct rdma_bind_list *bind_list; 1821 1726 int port, ret; 1822 1727 1823 - bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 1728 + bind_list = kmalloc(sizeof *bind_list, GFP_KERNEL); 1824 1729 if (!bind_list) 1825 1730 return -ENOMEM; 1826 1731 ··· 1942 1847 case RDMA_PS_UDP: 1943 1848 ps = &udp_ps; 1944 1849 break; 1850 + case RDMA_PS_IPOIB: 1851 + ps = &ipoib_ps; 1852 + break; 1945 1853 default: 1946 1854 return -EPROTONOSUPPORT; 1947 1855 } ··· 2059 1961 event.status = ib_event->param.sidr_rep_rcvd.status; 2060 1962 break; 2061 1963 } 2062 - if (rep->qkey != RDMA_UD_QKEY) { 1964 + if (id_priv->qkey != rep->qkey) { 2063 1965 event.event = RDMA_CM_EVENT_UNREACHABLE; 2064 1966 event.status = -EINVAL; 2065 1967 break; ··· 2258 2160 2259 2161 switch (rdma_node_get_transport(id->device->node_type)) { 2260 2162 case RDMA_TRANSPORT_IB: 2261 - if (id->ps == RDMA_PS_UDP) 2163 + if (cma_is_ud_ps(id->ps)) 2262 2164 ret = cma_resolve_ib_udp(id_priv, conn_param); 2263 2165 else 2264 2166 ret = cma_connect_ib(id_priv, 
conn_param); ··· 2354 2256 rep.status = status; 2355 2257 if (status == IB_SIDR_SUCCESS) { 2356 2258 rep.qp_num = id_priv->qp_num; 2357 - rep.qkey = RDMA_UD_QKEY; 2259 + rep.qkey = id_priv->qkey; 2358 2260 } 2359 2261 rep.private_data = private_data; 2360 2262 rep.private_data_len = private_data_len; ··· 2378 2280 2379 2281 switch (rdma_node_get_transport(id->device->node_type)) { 2380 2282 case RDMA_TRANSPORT_IB: 2381 - if (id->ps == RDMA_PS_UDP) 2283 + if (cma_is_ud_ps(id->ps)) 2382 2284 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2383 2285 conn_param->private_data, 2384 2286 conn_param->private_data_len); ··· 2439 2341 2440 2342 switch (rdma_node_get_transport(id->device->node_type)) { 2441 2343 case RDMA_TRANSPORT_IB: 2442 - if (id->ps == RDMA_PS_UDP) 2344 + if (cma_is_ud_ps(id->ps)) 2443 2345 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 2444 2346 private_data, private_data_len); 2445 2347 else ··· 2489 2391 return ret; 2490 2392 } 2491 2393 EXPORT_SYMBOL(rdma_disconnect); 2394 + 2395 + static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 2396 + { 2397 + struct rdma_id_private *id_priv; 2398 + struct cma_multicast *mc = multicast->context; 2399 + struct rdma_cm_event event; 2400 + int ret; 2401 + 2402 + id_priv = mc->id_priv; 2403 + atomic_inc(&id_priv->dev_remove); 2404 + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && 2405 + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) 2406 + goto out; 2407 + 2408 + if (!status && id_priv->id.qp) 2409 + status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 2410 + multicast->rec.mlid); 2411 + 2412 + memset(&event, 0, sizeof event); 2413 + event.status = status; 2414 + event.param.ud.private_data = mc->context; 2415 + if (!status) { 2416 + event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 2417 + ib_init_ah_from_mcmember(id_priv->id.device, 2418 + id_priv->id.port_num, &multicast->rec, 2419 + &event.param.ud.ah_attr); 2420 + event.param.ud.qp_num = 0xFFFFFF; 2421 + event.param.ud.qkey = 
be32_to_cpu(multicast->rec.qkey); 2422 + } else 2423 + event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 2424 + 2425 + ret = id_priv->id.event_handler(&id_priv->id, &event); 2426 + if (ret) { 2427 + cma_exch(id_priv, CMA_DESTROYING); 2428 + cma_release_remove(id_priv); 2429 + rdma_destroy_id(&id_priv->id); 2430 + return 0; 2431 + } 2432 + out: 2433 + cma_release_remove(id_priv); 2434 + return 0; 2435 + } 2436 + 2437 + static void cma_set_mgid(struct rdma_id_private *id_priv, 2438 + struct sockaddr *addr, union ib_gid *mgid) 2439 + { 2440 + unsigned char mc_map[MAX_ADDR_LEN]; 2441 + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2442 + struct sockaddr_in *sin = (struct sockaddr_in *) addr; 2443 + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 2444 + 2445 + if (cma_any_addr(addr)) { 2446 + memset(mgid, 0, sizeof *mgid); 2447 + } else if ((addr->sa_family == AF_INET6) && 2448 + ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) == 2449 + 0xFF10A01B)) { 2450 + /* IPv6 address is an SA assigned MGID. 
*/ 2451 + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 2452 + } else { 2453 + ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); 2454 + if (id_priv->id.ps == RDMA_PS_UDP) 2455 + mc_map[7] = 0x01; /* Use RDMA CM signature */ 2456 + mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; 2457 + mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); 2458 + *mgid = *(union ib_gid *) (mc_map + 4); 2459 + } 2460 + } 2461 + 2462 + static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 2463 + struct cma_multicast *mc) 2464 + { 2465 + struct ib_sa_mcmember_rec rec; 2466 + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2467 + ib_sa_comp_mask comp_mask; 2468 + int ret; 2469 + 2470 + ib_addr_get_mgid(dev_addr, &rec.mgid); 2471 + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, 2472 + &rec.mgid, &rec); 2473 + if (ret) 2474 + return ret; 2475 + 2476 + cma_set_mgid(id_priv, &mc->addr, &rec.mgid); 2477 + if (id_priv->id.ps == RDMA_PS_UDP) 2478 + rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 2479 + ib_addr_get_sgid(dev_addr, &rec.port_gid); 2480 + rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2481 + rec.join_state = 1; 2482 + 2483 + comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 2484 + IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 2485 + IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | 2486 + IB_SA_MCMEMBER_REC_FLOW_LABEL | 2487 + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 2488 + 2489 + mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 2490 + id_priv->id.port_num, &rec, 2491 + comp_mask, GFP_KERNEL, 2492 + cma_ib_mc_handler, mc); 2493 + if (IS_ERR(mc->multicast.ib)) 2494 + return PTR_ERR(mc->multicast.ib); 2495 + 2496 + return 0; 2497 + } 2498 + 2499 + int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 2500 + void *context) 2501 + { 2502 + struct rdma_id_private *id_priv; 2503 + struct cma_multicast *mc; 2504 + int ret; 2505 + 2506 + id_priv = container_of(id, struct 
rdma_id_private, id); 2507 + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && 2508 + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) 2509 + return -EINVAL; 2510 + 2511 + mc = kmalloc(sizeof *mc, GFP_KERNEL); 2512 + if (!mc) 2513 + return -ENOMEM; 2514 + 2515 + memcpy(&mc->addr, addr, ip_addr_size(addr)); 2516 + mc->context = context; 2517 + mc->id_priv = id_priv; 2518 + 2519 + spin_lock(&id_priv->lock); 2520 + list_add(&mc->list, &id_priv->mc_list); 2521 + spin_unlock(&id_priv->lock); 2522 + 2523 + switch (rdma_node_get_transport(id->device->node_type)) { 2524 + case RDMA_TRANSPORT_IB: 2525 + ret = cma_join_ib_multicast(id_priv, mc); 2526 + break; 2527 + default: 2528 + ret = -ENOSYS; 2529 + break; 2530 + } 2531 + 2532 + if (ret) { 2533 + spin_lock_irq(&id_priv->lock); 2534 + list_del(&mc->list); 2535 + spin_unlock_irq(&id_priv->lock); 2536 + kfree(mc); 2537 + } 2538 + return ret; 2539 + } 2540 + EXPORT_SYMBOL(rdma_join_multicast); 2541 + 2542 + void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) 2543 + { 2544 + struct rdma_id_private *id_priv; 2545 + struct cma_multicast *mc; 2546 + 2547 + id_priv = container_of(id, struct rdma_id_private, id); 2548 + spin_lock_irq(&id_priv->lock); 2549 + list_for_each_entry(mc, &id_priv->mc_list, list) { 2550 + if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { 2551 + list_del(&mc->list); 2552 + spin_unlock_irq(&id_priv->lock); 2553 + 2554 + if (id->qp) 2555 + ib_detach_mcast(id->qp, 2556 + &mc->multicast.ib->rec.mgid, 2557 + mc->multicast.ib->rec.mlid); 2558 + ib_sa_free_multicast(mc->multicast.ib); 2559 + kfree(mc); 2560 + return; 2561 + } 2562 + } 2563 + spin_unlock_irq(&id_priv->lock); 2564 + } 2565 + EXPORT_SYMBOL(rdma_leave_multicast); 2492 2566 2493 2567 static void cma_add_one(struct ib_device *device) 2494 2568 { ··· 2792 2522 idr_destroy(&sdp_ps); 2793 2523 idr_destroy(&tcp_ps); 2794 2524 idr_destroy(&udp_ps); 2525 + idr_destroy(&ipoib_ps); 2795 2526 } 2796 2527 2797 2528 module_init(cma_init);
+201 -3
drivers/infiniband/core/ucma.c
··· 70 70 u64 uid; 71 71 72 72 struct list_head list; 73 + struct list_head mc_list; 74 + }; 75 + 76 + struct ucma_multicast { 77 + struct ucma_context *ctx; 78 + int id; 79 + int events_reported; 80 + 81 + u64 uid; 82 + struct list_head list; 83 + struct sockaddr addr; 84 + u8 pad[sizeof(struct sockaddr_in6) - 85 + sizeof(struct sockaddr)]; 73 86 }; 74 87 75 88 struct ucma_event { 76 89 struct ucma_context *ctx; 90 + struct ucma_multicast *mc; 77 91 struct list_head list; 78 92 struct rdma_cm_id *cm_id; 79 93 struct rdma_ucm_event_resp resp; ··· 95 81 96 82 static DEFINE_MUTEX(mut); 97 83 static DEFINE_IDR(ctx_idr); 84 + static DEFINE_IDR(multicast_idr); 98 85 99 86 static inline struct ucma_context *_ucma_find_context(int id, 100 87 struct ucma_file *file) ··· 139 124 140 125 atomic_set(&ctx->ref, 1); 141 126 init_completion(&ctx->comp); 127 + INIT_LIST_HEAD(&ctx->mc_list); 142 128 ctx->file = file; 143 129 144 130 do { ··· 160 144 161 145 error: 162 146 kfree(ctx); 147 + return NULL; 148 + } 149 + 150 + static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) 151 + { 152 + struct ucma_multicast *mc; 153 + int ret; 154 + 155 + mc = kzalloc(sizeof(*mc), GFP_KERNEL); 156 + if (!mc) 157 + return NULL; 158 + 159 + do { 160 + ret = idr_pre_get(&multicast_idr, GFP_KERNEL); 161 + if (!ret) 162 + goto error; 163 + 164 + mutex_lock(&mut); 165 + ret = idr_get_new(&multicast_idr, mc, &mc->id); 166 + mutex_unlock(&mut); 167 + } while (ret == -EAGAIN); 168 + 169 + if (ret) 170 + goto error; 171 + 172 + mc->ctx = ctx; 173 + list_add_tail(&mc->list, &ctx->mc_list); 174 + return mc; 175 + 176 + error: 177 + kfree(mc); 163 178 return NULL; 164 179 } 165 180 ··· 227 180 struct ucma_event *uevent) 228 181 { 229 182 uevent->ctx = ctx; 230 - uevent->resp.uid = ctx->uid; 231 - uevent->resp.id = ctx->id; 183 + switch (event->event) { 184 + case RDMA_CM_EVENT_MULTICAST_JOIN: 185 + case RDMA_CM_EVENT_MULTICAST_ERROR: 186 + uevent->mc = (struct ucma_multicast *) 187 + 
event->param.ud.private_data; 188 + uevent->resp.uid = uevent->mc->uid; 189 + uevent->resp.id = uevent->mc->id; 190 + break; 191 + default: 192 + uevent->resp.uid = ctx->uid; 193 + uevent->resp.id = ctx->id; 194 + break; 195 + } 232 196 } 233 197 234 198 static int ucma_event_handler(struct rdma_cm_id *cm_id, ··· 257 199 ucma_set_event_context(ctx, event, uevent); 258 200 uevent->resp.event = event->event; 259 201 uevent->resp.status = event->status; 260 - if (cm_id->ps == RDMA_PS_UDP) 202 + if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) 261 203 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); 262 204 else 263 205 ucma_copy_conn_event(&uevent->resp.param.conn, ··· 348 290 349 291 list_del(&uevent->list); 350 292 uevent->ctx->events_reported++; 293 + if (uevent->mc) 294 + uevent->mc->events_reported++; 351 295 kfree(uevent); 352 296 done: 353 297 mutex_unlock(&file->mut); ··· 402 342 return ret; 403 343 } 404 344 345 + static void ucma_cleanup_multicast(struct ucma_context *ctx) 346 + { 347 + struct ucma_multicast *mc, *tmp; 348 + 349 + mutex_lock(&mut); 350 + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { 351 + list_del(&mc->list); 352 + idr_remove(&multicast_idr, mc->id); 353 + kfree(mc); 354 + } 355 + mutex_unlock(&mut); 356 + } 357 + 405 358 static void ucma_cleanup_events(struct ucma_context *ctx) 406 359 { 407 360 struct ucma_event *uevent, *tmp; ··· 433 360 } 434 361 } 435 362 363 + static void ucma_cleanup_mc_events(struct ucma_multicast *mc) 364 + { 365 + struct ucma_event *uevent, *tmp; 366 + 367 + list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { 368 + if (uevent->mc != mc) 369 + continue; 370 + 371 + list_del(&uevent->list); 372 + kfree(uevent); 373 + } 374 + } 375 + 436 376 static int ucma_free_ctx(struct ucma_context *ctx) 437 377 { 438 378 int events_reported; 439 379 440 380 /* No new events will be generated after destroying the id. 
*/ 441 381 rdma_destroy_id(ctx->cm_id); 382 + 383 + ucma_cleanup_multicast(ctx); 442 384 443 385 /* Cleanup events not yet reported to the user. */ 444 386 mutex_lock(&ctx->file->mut); ··· 819 731 return ret; 820 732 } 821 733 734 + static ssize_t ucma_join_multicast(struct ucma_file *file, 735 + const char __user *inbuf, 736 + int in_len, int out_len) 737 + { 738 + struct rdma_ucm_join_mcast cmd; 739 + struct rdma_ucm_create_id_resp resp; 740 + struct ucma_context *ctx; 741 + struct ucma_multicast *mc; 742 + int ret; 743 + 744 + if (out_len < sizeof(resp)) 745 + return -ENOSPC; 746 + 747 + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 748 + return -EFAULT; 749 + 750 + ctx = ucma_get_ctx(file, cmd.id); 751 + if (IS_ERR(ctx)) 752 + return PTR_ERR(ctx); 753 + 754 + mutex_lock(&file->mut); 755 + mc = ucma_alloc_multicast(ctx); 756 + if (IS_ERR(mc)) { 757 + ret = PTR_ERR(mc); 758 + goto err1; 759 + } 760 + 761 + mc->uid = cmd.uid; 762 + memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); 763 + ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc); 764 + if (ret) 765 + goto err2; 766 + 767 + resp.id = mc->id; 768 + if (copy_to_user((void __user *)(unsigned long)cmd.response, 769 + &resp, sizeof(resp))) { 770 + ret = -EFAULT; 771 + goto err3; 772 + } 773 + 774 + mutex_unlock(&file->mut); 775 + ucma_put_ctx(ctx); 776 + return 0; 777 + 778 + err3: 779 + rdma_leave_multicast(ctx->cm_id, &mc->addr); 780 + ucma_cleanup_mc_events(mc); 781 + err2: 782 + mutex_lock(&mut); 783 + idr_remove(&multicast_idr, mc->id); 784 + mutex_unlock(&mut); 785 + list_del(&mc->list); 786 + kfree(mc); 787 + err1: 788 + mutex_unlock(&file->mut); 789 + ucma_put_ctx(ctx); 790 + return ret; 791 + } 792 + 793 + static ssize_t ucma_leave_multicast(struct ucma_file *file, 794 + const char __user *inbuf, 795 + int in_len, int out_len) 796 + { 797 + struct rdma_ucm_destroy_id cmd; 798 + struct rdma_ucm_destroy_id_resp resp; 799 + struct ucma_multicast *mc; 800 + int ret = 0; 801 + 802 + if (out_len < 
sizeof(resp)) 803 + return -ENOSPC; 804 + 805 + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 806 + return -EFAULT; 807 + 808 + mutex_lock(&mut); 809 + mc = idr_find(&multicast_idr, cmd.id); 810 + if (!mc) 811 + mc = ERR_PTR(-ENOENT); 812 + else if (mc->ctx->file != file) 813 + mc = ERR_PTR(-EINVAL); 814 + else { 815 + idr_remove(&multicast_idr, mc->id); 816 + atomic_inc(&mc->ctx->ref); 817 + } 818 + mutex_unlock(&mut); 819 + 820 + if (IS_ERR(mc)) { 821 + ret = PTR_ERR(mc); 822 + goto out; 823 + } 824 + 825 + rdma_leave_multicast(mc->ctx->cm_id, &mc->addr); 826 + mutex_lock(&mc->ctx->file->mut); 827 + ucma_cleanup_mc_events(mc); 828 + list_del(&mc->list); 829 + mutex_unlock(&mc->ctx->file->mut); 830 + 831 + ucma_put_ctx(mc->ctx); 832 + resp.events_reported = mc->events_reported; 833 + kfree(mc); 834 + 835 + if (copy_to_user((void __user *)(unsigned long)cmd.response, 836 + &resp, sizeof(resp))) 837 + ret = -EFAULT; 838 + out: 839 + return ret; 840 + } 841 + 822 842 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 823 843 const char __user *inbuf, 824 844 int in_len, int out_len) = { ··· 946 750 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 947 751 [RDMA_USER_CM_CMD_SET_OPTION] = NULL, 948 752 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 753 + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, 754 + [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 949 755 }; 950 756 951 757 static ssize_t ucma_write(struct file *filp, const char __user *buf,
+20 -1
include/rdma/rdma_cm.h
··· 52 52 RDMA_CM_EVENT_ESTABLISHED, 53 53 RDMA_CM_EVENT_DISCONNECTED, 54 54 RDMA_CM_EVENT_DEVICE_REMOVAL, 55 + RDMA_CM_EVENT_MULTICAST_JOIN, 56 + RDMA_CM_EVENT_MULTICAST_ERROR 55 57 }; 56 58 57 59 enum rdma_port_space { 58 60 RDMA_PS_SDP = 0x0001, 61 + RDMA_PS_IPOIB= 0x0002, 59 62 RDMA_PS_TCP = 0x0106, 60 63 RDMA_PS_UDP = 0x0111, 61 64 RDMA_PS_SCTP = 0x0183 ··· 297 294 */ 298 295 int rdma_disconnect(struct rdma_cm_id *id); 299 296 300 - #endif /* RDMA_CM_H */ 297 + /** 298 + * rdma_join_multicast - Join the multicast group specified by the given 299 + * address. 300 + * @id: Communication identifier associated with the request. 301 + * @addr: Multicast address identifying the group to join. 302 + * @context: User-defined context associated with the join request, returned 303 + * to the user through the private_data pointer in multicast events. 304 + */ 305 + int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 306 + void *context); 301 307 308 + /** 309 + * rdma_leave_multicast - Leave the multicast group specified by the given 310 + * address. 311 + */ 312 + void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); 313 + 314 + #endif /* RDMA_CM_H */
+2 -2
include/rdma/rdma_cm_ib.h
··· 44 44 int rdma_set_ib_paths(struct rdma_cm_id *id, 45 45 struct ib_sa_path_rec *path_rec, int num_paths); 46 46 47 - /* Global qkey for UD QPs and multicast groups. */ 48 - #define RDMA_UD_QKEY 0x01234567 47 + /* Global qkey for UDP QPs and multicast groups. */ 48 + #define RDMA_UDP_QKEY 0x01234567 49 49 50 50 #endif /* RDMA_CM_IB_H */
+11 -2
include/rdma/rdma_user_cm.h
··· 38 38 #include <rdma/ib_user_verbs.h> 39 39 #include <rdma/ib_user_sa.h> 40 40 41 - #define RDMA_USER_CM_ABI_VERSION 3 41 + #define RDMA_USER_CM_ABI_VERSION 4 42 42 43 43 #define RDMA_MAX_PRIVATE_DATA 256 44 44 ··· 58 58 RDMA_USER_CM_CMD_GET_EVENT, 59 59 RDMA_USER_CM_CMD_GET_OPTION, 60 60 RDMA_USER_CM_CMD_SET_OPTION, 61 - RDMA_USER_CM_CMD_NOTIFY 61 + RDMA_USER_CM_CMD_NOTIFY, 62 + RDMA_USER_CM_CMD_JOIN_MCAST, 63 + RDMA_USER_CM_CMD_LEAVE_MCAST 62 64 }; 63 65 64 66 /* ··· 188 186 struct rdma_ucm_notify { 189 187 __u32 id; 190 188 __u32 event; 189 + }; 190 + 191 + struct rdma_ucm_join_mcast { 192 + __u64 response; /* rdma_ucm_create_id_resp */ 193 + __u64 uid; 194 + struct sockaddr_in6 addr; 195 + __u32 id; 191 196 }; 192 197 193 198 struct rdma_ucm_get_event {