Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/umad: Add P_Key index support

Add support for setting the P_Key index of sent MADs and getting the
P_Key index of received MADs. This requires a change to the layout of
the ABI structure struct ib_user_mad_hdr, so, to avoid breaking
compatibility, we default to the old (unchanged) ABI and add a new
ioctl IB_USER_MAD_ENABLE_PKEY that allows applications that are aware
of the new ABI to opt into using it.

We plan to switch to the new ABI by default in a year or so, and
this patch adds a warning that is printed when an application uses the
old ABI, to push people towards converting to the new ABI.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Reviewed-by: Hal Rosenstock <hal@xsigo.com>

+136 -30
+14
Documentation/infiniband/user_mad.txt
··· 99 99 request/response pairs. The upper 32 bits are reserved for use by 100 100 the kernel and will be overwritten before a MAD is sent. 101 101 102 + P_Key Index Handling 103 + 104 + The old ib_umad interface did not allow setting the P_Key index for 105 + MADs that are sent and did not provide a way for obtaining the P_Key 106 + index of received MADs. A new layout for struct ib_user_mad_hdr 107 + with a pkey_index member has been defined; however, to preserve 108 + binary compatibility with older applications, this new layout will 109 + not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called 110 + before a file descriptor is used for anything else. 111 + 112 + In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented 113 + to 6, the new layout of struct ib_user_mad_hdr will be used by 114 + default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed. 115 + 102 116 Setting IsSM Capability Bit 103 117 104 118 To set the IsSM capability bit for a port, simply open the
+73 -29
drivers/infiniband/core/user_mad.c
··· 118 118 wait_queue_head_t recv_wait; 119 119 struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; 120 120 int agents_dead; 121 + u8 use_pkey_index; 122 + u8 already_used; 121 123 }; 122 124 123 125 struct ib_umad_packet { ··· 147 145 container_of(ref, struct ib_umad_device, ref); 148 146 149 147 kfree(dev); 148 + } 149 + 150 + static int hdr_size(struct ib_umad_file *file) 151 + { 152 + return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : 153 + sizeof (struct ib_user_mad_hdr_old); 150 154 } 151 155 152 156 /* caller must hold port->mutex at least for reading */ ··· 229 221 packet->length = mad_recv_wc->mad_len; 230 222 packet->recv_wc = mad_recv_wc; 231 223 232 - packet->mad.hdr.status = 0; 233 - packet->mad.hdr.length = sizeof (struct ib_user_mad) + 234 - mad_recv_wc->mad_len; 235 - packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); 236 - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); 237 - packet->mad.hdr.sl = mad_recv_wc->wc->sl; 238 - packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; 224 + packet->mad.hdr.status = 0; 225 + packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; 226 + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); 227 + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); 228 + packet->mad.hdr.sl = mad_recv_wc->wc->sl; 229 + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; 230 + packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; 239 231 packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); 240 232 if (packet->mad.hdr.grh_present) { 241 233 struct ib_ah_attr ah_attr; ··· 261 253 ib_free_recv_mad(mad_recv_wc); 262 254 } 263 255 264 - static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, 265 - size_t count) 256 + static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, 257 + struct ib_umad_packet *packet, size_t count) 266 258 { 267 259 struct ib_mad_recv_buf *recv_buf; 268 260 int left, seg_payload, 
offset, max_seg_payload; ··· 270 262 /* We need enough room to copy the first (or only) MAD segment. */ 271 263 recv_buf = &packet->recv_wc->recv_buf; 272 264 if ((packet->length <= sizeof (*recv_buf->mad) && 273 - count < sizeof (packet->mad) + packet->length) || 265 + count < hdr_size(file) + packet->length) || 274 266 (packet->length > sizeof (*recv_buf->mad) && 275 - count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) 267 + count < hdr_size(file) + sizeof (*recv_buf->mad))) 276 268 return -EINVAL; 277 269 278 - if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) 270 + if (copy_to_user(buf, &packet->mad, hdr_size(file))) 279 271 return -EFAULT; 280 272 281 - buf += sizeof (packet->mad); 273 + buf += hdr_size(file); 282 274 seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); 283 275 if (copy_to_user(buf, recv_buf->mad, seg_payload)) 284 276 return -EFAULT; ··· 288 280 * Multipacket RMPP MAD message. Copy remainder of message. 289 281 * Note that last segment may have a shorter payload. 290 282 */ 291 - if (count < sizeof (packet->mad) + packet->length) { 283 + if (count < hdr_size(file) + packet->length) { 292 284 /* 293 285 * The buffer is too small, return the first RMPP segment, 294 286 * which includes the RMPP message length. 
··· 308 300 return -EFAULT; 309 301 } 310 302 } 311 - return sizeof (packet->mad) + packet->length; 303 + return hdr_size(file) + packet->length; 312 304 } 313 305 314 - static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, 315 - size_t count) 306 + static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, 307 + struct ib_umad_packet *packet, size_t count) 316 308 { 317 - ssize_t size = sizeof (packet->mad) + packet->length; 309 + ssize_t size = hdr_size(file) + packet->length; 318 310 319 311 if (count < size) 320 312 return -EINVAL; 321 313 322 - if (copy_to_user(buf, &packet->mad, size)) 314 + if (copy_to_user(buf, &packet->mad, hdr_size(file))) 315 + return -EFAULT; 316 + 317 + buf += hdr_size(file); 318 + 319 + if (copy_to_user(buf, packet->mad.data, packet->length)) 323 320 return -EFAULT; 324 321 325 322 return size; ··· 337 324 struct ib_umad_packet *packet; 338 325 ssize_t ret; 339 326 340 - if (count < sizeof (struct ib_user_mad)) 327 + if (count < hdr_size(file)) 341 328 return -EINVAL; 342 329 343 330 spin_lock_irq(&file->recv_lock); ··· 361 348 spin_unlock_irq(&file->recv_lock); 362 349 363 350 if (packet->recv_wc) 364 - ret = copy_recv_mad(buf, packet, count); 351 + ret = copy_recv_mad(file, buf, packet, count); 365 352 else 366 - ret = copy_send_mad(buf, packet, count); 353 + ret = copy_send_mad(file, buf, packet, count); 367 354 368 355 if (ret < 0) { 369 356 /* Requeue packet */ ··· 455 442 __be64 *tid; 456 443 int ret, data_len, hdr_len, copy_offset, rmpp_active; 457 444 458 - if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) 445 + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) 459 446 return -EINVAL; 460 447 461 448 packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); 462 449 if (!packet) 463 450 return -ENOMEM; 464 451 465 - if (copy_from_user(&packet->mad, buf, 466 - sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { 452 + if (copy_from_user(&packet->mad, buf, hdr_size(file))) { 467 
453 ret = -EFAULT; 468 454 goto err; 469 455 } ··· 470 458 if (packet->mad.hdr.id < 0 || 471 459 packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { 472 460 ret = -EINVAL; 461 + goto err; 462 + } 463 + 464 + buf += hdr_size(file); 465 + 466 + if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { 467 + ret = -EFAULT; 473 468 goto err; 474 469 } 475 470 ··· 519 500 IB_MGMT_RMPP_FLAG_ACTIVE; 520 501 } 521 502 522 - data_len = count - sizeof (struct ib_user_mad) - hdr_len; 503 + data_len = count - hdr_size(file) - hdr_len; 523 504 packet->msg = ib_create_send_mad(agent, 524 505 be32_to_cpu(packet->mad.hdr.qpn), 525 - 0, rmpp_active, hdr_len, 526 - data_len, GFP_KERNEL); 506 + packet->mad.hdr.pkey_index, rmpp_active, 507 + hdr_len, data_len, GFP_KERNEL); 527 508 if (IS_ERR(packet->msg)) { 528 509 ret = PTR_ERR(packet->msg); 529 510 goto err_ah; ··· 536 517 537 518 /* Copy MAD header. Any RMPP header is already in place. */ 538 519 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); 539 - buf += sizeof (struct ib_user_mad); 540 520 541 521 if (!rmpp_active) { 542 522 if (copy_from_user(packet->msg->mad + copy_offset, ··· 664 646 goto out; 665 647 } 666 648 649 + if (!file->already_used) { 650 + file->already_used = 1; 651 + if (!file->use_pkey_index) { 652 + printk(KERN_WARNING "user_mad: process %s did not enable " 653 + "P_Key index support.\n", current->comm); 654 + printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " 655 + "has info on the new ABI.\n"); 656 + } 657 + } 658 + 667 659 file->agent[agent_id] = agent; 668 660 ret = 0; 669 661 ··· 710 682 return ret; 711 683 } 712 684 685 + static long ib_umad_enable_pkey(struct ib_umad_file *file) 686 + { 687 + int ret = 0; 688 + 689 + down_write(&file->port->mutex); 690 + if (file->already_used) 691 + ret = -EINVAL; 692 + else 693 + file->use_pkey_index = 1; 694 + up_write(&file->port->mutex); 695 + 696 + return ret; 697 + } 698 + 713 699 static long ib_umad_ioctl(struct file *filp, unsigned 
int cmd, 714 700 unsigned long arg) 715 701 { ··· 732 690 return ib_umad_reg_agent(filp->private_data, arg); 733 691 case IB_USER_MAD_UNREGISTER_AGENT: 734 692 return ib_umad_unreg_agent(filp->private_data, arg); 693 + case IB_USER_MAD_ENABLE_PKEY: 694 + return ib_umad_enable_pkey(filp->private_data); 735 695 default: 736 696 return -ENOIOCTLCMD; 737 697 }
+49 -1
include/rdma/ib_user_mad.h
··· 52 52 */ 53 53 54 54 /** 55 - * ib_user_mad_hdr - MAD packet header 55 + * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index 56 56 * @id - ID of agent MAD received with/to be sent with 57 57 * @status - 0 on successful receive, ETIMEDOUT if no response 58 58 * received (transaction ID in data[] will be set to TID of original ··· 71 71 * @gid - Remote GID in GRH 72 72 * @flow_label - Flow label in GRH 73 73 */ 74 + struct ib_user_mad_hdr_old { 75 + __u32 id; 76 + __u32 status; 77 + __u32 timeout_ms; 78 + __u32 retries; 79 + __u32 length; 80 + __be32 qpn; 81 + __be32 qkey; 82 + __be16 lid; 83 + __u8 sl; 84 + __u8 path_bits; 85 + __u8 grh_present; 86 + __u8 gid_index; 87 + __u8 hop_limit; 88 + __u8 traffic_class; 89 + __u8 gid[16]; 90 + __be32 flow_label; 91 + }; 92 + 93 + /** 94 + * ib_user_mad_hdr - MAD packet header 95 + * This layout allows specifying/receiving the P_Key index. To use 96 + * this capability, an application must call the 97 + * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before 98 + * any other actions with the file handle. 
99 + * @id - ID of agent MAD received with/to be sent with 100 + * @status - 0 on successful receive, ETIMEDOUT if no response 101 + * received (transaction ID in data[] will be set to TID of original 102 + * request) (ignored on send) 103 + * @timeout_ms - Milliseconds to wait for response (unset on receive) 104 + * @retries - Number of automatic retries to attempt 105 + * @qpn - Remote QP number received from/to be sent to 106 + * @qkey - Remote Q_Key to be sent with (unset on receive) 107 + * @lid - Remote lid received from/to be sent to 108 + * @sl - Service level received with/to be sent with 109 + * @path_bits - Local path bits received with/to be sent with 110 + * @grh_present - If set, GRH was received/should be sent 111 + * @gid_index - Local GID index to send with (unset on receive) 112 + * @hop_limit - Hop limit in GRH 113 + * @traffic_class - Traffic class in GRH 114 + * @gid - Remote GID in GRH 115 + * @flow_label - Flow label in GRH 116 + * @pkey_index - P_Key index 117 + */ 74 118 struct ib_user_mad_hdr { 75 119 __u32 id; 76 120 __u32 status; ··· 132 88 __u8 traffic_class; 133 89 __u8 gid[16]; 134 90 __be32 flow_label; 91 + __u16 pkey_index; 92 + __u8 reserved[6]; 135 93 }; 136 94 137 95 /** ··· 179 133 struct ib_user_mad_reg_req) 180 134 181 135 #define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) 136 + 137 + #define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) 182 138 183 139 #endif /* IB_USER_MAD_H */