Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/uverbs: Extend modify_qp and support packet pacing

A new uverbs command ib_uverbs_ex_modify_qp is added to support more QP
attributes. User driver should choose to call the legacy/extended API
based on input mask.

IB_USER_LEGACY_LAST_QP_ATTR_MASK is added to indicate the maximum bit
position which the legacy ib_uverbs_modify_qp supports.
IB_USER_LAST_QP_ATTR_MASK indicates the maximum bit position
which ib_uverbs_ex_modify_qp supports; the value of this mask should be
updated if a new mask bit is added later.

Along with this change, rate_limit is supported by the extended command,
user driver could use it to control packet pacing.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

authored by

Bodong Wang and committed by
Doug Ledford
189aba99 528e5a1b

+144 -67
+1
drivers/infiniband/core/uverbs.h
··· 289 289 IB_UVERBS_DECLARE_EX_CMD(destroy_wq); 290 290 IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); 291 291 IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); 292 + IB_UVERBS_DECLARE_EX_CMD(modify_qp); 292 293 293 294 #endif /* UVERBS_H */
+121 -67
drivers/infiniband/core/uverbs_cmd.c
··· 2328 2328 } 2329 2329 } 2330 2330 2331 - ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, 2332 - struct ib_device *ib_dev, 2333 - const char __user *buf, int in_len, 2334 - int out_len) 2331 + static int modify_qp(struct ib_uverbs_file *file, 2332 + struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) 2335 2333 { 2336 - struct ib_uverbs_modify_qp cmd; 2337 - struct ib_udata udata; 2338 - struct ib_qp *qp; 2339 - struct ib_qp_attr *attr; 2340 - int ret; 2341 - 2342 - if (copy_from_user(&cmd, buf, sizeof cmd)) 2343 - return -EFAULT; 2344 - 2345 - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, 2346 - out_len); 2334 + struct ib_qp_attr *attr; 2335 + struct ib_qp *qp; 2336 + int ret; 2347 2337 2348 2338 attr = kmalloc(sizeof *attr, GFP_KERNEL); 2349 2339 if (!attr) 2350 2340 return -ENOMEM; 2351 2341 2352 - qp = idr_read_qp(cmd.qp_handle, file->ucontext); 2342 + qp = idr_read_qp(cmd->base.qp_handle, file->ucontext); 2353 2343 if (!qp) { 2354 2344 ret = -EINVAL; 2355 2345 goto out; 2356 2346 } 2357 2347 2358 - attr->qp_state = cmd.qp_state; 2359 - attr->cur_qp_state = cmd.cur_qp_state; 2360 - attr->path_mtu = cmd.path_mtu; 2361 - attr->path_mig_state = cmd.path_mig_state; 2362 - attr->qkey = cmd.qkey; 2363 - attr->rq_psn = cmd.rq_psn; 2364 - attr->sq_psn = cmd.sq_psn; 2365 - attr->dest_qp_num = cmd.dest_qp_num; 2366 - attr->qp_access_flags = cmd.qp_access_flags; 2367 - attr->pkey_index = cmd.pkey_index; 2368 - attr->alt_pkey_index = cmd.alt_pkey_index; 2369 - attr->en_sqd_async_notify = cmd.en_sqd_async_notify; 2370 - attr->max_rd_atomic = cmd.max_rd_atomic; 2371 - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; 2372 - attr->min_rnr_timer = cmd.min_rnr_timer; 2373 - attr->port_num = cmd.port_num; 2374 - attr->timeout = cmd.timeout; 2375 - attr->retry_cnt = cmd.retry_cnt; 2376 - attr->rnr_retry = cmd.rnr_retry; 2377 - attr->alt_port_num = cmd.alt_port_num; 2378 - attr->alt_timeout = cmd.alt_timeout; 2348 + attr->qp_state = 
cmd->base.qp_state; 2349 + attr->cur_qp_state = cmd->base.cur_qp_state; 2350 + attr->path_mtu = cmd->base.path_mtu; 2351 + attr->path_mig_state = cmd->base.path_mig_state; 2352 + attr->qkey = cmd->base.qkey; 2353 + attr->rq_psn = cmd->base.rq_psn; 2354 + attr->sq_psn = cmd->base.sq_psn; 2355 + attr->dest_qp_num = cmd->base.dest_qp_num; 2356 + attr->qp_access_flags = cmd->base.qp_access_flags; 2357 + attr->pkey_index = cmd->base.pkey_index; 2358 + attr->alt_pkey_index = cmd->base.alt_pkey_index; 2359 + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; 2360 + attr->max_rd_atomic = cmd->base.max_rd_atomic; 2361 + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; 2362 + attr->min_rnr_timer = cmd->base.min_rnr_timer; 2363 + attr->port_num = cmd->base.port_num; 2364 + attr->timeout = cmd->base.timeout; 2365 + attr->retry_cnt = cmd->base.retry_cnt; 2366 + attr->rnr_retry = cmd->base.rnr_retry; 2367 + attr->alt_port_num = cmd->base.alt_port_num; 2368 + attr->alt_timeout = cmd->base.alt_timeout; 2369 + attr->rate_limit = cmd->rate_limit; 2379 2370 2380 - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); 2381 - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; 2382 - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; 2383 - attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; 2384 - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; 2385 - attr->ah_attr.dlid = cmd.dest.dlid; 2386 - attr->ah_attr.sl = cmd.dest.sl; 2387 - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; 2388 - attr->ah_attr.static_rate = cmd.dest.static_rate; 2389 - attr->ah_attr.ah_flags = cmd.dest.is_global ? 
IB_AH_GRH : 0; 2390 - attr->ah_attr.port_num = cmd.dest.port_num; 2371 + memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16); 2372 + attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label; 2373 + attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index; 2374 + attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit; 2375 + attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class; 2376 + attr->ah_attr.dlid = cmd->base.dest.dlid; 2377 + attr->ah_attr.sl = cmd->base.dest.sl; 2378 + attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits; 2379 + attr->ah_attr.static_rate = cmd->base.dest.static_rate; 2380 + attr->ah_attr.ah_flags = cmd->base.dest.is_global ? 2381 + IB_AH_GRH : 0; 2382 + attr->ah_attr.port_num = cmd->base.dest.port_num; 2391 2383 2392 - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); 2393 - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; 2394 - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; 2395 - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; 2396 - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; 2397 - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; 2398 - attr->alt_ah_attr.sl = cmd.alt_dest.sl; 2399 - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; 2400 - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; 2401 - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? 
IB_AH_GRH : 0; 2402 - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 2384 + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16); 2385 + attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label; 2386 + attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index; 2387 + attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit; 2388 + attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class; 2389 + attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid; 2390 + attr->alt_ah_attr.sl = cmd->base.alt_dest.sl; 2391 + attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits; 2392 + attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate; 2393 + attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ? 2394 + IB_AH_GRH : 0; 2395 + attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num; 2403 2396 2404 2397 if (qp->real_qp == qp) { 2405 - if (cmd.attr_mask & IB_QP_AV) { 2398 + if (cmd->base.attr_mask & IB_QP_AV) { 2406 2399 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); 2407 2400 if (ret) 2408 2401 goto release_qp; 2409 2402 } 2410 2403 ret = qp->device->modify_qp(qp, attr, 2411 - modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); 2404 + modify_qp_mask(qp->qp_type, 2405 + cmd->base.attr_mask), 2406 + udata); 2412 2407 } else { 2413 - ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); 2408 + ret = ib_modify_qp(qp, attr, 2409 + modify_qp_mask(qp->qp_type, 2410 + cmd->base.attr_mask)); 2414 2411 } 2415 - 2416 - if (ret) 2417 - goto release_qp; 2418 - 2419 - ret = in_len; 2420 2412 2421 2413 release_qp: 2422 2414 put_qp_read(qp); 2423 2415 2424 2416 out: 2425 2417 kfree(attr); 2418 + 2419 + return ret; 2420 + } 2421 + 2422 + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, 2423 + struct ib_device *ib_dev, 2424 + const char __user *buf, int in_len, 2425 + int out_len) 2426 + { 2427 + struct ib_uverbs_ex_modify_qp cmd = {}; 2428 + struct ib_udata udata; 2429 + int ret; 2430 
+ 2431 + if (copy_from_user(&cmd.base, buf, sizeof(cmd.base))) 2432 + return -EFAULT; 2433 + 2434 + if (cmd.base.attr_mask & 2435 + ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) 2436 + return -EOPNOTSUPP; 2437 + 2438 + INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, 2439 + in_len - sizeof(cmd.base), out_len); 2440 + 2441 + ret = modify_qp(file, &cmd, &udata); 2442 + if (ret) 2443 + return ret; 2444 + 2445 + return in_len; 2446 + } 2447 + 2448 + int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, 2449 + struct ib_device *ib_dev, 2450 + struct ib_udata *ucore, 2451 + struct ib_udata *uhw) 2452 + { 2453 + struct ib_uverbs_ex_modify_qp cmd = {}; 2454 + int ret; 2455 + 2456 + /* 2457 + * Last bit is reserved for extending the attr_mask by 2458 + * using another field. 2459 + */ 2460 + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); 2461 + 2462 + if (ucore->inlen < sizeof(cmd.base)) 2463 + return -EINVAL; 2464 + 2465 + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); 2466 + if (ret) 2467 + return ret; 2468 + 2469 + if (cmd.base.attr_mask & 2470 + ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) 2471 + return -EOPNOTSUPP; 2472 + 2473 + if (ucore->inlen > sizeof(cmd)) { 2474 + if (ib_is_udata_cleared(ucore, sizeof(cmd), 2475 + ucore->inlen - sizeof(cmd))) 2476 + return -EOPNOTSUPP; 2477 + } 2478 + 2479 + ret = modify_qp(file, &cmd, uhw); 2426 2480 2427 2481 return ret; 2428 2482 }
+1
drivers/infiniband/core/uverbs_main.c
··· 137 137 [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, 138 138 [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, 139 139 [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, 140 + [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, 140 141 }; 141 142 142 143 static void ib_uverbs_add_one(struct ib_device *device);
+21
include/uapi/rdma/ib_user_verbs.h
··· 37 37 #define IB_USER_VERBS_H 38 38 39 39 #include <linux/types.h> 40 + #include <rdma/ib_verbs.h> 40 41 41 42 /* 42 43 * Increment this value if any changes that break userspace ABI ··· 94 93 IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, 95 94 IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, 96 95 IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, 96 + IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP, 97 97 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 98 98 IB_USER_VERBS_EX_CMD_DESTROY_FLOW, 99 99 IB_USER_VERBS_EX_CMD_CREATE_WQ, ··· 547 545 IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, 548 546 }; 549 547 548 + enum { 549 + IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN 550 + }; 551 + 552 + enum { 553 + IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT 554 + }; 555 + 550 556 struct ib_uverbs_ex_create_qp { 551 557 __u64 user_handle; 552 558 __u32 pd_handle; ··· 694 684 __u64 driver_data[0]; 695 685 }; 696 686 687 + struct ib_uverbs_ex_modify_qp { 688 + struct ib_uverbs_modify_qp base; 689 + __u32 rate_limit; 690 + __u32 reserved; 691 + }; 692 + 697 693 struct ib_uverbs_modify_qp_resp { 694 + }; 695 + 696 + struct ib_uverbs_ex_modify_qp_resp { 697 + __u32 comp_mask; 698 + __u32 response_length; 698 699 }; 699 700 700 701 struct ib_uverbs_destroy_qp {