Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/vmw_pvrdma: Use resource ids from physical device if available

This change allows the RDMA stack to use physical resource numbers if they
are passed up from the device. This is accomplished by separating the
concept of the QP number from the QP handle. Previously, the two were the
same, as the QP number was exposed to the guest and also used to reference
a virtual QP in the device backend.

With physical resource numbers exposed, the QP number given to the guest
is the number assigned from the physical HCA's QP, while the QP handle is
still the internal handle used to reference a virtual QP. Regardless of
whether the device is exposing physical ids, the driver will still try to
pick up the QP handle from the backend if possible. The MR keys exposed to
the guest will also be the MR keys created by the physical HCA, instead of
virtual MR keys. The distinction between handle and keys is already
present for MRs so there is no need to do anything special here.

A new version of the create QP response has been added to the device API
to pass up the QP number and handle. The driver will also report these to
userspace in the udata response if userspace supports it, or will refuse to
create the queue pair if it does not. The destroy QP code was also refactored
so that it can be reused when the copy to userspace fails.

Link: https://lore.kernel.org/r/20191028181444.19448-1-aditr@vmware.com
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

authored by

Bryan Tan and committed by
Jason Gunthorpe
a52dc3a1 c9121262

+106 -29
+14 -1
drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
··· 58 58 #define PVRDMA_ROCEV1_VERSION 17 59 59 #define PVRDMA_ROCEV2_VERSION 18 60 60 #define PVRDMA_PPN64_VERSION 19 61 - #define PVRDMA_VERSION PVRDMA_PPN64_VERSION 61 + #define PVRDMA_QPHANDLE_VERSION 20 62 + #define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION 62 63 63 64 #define PVRDMA_BOARD_ID 1 64 65 #define PVRDMA_REV_ID 1 ··· 582 581 u32 max_inline_data; 583 582 }; 584 583 584 + struct pvrdma_cmd_create_qp_resp_v2 { 585 + struct pvrdma_cmd_resp_hdr hdr; 586 + u32 qpn; 587 + u32 qp_handle; 588 + u32 max_send_wr; 589 + u32 max_recv_wr; 590 + u32 max_send_sge; 591 + u32 max_recv_sge; 592 + u32 max_inline_data; 593 + }; 594 + 585 595 struct pvrdma_cmd_modify_qp { 586 596 struct pvrdma_cmd_hdr hdr; 587 597 u32 qp_handle; ··· 675 663 struct pvrdma_cmd_create_cq_resp create_cq_resp; 676 664 struct pvrdma_cmd_resize_cq_resp resize_cq_resp; 677 665 struct pvrdma_cmd_create_qp_resp create_qp_resp; 666 + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; 678 667 struct pvrdma_cmd_query_qp_resp query_qp_resp; 679 668 struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; 680 669 struct pvrdma_cmd_create_srq_resp create_srq_resp;
+87 -28
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
··· 52 52 53 53 #include "pvrdma.h" 54 54 55 + static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, 56 + struct pvrdma_qp *qp); 57 + 55 58 static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, 56 59 struct pvrdma_cq **recv_cq) 57 60 { ··· 198 195 union pvrdma_cmd_resp rsp; 199 196 struct pvrdma_cmd_create_qp *cmd = &req.create_qp; 200 197 struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; 198 + struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2; 201 199 struct pvrdma_create_qp ucmd; 200 + struct pvrdma_create_qp_resp qp_resp = {}; 202 201 unsigned long flags; 203 202 int ret; 204 203 bool is_srq = !!init_attr->srq; ··· 262 257 263 258 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { 264 259 ret = -EFAULT; 260 + goto err_qp; 261 + } 262 + 263 + /* Userspace supports qpn and qp handles? */ 264 + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION && 265 + udata->outlen < sizeof(qp_resp)) { 266 + dev_warn(&dev->pdev->dev, 267 + "create queuepair not supported\n"); 268 + ret = -EOPNOTSUPP; 265 269 goto err_qp; 266 270 } 267 271 ··· 393 379 } 394 380 395 381 /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ 396 - qp->qp_handle = resp->qpn; 397 382 qp->port = init_attr->port_num; 398 - qp->ibqp.qp_num = resp->qpn; 383 + 384 + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) { 385 + qp->ibqp.qp_num = resp_v2->qpn; 386 + qp->qp_handle = resp_v2->qp_handle; 387 + } else { 388 + qp->ibqp.qp_num = resp->qpn; 389 + qp->qp_handle = resp->qpn; 390 + } 391 + 399 392 spin_lock_irqsave(&dev->qp_tbl_lock, flags); 400 393 dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; 401 394 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); 395 + 396 + if (udata) { 397 + qp_resp.qpn = qp->ibqp.qp_num; 398 + qp_resp.qp_handle = qp->qp_handle; 399 + 400 + if (ib_copy_to_udata(udata, &qp_resp, 401 + min(udata->outlen, sizeof(qp_resp)))) { 402 + dev_warn(&dev->pdev->dev, 403 + "failed to copy back udata\n"); 404 + 
__pvrdma_destroy_qp(dev, qp); 405 + return ERR_PTR(-EINVAL); 406 + } 407 + } 402 408 403 409 return &qp->ibqp; 404 410 ··· 434 400 return ERR_PTR(ret); 435 401 } 436 402 437 - static void pvrdma_free_qp(struct pvrdma_qp *qp) 403 + static void _pvrdma_free_qp(struct pvrdma_qp *qp) 438 404 { 405 + unsigned long flags; 439 406 struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); 440 - struct pvrdma_cq *scq; 441 - struct pvrdma_cq *rcq; 442 - unsigned long flags, scq_flags, rcq_flags; 443 - 444 - /* In case cq is polling */ 445 - get_cqs(qp, &scq, &rcq); 446 - pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); 447 - 448 - _pvrdma_flush_cqe(qp, scq); 449 - if (scq != rcq) 450 - _pvrdma_flush_cqe(qp, rcq); 451 407 452 408 spin_lock_irqsave(&dev->qp_tbl_lock, flags); 453 409 dev->qp_tbl[qp->qp_handle] = NULL; 454 410 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); 455 - 456 - pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); 457 411 458 412 if (refcount_dec_and_test(&qp->refcnt)) 459 413 complete(&qp->free); ··· 457 435 atomic_dec(&dev->num_qps); 458 436 } 459 437 460 - /** 461 - * pvrdma_destroy_qp - destroy a queue pair 462 - * @qp: the queue pair to destroy 463 - * @udata: user data or null for kernel object 464 - * 465 - * @return: 0 on success. 466 - */ 467 - int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) 438 + static void pvrdma_free_qp(struct pvrdma_qp *qp) 468 439 { 469 - struct pvrdma_qp *vqp = to_vqp(qp); 440 + struct pvrdma_cq *scq; 441 + struct pvrdma_cq *rcq; 442 + unsigned long scq_flags, rcq_flags; 443 + 444 + /* In case cq is polling */ 445 + get_cqs(qp, &scq, &rcq); 446 + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); 447 + 448 + _pvrdma_flush_cqe(qp, scq); 449 + if (scq != rcq) 450 + _pvrdma_flush_cqe(qp, rcq); 451 + 452 + /* 453 + * We're now unlocking the CQs before clearing out the qp handle this 454 + * should still be safe. 
We have destroyed the backend QP and flushed 455 + * the CQEs so there should be no other completions for this QP. 456 + */ 457 + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); 458 + 459 + _pvrdma_free_qp(qp); 460 + } 461 + 462 + static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev, 463 + u32 qp_handle) 464 + { 470 465 union pvrdma_cmd_req req; 471 466 struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; 472 467 int ret; 473 468 474 469 memset(cmd, 0, sizeof(*cmd)); 475 470 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; 476 - cmd->qp_handle = vqp->qp_handle; 471 + cmd->qp_handle = qp_handle; 477 472 478 - ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); 473 + ret = pvrdma_cmd_post(dev, &req, NULL, 0); 479 474 if (ret < 0) 480 - dev_warn(&to_vdev(qp->device)->pdev->dev, 475 + dev_warn(&dev->pdev->dev, 481 476 "destroy queuepair failed, error: %d\n", ret); 477 + } 482 478 479 + /** 480 + * pvrdma_destroy_qp - destroy a queue pair 481 + * @qp: the queue pair to destroy 482 + * @udata: user data or null for kernel object 483 + * 484 + * @return: always 0. 485 + */ 486 + int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) 487 + { 488 + struct pvrdma_qp *vqp = to_vqp(qp); 489 + 490 + _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle); 483 491 pvrdma_free_qp(vqp); 484 492 485 493 return 0; 494 + } 495 + 496 + static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, 497 + struct pvrdma_qp *qp) 498 + { 499 + _pvrdma_destroy_qp_work(dev, qp->qp_handle); 500 + _pvrdma_free_qp(qp); 486 501 } 487 502 488 503 /**
+5
include/uapi/rdma/vmw_pvrdma-abi.h
··· 179 179 __aligned_u64 qp_addr; 180 180 }; 181 181 182 + struct pvrdma_create_qp_resp { 183 + __u32 qpn; 184 + __u32 qp_handle; 185 + }; 186 + 182 187 /* PVRDMA masked atomic compare and swap */ 183 188 struct pvrdma_ex_cmp_swap { 184 189 __aligned_u64 swap_val;