Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rdma: Enable ib_alloc_cq to spread work over a device's comp_vectors

Send and Receive completion is handled on a single CPU selected at
the time each Completion Queue is allocated. Typically this is when
an initiator instantiates an RDMA transport, or when a target
accepts an RDMA connection.

Some ULPs cannot open a connection per CPU to spread completion
workload across available CPUs and MSI vectors. For such ULPs,
provide an API that allows the RDMA core to select a completion
vector based on the device's complement of available comp_vectors.

ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted
to use the new API so that their completion workloads interfere less
with each other.

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Cc: <linux-cifs@vger.kernel.org>
Cc: <v9fs-developer@lists.sourceforge.net>
Link: https://lore.kernel.org/r/20190729171923.13428.52555.stgit@manet.1015granger.net
Signed-off-by: Doug Ledford <dledford@redhat.com>

authored by

Chuck Lever and committed by
Doug Ledford
20cf4e02 31d0e6c1

+68 -20
+28
drivers/infiniband/core/cq.c
··· 253 253 EXPORT_SYMBOL(__ib_alloc_cq_user); 254 254 255 255 /** 256 + * __ib_alloc_cq_any - allocate a completion queue 257 + * @dev: device to allocate the CQ for 258 + * @private: driver private data, accessible from cq->cq_context 259 + * @nr_cqe: number of CQEs to allocate 260 + * @poll_ctx: context to poll the CQ from 261 + * @caller: module owner name 262 + * 263 + * Attempt to spread ULP Completion Queues over each device's interrupt 264 + * vectors. A simple best-effort mechanism is used. 265 + */ 266 + struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, 267 + int nr_cqe, enum ib_poll_context poll_ctx, 268 + const char *caller) 269 + { 270 + static atomic_t counter; 271 + int comp_vector = 0; 272 + 273 + if (dev->num_comp_vectors > 1) 274 + comp_vector = 275 + atomic_inc_return(&counter) % 276 + min_t(int, dev->num_comp_vectors, num_online_cpus()); 277 + 278 + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, 279 + caller, NULL); 280 + } 281 + EXPORT_SYMBOL(__ib_alloc_cq_any); 282 + 283 + /** 256 284 * ib_free_cq_user - free a completion queue 257 285 * @cq: completion queue to free. 258 286 * @udata: User data or NULL for kernel object
+2 -2
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 1767 1767 goto out; 1768 1768 1769 1769 retry: 1770 - ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size, 1771 - 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); 1770 + ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, 1771 + IB_POLL_WORKQUEUE); 1772 1772 if (IS_ERR(ch->cq)) { 1773 1773 ret = PTR_ERR(ch->cq); 1774 1774 pr_err("failed to create CQ cqe= %d ret= %d\n",
+6 -4
fs/cifs/smbdirect.c
··· 1654 1654 1655 1655 info->send_cq = NULL; 1656 1656 info->recv_cq = NULL; 1657 - info->send_cq = ib_alloc_cq(info->id->device, info, 1658 - info->send_credit_target, 0, IB_POLL_SOFTIRQ); 1657 + info->send_cq = 1658 + ib_alloc_cq_any(info->id->device, info, 1659 + info->send_credit_target, IB_POLL_SOFTIRQ); 1659 1660 if (IS_ERR(info->send_cq)) { 1660 1661 info->send_cq = NULL; 1661 1662 goto alloc_cq_failed; 1662 1663 } 1663 1664 1664 - info->recv_cq = ib_alloc_cq(info->id->device, info, 1665 - info->receive_credit_max, 0, IB_POLL_SOFTIRQ); 1665 + info->recv_cq = 1666 + ib_alloc_cq_any(info->id->device, info, 1667 + info->receive_credit_max, IB_POLL_SOFTIRQ); 1666 1668 if (IS_ERR(info->recv_cq)) { 1667 1669 info->recv_cq = NULL; 1668 1670 goto alloc_cq_failed;
+19
include/rdma/ib_verbs.h
··· 3711 3711 NULL); 3712 3712 } 3713 3713 3714 + struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, 3715 + int nr_cqe, enum ib_poll_context poll_ctx, 3716 + const char *caller); 3717 + 3718 + /** 3719 + * ib_alloc_cq_any: Allocate kernel CQ 3720 + * @dev: The IB device 3721 + * @private: Private data attached to the CQE 3722 + * @nr_cqe: Number of CQEs in the CQ 3723 + * @poll_ctx: Context used for polling the CQ 3724 + */ 3725 + static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, 3726 + void *private, int nr_cqe, 3727 + enum ib_poll_context poll_ctx) 3728 + { 3729 + return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, 3730 + KBUILD_MODNAME); 3731 + } 3732 + 3714 3733 /** 3715 3734 * ib_free_cq_user - Free kernel/user CQ 3716 3735 * @cq: The CQ to free
+3 -3
net/9p/trans_rdma.c
··· 685 685 goto error; 686 686 687 687 /* Create the Completion Queue */ 688 - rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, 689 - opts.sq_depth + opts.rq_depth + 1, 690 - 0, IB_POLL_SOFTIRQ); 688 + rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client, 689 + opts.sq_depth + opts.rq_depth + 1, 690 + IB_POLL_SOFTIRQ); 691 691 if (IS_ERR(rdma->cq)) 692 692 goto error; 693 693
+4 -4
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 454 454 dprintk("svcrdma: error creating PD for connect request\n"); 455 455 goto errout; 456 456 } 457 - newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, 458 - 0, IB_POLL_WORKQUEUE); 457 + newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, 458 + IB_POLL_WORKQUEUE); 459 459 if (IS_ERR(newxprt->sc_sq_cq)) { 460 460 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 461 461 goto errout; 462 462 } 463 - newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth, 464 - 0, IB_POLL_WORKQUEUE); 463 + newxprt->sc_rq_cq = 464 + ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); 465 465 if (IS_ERR(newxprt->sc_rq_cq)) { 466 466 dprintk("svcrdma: error creating RQ CQ for connect request\n"); 467 467 goto errout;
+6 -7
net/sunrpc/xprtrdma/verbs.c
··· 521 521 init_waitqueue_head(&ep->rep_connect_wait); 522 522 ep->rep_receive_count = 0; 523 523 524 - sendcq = ib_alloc_cq(ia->ri_id->device, NULL, 525 - ep->rep_attr.cap.max_send_wr + 1, 526 - ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0, 527 - IB_POLL_WORKQUEUE); 524 + sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL, 525 + ep->rep_attr.cap.max_send_wr + 1, 526 + IB_POLL_WORKQUEUE); 528 527 if (IS_ERR(sendcq)) { 529 528 rc = PTR_ERR(sendcq); 530 529 goto out1; 531 530 } 532 531 533 - recvcq = ib_alloc_cq(ia->ri_id->device, NULL, 534 - ep->rep_attr.cap.max_recv_wr + 1, 535 - 0, IB_POLL_WORKQUEUE); 532 + recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL, 533 + ep->rep_attr.cap.max_recv_wr + 1, 534 + IB_POLL_WORKQUEUE); 536 535 if (IS_ERR(recvcq)) { 537 536 rc = PTR_ERR(recvcq); 538 537 goto out2;