Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

svcrdma: Convert rdma->sc_rw_ctxts to llist

Relieve contention on sc_rw_ctxt_lock by converting rdma->sc_rw_ctxts
to an llist.

The goal is to reduce the average overhead of Send completions,
because a transport's completion handlers are single-threaded on
one CPU core. This change reduces CPU utilization of each Send
completion by 2-3% on my server.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Tom Talpey <tom@talpey.com>

+37 -16
+1 -1
include/linux/sunrpc/svc_rdma.h
@@ -92,7 +92,7 @@
 	spinlock_t		sc_send_lock;
 	struct llist_head	sc_send_ctxts;
 	spinlock_t		sc_rw_ctxt_lock;
-	struct list_head	sc_rw_ctxts;
+	struct llist_head	sc_rw_ctxts;
 
 	u32			sc_pending_recvs;
 	u32			sc_recv_batch;
+35 -14
net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -35,6 +35,7 @@
  * controlling svcxprt_rdma is destroyed.
  */
 struct svc_rdma_rw_ctxt {
+	struct llist_node	rw_node;
 	struct list_head	rw_list;
 	struct rdma_rw_ctx	rw_ctx;
 	unsigned int		rw_nents;
@@ -54,19 +53,18 @@
 svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
 {
 	struct svc_rdma_rw_ctxt *ctxt;
+	struct llist_node *node;
 
 	spin_lock(&rdma->sc_rw_ctxt_lock);
-
-	ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
-	if (ctxt) {
-		list_del(&ctxt->rw_list);
-		spin_unlock(&rdma->sc_rw_ctxt_lock);
+	node = llist_del_first(&rdma->sc_rw_ctxts);
+	spin_unlock(&rdma->sc_rw_ctxt_lock);
+	if (node) {
+		ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
 	} else {
-		spin_unlock(&rdma->sc_rw_ctxt_lock);
 		ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
 			       GFP_KERNEL);
 		if (!ctxt)
 			goto out_noctx;
+
 		INIT_LIST_HEAD(&ctxt->rw_list);
 	}
 
@@ -84,14 +83,17 @@
 	return NULL;
 }
 
+static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+				   struct svc_rdma_rw_ctxt *ctxt,
+				   struct llist_head *list)
+{
+	sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+	llist_add(&ctxt->rw_node, list);
+}
+
 static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_rw_ctxt *ctxt)
 {
-	sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
-
-	spin_lock(&rdma->sc_rw_ctxt_lock);
-	list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
-	spin_unlock(&rdma->sc_rw_ctxt_lock);
+	__svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
 }
 
 /**
@@ -106,9 +101,10 @@
 void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_rw_ctxt *ctxt;
+	struct llist_node *node;
 
-	while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
-		list_del(&ctxt->rw_list);
+	while ((node = llist_del_first(&rdma->sc_rw_ctxts)) != NULL) {
+		ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
 		kfree(ctxt);
 	}
 }
@@ -177,17 +171,32 @@
 	cc->cc_sqecount = 0;
 }
 
+/*
+ * The consumed rw_ctx's are cleaned and placed on a local llist so
+ * that only one atomic llist operation is needed to put them all
+ * back on the free list.
+ */
 static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 				enum dma_data_direction dir)
 {
 	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct llist_node *first, *last;
 	struct svc_rdma_rw_ctxt *ctxt;
+	LLIST_HEAD(free);
 
+	first = last = NULL;
 	while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
 		list_del(&ctxt->rw_list);
 
 		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
 				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
 				    ctxt->rw_nents, dir);
-		svc_rdma_put_rw_ctxt(rdma, ctxt);
+		__svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+
+		ctxt->rw_node.next = first;
+		first = &ctxt->rw_node;
+		if (!last)
+			last = first;
 	}
+	if (first)
+		llist_add_batch(first, last, &rdma->sc_rw_ctxts);
 }
 
 /* State for sending a Write or Reply chunk.
+1 -1
net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -138,7 +138,7 @@
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	init_llist_head(&cma_xprt->sc_send_ctxts);
 	init_llist_head(&cma_xprt->sc_recv_ctxts);
-	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
+	init_llist_head(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);