Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xprtrdma: Remove temp allocation of rpcrdma_rep objects

The original code was designed so that most calls to
rpcrdma_rep_create() would occur on the NUMA node that the device
preferred. There are a few cases where that's not possible, so
those reps are marked as temporary.

However, we have the device (and its preferred node) already in
rpcrdma_rep_create(), so let's use that to guarantee the memory
is allocated from the correct node.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Authored by Chuck Lever; committed by Anna Schumaker.
0e13dd9e 9d53378c

+26 -37
+1 -2
net/sunrpc/xprtrdma/rpc_rdma.c
··· 1471 1471 credits = 1; /* don't deadlock */ 1472 1472 else if (credits > r_xprt->rx_ep->re_max_requests) 1473 1473 credits = r_xprt->rx_ep->re_max_requests; 1474 - rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1), 1475 - false); 1474 + rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1)); 1476 1475 if (buf->rb_credits != credits) 1477 1476 rpcrdma_update_cwnd(r_xprt, credits); 1478 1477
+24 -33
net/sunrpc/xprtrdma/verbs.c
··· 69 69 struct rpcrdma_sendctx *sc); 70 70 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); 71 71 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 72 - static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); 73 72 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); 74 73 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 75 74 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 76 75 static void rpcrdma_ep_get(struct rpcrdma_ep *ep); 77 76 static int rpcrdma_ep_put(struct rpcrdma_ep *ep); 77 + static struct rpcrdma_regbuf * 78 + rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction, 79 + int node); 78 80 static struct rpcrdma_regbuf * 79 81 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); 80 82 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); ··· 512 510 * outstanding Receives. 513 511 */ 514 512 rpcrdma_ep_get(ep); 515 - rpcrdma_post_recvs(r_xprt, 1, true); 513 + rpcrdma_post_recvs(r_xprt, 1); 516 514 517 515 rc = rdma_connect(ep->re_id, &ep->re_remote_cma); 518 516 if (rc) ··· 945 943 } 946 944 947 945 static noinline 948 - struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, 949 - bool temp) 946 + struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt) 950 947 { 951 948 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 949 + struct rpcrdma_ep *ep = r_xprt->rx_ep; 950 + struct ib_device *device = ep->re_id->device; 952 951 struct rpcrdma_rep *rep; 953 952 954 953 rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); 955 954 if (rep == NULL) 956 955 goto out; 957 956 958 - rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, 959 - DMA_FROM_DEVICE); 957 + rep->rr_rdmabuf = rpcrdma_regbuf_alloc_node(ep->re_inline_recv, 958 + DMA_FROM_DEVICE, 959 + ibdev_to_node(device)); 960 960 if (!rep->rr_rdmabuf) 961 961 goto out_free; 962 962 ··· 973 969 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 974 970 rep->rr_recv_wr.sg_list = 
&rep->rr_rdmabuf->rg_iov; 975 971 rep->rr_recv_wr.num_sge = 1; 976 - rep->rr_temp = temp; 977 972 978 973 spin_lock(&buf->rb_lock); 979 974 list_add(&rep->rr_all, &buf->rb_all_reps); ··· 989 986 { 990 987 rpcrdma_regbuf_free(rep->rr_rdmabuf); 991 988 kfree(rep); 992 - } 993 - 994 - static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) 995 - { 996 - struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf; 997 - 998 - spin_lock(&buf->rb_lock); 999 - list_del(&rep->rr_all); 1000 - spin_unlock(&buf->rb_lock); 1001 - 1002 - rpcrdma_rep_free(rep); 1003 989 } 1004 990 1005 991 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) ··· 1022 1030 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1023 1031 struct rpcrdma_rep *rep; 1024 1032 1025 - list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { 1033 + list_for_each_entry(rep, &buf->rb_all_reps, rr_all) 1026 1034 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); 1027 - rep->rr_temp = true; /* Mark this rep for destruction */ 1028 - } 1029 1035 } 1030 1036 1031 1037 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) ··· 1240 1250 * or Replies they may be registered externally via frwr_map. 
1241 1251 */ 1242 1252 static struct rpcrdma_regbuf * 1243 - rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) 1253 + rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction, 1254 + int node) 1244 1255 { 1245 1256 struct rpcrdma_regbuf *rb; 1246 1257 1247 - rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); 1258 + rb = kmalloc_node(sizeof(*rb), XPRTRDMA_GFP_FLAGS, node); 1248 1259 if (!rb) 1249 1260 return NULL; 1250 - rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); 1261 + rb->rg_data = kmalloc_node(size, XPRTRDMA_GFP_FLAGS, node); 1251 1262 if (!rb->rg_data) { 1252 1263 kfree(rb); 1253 1264 return NULL; ··· 1258 1267 rb->rg_direction = direction; 1259 1268 rb->rg_iov.length = size; 1260 1269 return rb; 1270 + } 1271 + 1272 + static struct rpcrdma_regbuf * 1273 + rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) 1274 + { 1275 + return rpcrdma_regbuf_alloc_node(size, direction, NUMA_NO_NODE); 1261 1276 } 1262 1277 1263 1278 /** ··· 1343 1346 * rpcrdma_post_recvs - Refill the Receive Queue 1344 1347 * @r_xprt: controlling transport instance 1345 1348 * @needed: current credit grant 1346 - * @temp: mark Receive buffers to be deleted after one use 1347 1349 * 1348 1350 */ 1349 - void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) 1351 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) 1350 1352 { 1351 1353 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1352 1354 struct rpcrdma_ep *ep = r_xprt->rx_ep; ··· 1359 1363 if (likely(ep->re_receive_count > needed)) 1360 1364 goto out; 1361 1365 needed -= ep->re_receive_count; 1362 - if (!temp) 1363 - needed += RPCRDMA_MAX_RECV_BATCH; 1366 + needed += RPCRDMA_MAX_RECV_BATCH; 1364 1367 1365 1368 if (atomic_inc_return(&ep->re_receiving) > 1) 1366 1369 goto out; ··· 1368 1373 wr = NULL; 1369 1374 while (needed) { 1370 1375 rep = rpcrdma_rep_get_locked(buf); 1371 - if (rep && rep->rr_temp) { 1372 - rpcrdma_rep_destroy(rep); 1373 - continue; 
1374 - } 1375 1376 if (!rep) 1376 - rep = rpcrdma_rep_create(r_xprt, temp); 1377 + rep = rpcrdma_rep_create(r_xprt); 1377 1378 if (!rep) 1378 1379 break; 1379 1380 if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+1 -2
net/sunrpc/xprtrdma/xprt_rdma.h
··· 200 200 __be32 rr_proc; 201 201 int rr_wc_flags; 202 202 u32 rr_inv_rkey; 203 - bool rr_temp; 204 203 struct rpcrdma_regbuf *rr_rdmabuf; 205 204 struct rpcrdma_xprt *rr_rxprt; 206 205 struct rpc_rqst *rr_rqst; ··· 467 468 int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); 468 469 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); 469 470 470 - void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp); 471 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed); 471 472 472 473 /* 473 474 * Buffer calls - xprtrdma/verbs.c