Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfs-rdma-for-5.5-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

NFSoRDMA Client Updates for Linux 5.5

New Features:
- New tracepoints for congestion control and Local Invalidate WRs

Bugfixes and Cleanups:
- Eliminate log noise in call_reserveresult
- Fix unstable connections after a reconnect
- Clean up some code duplication
- Close race between waking a sender and posting a receive
- Fix MR list corruption, and clean up MR usage
- Remove unused rpcrdma_sendctx fields
- Try to avoid DMA mapping pages if it is too costly
- Wake pending tasks if connection fails
- Replace some dprintk()s with tracepoints

+661 -409
+138 -72
include/trace/events/rpcrdma.h
··· 85 85 ), \ 86 86 TP_ARGS(r_xprt)) 87 87 88 + DECLARE_EVENT_CLASS(xprtrdma_connect_class, 89 + TP_PROTO( 90 + const struct rpcrdma_xprt *r_xprt, 91 + int rc 92 + ), 93 + 94 + TP_ARGS(r_xprt, rc), 95 + 96 + TP_STRUCT__entry( 97 + __field(const void *, r_xprt) 98 + __field(int, rc) 99 + __field(int, connect_status) 100 + __string(addr, rpcrdma_addrstr(r_xprt)) 101 + __string(port, rpcrdma_portstr(r_xprt)) 102 + ), 103 + 104 + TP_fast_assign( 105 + __entry->r_xprt = r_xprt; 106 + __entry->rc = rc; 107 + __entry->connect_status = r_xprt->rx_ep.rep_connected; 108 + __assign_str(addr, rpcrdma_addrstr(r_xprt)); 109 + __assign_str(port, rpcrdma_portstr(r_xprt)); 110 + ), 111 + 112 + TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d", 113 + __get_str(addr), __get_str(port), __entry->r_xprt, 114 + __entry->rc, __entry->connect_status 115 + ) 116 + ); 117 + 118 + #define DEFINE_CONN_EVENT(name) \ 119 + DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name, \ 120 + TP_PROTO( \ 121 + const struct rpcrdma_xprt *r_xprt, \ 122 + int rc \ 123 + ), \ 124 + TP_ARGS(r_xprt, rc)) 125 + 88 126 DECLARE_EVENT_CLASS(xprtrdma_rdch_event, 89 127 TP_PROTO( 90 128 const struct rpc_task *task, ··· 371 333 ) 372 334 ); 373 335 374 - TRACE_EVENT(xprtrdma_disconnect, 336 + TRACE_EVENT(xprtrdma_inline_thresh, 375 337 TP_PROTO( 376 - const struct rpcrdma_xprt *r_xprt, 377 - int status 338 + const struct rpcrdma_xprt *r_xprt 378 339 ), 379 340 380 - TP_ARGS(r_xprt, status), 341 + TP_ARGS(r_xprt), 381 342 382 343 TP_STRUCT__entry( 383 344 __field(const void *, r_xprt) 384 - __field(int, status) 385 - __field(int, connected) 345 + __field(unsigned int, inline_send) 346 + __field(unsigned int, inline_recv) 347 + __field(unsigned int, max_send) 348 + __field(unsigned int, max_recv) 349 + __string(addr, rpcrdma_addrstr(r_xprt)) 350 + __string(port, rpcrdma_portstr(r_xprt)) 351 + ), 352 + 353 + TP_fast_assign( 354 + const struct rpcrdma_ep *ep = &r_xprt->rx_ep; 355 + 356 + __entry->r_xprt = r_xprt; 357 + __entry->inline_send = ep->rep_inline_send; 358 + __entry->inline_recv = ep->rep_inline_recv; 359 + __entry->max_send = ep->rep_max_inline_send; 360 + __entry->max_recv = ep->rep_max_inline_recv; 361 + __assign_str(addr, rpcrdma_addrstr(r_xprt)); 362 + __assign_str(port, rpcrdma_portstr(r_xprt)); 363 + ), 364 + 365 + TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u", 366 + __get_str(addr), __get_str(port), __entry->r_xprt, 367 + __entry->inline_send, __entry->inline_recv, 368 + __entry->max_send, __entry->max_recv 369 + ) 370 + ); 371 + 372 + DEFINE_CONN_EVENT(connect); 373 + DEFINE_CONN_EVENT(disconnect); 374 + 375 + DEFINE_RXPRT_EVENT(xprtrdma_create); 376 + DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); 377 + DEFINE_RXPRT_EVENT(xprtrdma_remove); 378 + DEFINE_RXPRT_EVENT(xprtrdma_reinsert); 379 + DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); 380 + DEFINE_RXPRT_EVENT(xprtrdma_op_close); 381 + DEFINE_RXPRT_EVENT(xprtrdma_op_setport); 382 + 383 + TRACE_EVENT(xprtrdma_op_connect, 384 + TP_PROTO( 385 + const struct rpcrdma_xprt *r_xprt, 386 + unsigned long delay 387 + ), 388 + 389 + TP_ARGS(r_xprt, delay), 390 + 391 + TP_STRUCT__entry( 392 + __field(const void *, r_xprt) 393 + __field(unsigned long, delay) 386 394 __string(addr, rpcrdma_addrstr(r_xprt)) 387 395 __string(port, rpcrdma_portstr(r_xprt)) 388 396 ), 389 397 390 398 TP_fast_assign( 391 399 __entry->r_xprt = r_xprt; 392 - __entry->status = status; 393 - __entry->connected = r_xprt->rx_ep.rep_connected; 400 + __entry->delay = delay; 394 401 __assign_str(addr, rpcrdma_addrstr(r_xprt)); 395 402 __assign_str(port, rpcrdma_portstr(r_xprt)); 396 403 ), 397 404 398 - TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", 399 - __get_str(addr), __get_str(port), 400 - __entry->r_xprt, __entry->status, 401 - __entry->connected == 1 ? "still " : "dis" 405 + TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", 406 + __get_str(addr), __get_str(port), __entry->r_xprt, 407 + __entry->delay 402 408 ) 403 409 ); 404 410 405 - DEFINE_RXPRT_EVENT(xprtrdma_conn_start); 406 - DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); 407 - DEFINE_RXPRT_EVENT(xprtrdma_create); 408 - DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); 409 - DEFINE_RXPRT_EVENT(xprtrdma_remove); 410 - DEFINE_RXPRT_EVENT(xprtrdma_reinsert); 411 - DEFINE_RXPRT_EVENT(xprtrdma_reconnect); 412 - DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); 413 - DEFINE_RXPRT_EVENT(xprtrdma_op_close); 414 - DEFINE_RXPRT_EVENT(xprtrdma_op_connect); 415 411 416 412 TRACE_EVENT(xprtrdma_op_set_cto, 417 413 TP_PROTO( ··· 604 532 DEFINE_WRCH_EVENT(reply); 605 533 606 534 TRACE_DEFINE_ENUM(rpcrdma_noch); 535 + TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); 536 + TRACE_DEFINE_ENUM(rpcrdma_noch_mapped); 607 537 TRACE_DEFINE_ENUM(rpcrdma_readch); 608 538 TRACE_DEFINE_ENUM(rpcrdma_areadch); 609 539 TRACE_DEFINE_ENUM(rpcrdma_writech); ··· 614 540 #define xprtrdma_show_chunktype(x) \ 615 541 __print_symbolic(x, \ 616 542 { rpcrdma_noch, "inline" }, \ 543 + { rpcrdma_noch_pullup, "pullup" }, \ 544 + { rpcrdma_noch_mapped, "mapped" }, \ 617 545 { rpcrdma_readch, "read list" }, \ 618 546 { rpcrdma_areadch, "*read list" }, \ 619 547 { rpcrdma_writech, "write list" }, \ ··· 743 667 __entry->client_id = rqst->rq_task->tk_client ? 744 668 rqst->rq_task->tk_client->cl_clid : -1; 745 669 __entry->req = req; 746 - __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; 747 - __entry->signaled = req->rl_sendctx->sc_wr.send_flags & 748 - IB_SEND_SIGNALED; 670 + __entry->num_sge = req->rl_wr.num_sge; 671 + __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; 749 672 __entry->status = status; 750 673 ), 751 674 ··· 807 732 TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", 808 733 __get_str(addr), __get_str(port), __entry->r_xprt, 809 734 __entry->count, __entry->posted, __entry->status 735 + ) 736 + ); 737 + 738 + TRACE_EVENT(xprtrdma_post_linv, 739 + TP_PROTO( 740 + const struct rpcrdma_req *req, 741 + int status 742 + ), 743 + 744 + TP_ARGS(req, status), 745 + 746 + TP_STRUCT__entry( 747 + __field(const void *, req) 748 + __field(int, status) 749 + __field(u32, xid) 750 + ), 751 + 752 + TP_fast_assign( 753 + __entry->req = req; 754 + __entry->status = status; 755 + __entry->xid = be32_to_cpu(req->rl_slot.rq_xid); 756 + ), 757 + 758 + TP_printk("req=%p xid=0x%08x status=%d", 759 + __entry->req, __entry->xid, __entry->status 810 760 ) 811 761 ); 812 762 ··· 1121 1021 TRACE_EVENT(xprtrdma_fixup, 1122 1022 TP_PROTO( 1123 1023 const struct rpc_rqst *rqst, 1124 - int len, 1125 - int hdrlen 1024 + unsigned long fixup 1126 1025 ), 1127 1026 1128 - TP_ARGS(rqst, len, hdrlen), 1027 + TP_ARGS(rqst, fixup), 1129 1028 1130 1029 TP_STRUCT__entry( 1131 1030 __field(unsigned int, task_id) 1132 1031 __field(unsigned int, client_id) 1133 - __field(const void *, base) 1134 - __field(int, len) 1135 - __field(int, hdrlen) 1032 + __field(unsigned long, fixup) 1033 + __field(size_t, headlen) 1034 + __field(unsigned int, pagelen) 1035 + __field(size_t, taillen) 1136 1036 ), 1137 1037 1138 1038 TP_fast_assign( 1139 1039 __entry->task_id = rqst->rq_task->tk_pid; 1140 1040 __entry->client_id = rqst->rq_task->tk_client->cl_clid; 1141 - __entry->base = rqst->rq_rcv_buf.head[0].iov_base; 1142 - __entry->len = len; 1143 - __entry->hdrlen = hdrlen; 1041 + __entry->fixup = fixup; 1042 + __entry->headlen = rqst->rq_rcv_buf.head[0].iov_len; 1043 + __entry->pagelen = rqst->rq_rcv_buf.page_len; 1044 + __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; 1144 1045 ), 1145 1046 1146 - TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", 1147 - __entry->task_id, __entry->client_id, 1148 - __entry->base, __entry->len, __entry->hdrlen 1149 - ) 1150 - ); 1151 - 1152 - TRACE_EVENT(xprtrdma_fixup_pg, 1153 - TP_PROTO( 1154 - const struct rpc_rqst *rqst, 1155 - int pageno, 1156 - const void *pos, 1157 - int len, 1158 - int curlen 1159 - ), 1160 - 1161 - TP_ARGS(rqst, pageno, pos, len, curlen), 1162 - 1163 - TP_STRUCT__entry( 1164 - __field(unsigned int, task_id) 1165 - __field(unsigned int, client_id) 1166 - __field(const void *, pos) 1167 - __field(int, pageno) 1168 - __field(int, len) 1169 - __field(int, curlen) 1170 - ), 1171 - 1172 - TP_fast_assign( 1173 - __entry->task_id = rqst->rq_task->tk_pid; 1174 - __entry->client_id = rqst->rq_task->tk_client->cl_clid; 1175 - __entry->pos = pos; 1176 - __entry->pageno = pageno; 1177 - __entry->len = len; 1178 - __entry->curlen = curlen; 1179 - ), 1180 - 1181 - TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", 1182 - __entry->task_id, __entry->client_id, 1183 - __entry->pageno, __entry->pos, __entry->len, __entry->curlen 1047 + TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", 1048 + __entry->task_id, __entry->client_id, __entry->fixup, 1049 + __entry->headlen, __entry->pagelen, __entry->taillen 1184 1050 ) 1185 1051 ); 1186 1052
+93
include/trace/events/sunrpc.h
··· 777 777 __get_str(addr), __get_str(port), __entry->status) 778 778 ); 779 779 780 + DECLARE_EVENT_CLASS(xprt_writelock_event, 781 + TP_PROTO( 782 + const struct rpc_xprt *xprt, const struct rpc_task *task 783 + ), 784 + 785 + TP_ARGS(xprt, task), 786 + 787 + TP_STRUCT__entry( 788 + __field(unsigned int, task_id) 789 + __field(unsigned int, client_id) 790 + __field(unsigned int, snd_task_id) 791 + ), 792 + 793 + TP_fast_assign( 794 + if (task) { 795 + __entry->task_id = task->tk_pid; 796 + __entry->client_id = task->tk_client ? 797 + task->tk_client->cl_clid : -1; 798 + } else { 799 + __entry->task_id = -1; 800 + __entry->client_id = -1; 801 + } 802 + __entry->snd_task_id = xprt->snd_task ? 803 + xprt->snd_task->tk_pid : -1; 804 + ), 805 + 806 + TP_printk("task:%u@%u snd_task:%u", 807 + __entry->task_id, __entry->client_id, 808 + __entry->snd_task_id) 809 + ); 810 + 811 + #define DEFINE_WRITELOCK_EVENT(name) \ 812 + DEFINE_EVENT(xprt_writelock_event, xprt_##name, \ 813 + TP_PROTO( \ 814 + const struct rpc_xprt *xprt, \ 815 + const struct rpc_task *task \ 816 + ), \ 817 + TP_ARGS(xprt, task)) 818 + 819 + DEFINE_WRITELOCK_EVENT(reserve_xprt); 820 + DEFINE_WRITELOCK_EVENT(release_xprt); 821 + 822 + DECLARE_EVENT_CLASS(xprt_cong_event, 823 + TP_PROTO( 824 + const struct rpc_xprt *xprt, const struct rpc_task *task 825 + ), 826 + 827 + TP_ARGS(xprt, task), 828 + 829 + TP_STRUCT__entry( 830 + __field(unsigned int, task_id) 831 + __field(unsigned int, client_id) 832 + __field(unsigned int, snd_task_id) 833 + __field(unsigned long, cong) 834 + __field(unsigned long, cwnd) 835 + __field(bool, wait) 836 + ), 837 + 838 + TP_fast_assign( 839 + if (task) { 840 + __entry->task_id = task->tk_pid; 841 + __entry->client_id = task->tk_client ? 842 + task->tk_client->cl_clid : -1; 843 + } else { 844 + __entry->task_id = -1; 845 + __entry->client_id = -1; 846 + } 847 + __entry->snd_task_id = xprt->snd_task ? 848 + xprt->snd_task->tk_pid : -1; 849 + __entry->cong = xprt->cong; 850 + __entry->cwnd = xprt->cwnd; 851 + __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); 852 + ), 853 + 854 + TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", 855 + __entry->task_id, __entry->client_id, 856 + __entry->snd_task_id, __entry->cong, __entry->cwnd, 857 + __entry->wait ? " (wait)" : "") 858 + ); 859 + 860 + #define DEFINE_CONG_EVENT(name) \ 861 + DEFINE_EVENT(xprt_cong_event, xprt_##name, \ 862 + TP_PROTO( \ 863 + const struct rpc_xprt *xprt, \ 864 + const struct rpc_task *task \ 865 + ), \ 866 + TP_ARGS(xprt, task)) 867 + 868 + DEFINE_CONG_EVENT(reserve_cong); 869 + DEFINE_CONG_EVENT(release_cong); 870 + DEFINE_CONG_EVENT(get_cong); 871 + DEFINE_CONG_EVENT(put_cong); 872 + 780 873 TRACE_EVENT(xs_stream_read_data, 781 874 TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), 782 875
+2 -12
net/sunrpc/clnt.c
··· 1679 1679 return; 1680 1680 } 1681 1681 1682 - printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", 1683 - __func__, status); 1684 1682 rpc_call_rpcerror(task, -EIO); 1685 1683 return; 1686 1684 } ··· 1687 1689 * Even though there was an error, we may have acquired 1688 1690 * a request slot somehow. Make sure not to leak it. 1689 1691 */ 1690 - if (task->tk_rqstp) { 1691 - printk(KERN_ERR "%s: status=%d, request allocated anyway\n", 1692 - __func__, status); 1692 + if (task->tk_rqstp) 1693 1693 xprt_release(task); 1694 - } 1695 1694 1696 1695 switch (status) { 1697 1696 case -ENOMEM: ··· 1697 1702 case -EAGAIN: /* woken up; retry */ 1698 1703 task->tk_action = call_retry_reserve; 1699 1704 return; 1700 - case -EIO: /* probably a shutdown */ 1701 - break; 1702 1705 default: 1703 - printk(KERN_ERR "%s: unrecognized error %d, exiting\n", 1704 - __func__, status); 1705 - break; 1706 + rpc_call_rpcerror(task, status); 1706 1707 } 1707 - rpc_call_rpcerror(task, status); 1708 1708 } 1709 1709 1710 1710 /*
+13 -9
net/sunrpc/xprt.c
··· 205 205 206 206 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 207 207 if (task == xprt->snd_task) 208 - return 1; 208 + goto out_locked; 209 209 goto out_sleep; 210 210 } 211 211 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 212 212 goto out_unlock; 213 213 xprt->snd_task = task; 214 214 215 + out_locked: 216 + trace_xprt_reserve_xprt(xprt, task); 215 217 return 1; 216 218 217 219 out_unlock: 218 220 xprt_clear_locked(xprt); 219 221 out_sleep: 220 - dprintk("RPC: %5u failed to lock transport %p\n", 221 - task->tk_pid, xprt); 222 222 task->tk_status = -EAGAIN; 223 223 if (RPC_IS_SOFT(task)) 224 224 rpc_sleep_on_timeout(&xprt->sending, task, NULL, ··· 269 269 270 270 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 271 271 if (task == xprt->snd_task) 272 - return 1; 272 + goto out_locked; 273 273 goto out_sleep; 274 274 } 275 275 if (req == NULL) { 276 276 xprt->snd_task = task; 277 - return 1; 277 + goto out_locked; 278 278 } 279 279 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 280 280 goto out_unlock; 281 281 if (!xprt_need_congestion_window_wait(xprt)) { 282 282 xprt->snd_task = task; 283 - return 1; 283 + goto out_locked; 284 284 } 285 285 out_unlock: 286 286 xprt_clear_locked(xprt); 287 287 out_sleep: 288 - dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 289 288 task->tk_status = -EAGAIN; 290 289 if (RPC_IS_SOFT(task)) 291 290 rpc_sleep_on_timeout(&xprt->sending, task, NULL, ··· 292 293 else 293 294 rpc_sleep_on(&xprt->sending, task, NULL); 294 295 return 0; 296 + out_locked: 297 + trace_xprt_reserve_cong(xprt, task); 298 + return 1; 295 299 } 296 300 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); 297 301 ··· 359 357 xprt_clear_locked(xprt); 360 358 __xprt_lock_write_next(xprt); 361 359 } 360 + trace_xprt_release_xprt(xprt, task); 362 361 } 363 362 EXPORT_SYMBOL_GPL(xprt_release_xprt); 364 363 ··· 377 374 xprt_clear_locked(xprt); 378 375 __xprt_lock_write_next_cong(xprt); 379 376 } 377 + trace_xprt_release_cong(xprt, task); 380 378 } 381 379 EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); 382 380 ··· 399 395 { 400 396 if (req->rq_cong) 401 397 return 1; 402 - dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n", 403 - req->rq_task->tk_pid, xprt->cong, xprt->cwnd); 398 + trace_xprt_get_cong(xprt, req->rq_task); 404 399 if (RPCXPRT_CONGESTED(xprt)) { 405 400 xprt_set_congestion_window_wait(xprt); 406 401 return 0; ··· 421 418 req->rq_cong = 0; 422 419 xprt->cong -= RPC_CWNDSCALE; 423 420 xprt_test_and_clear_congestion_window_wait(xprt); 421 + trace_xprt_put_cong(xprt, req->rq_task); 424 422 __xprt_lock_write_next_cong(xprt); 425 423 } 426 424
+1 -1
net/sunrpc/xprtrdma/backchannel.c
··· 79 79 *p = xdr_zero; 80 80 81 81 if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, 82 - &rqst->rq_snd_buf, rpcrdma_noch)) 82 + &rqst->rq_snd_buf, rpcrdma_noch_pullup)) 83 83 return -EIO; 84 84 85 85 trace_xprtrdma_cb_reply(rqst);
+11 -42
net/sunrpc/xprtrdma/frwr_ops.c
··· 36 36 * connect worker from running concurrently. 37 37 * 38 38 * When the underlying transport disconnects, MRs that are in flight 39 - * are flushed and are likely unusable. Thus all flushed MRs are 40 - * destroyed. New MRs are created on demand. 39 + * are flushed and are likely unusable. Thus all MRs are destroyed. 40 + * New MRs are created on demand. 41 41 */ 42 42 43 43 #include <linux/sunrpc/rpc_rdma.h> ··· 88 88 kfree(mr); 89 89 } 90 90 91 - static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) 91 + static void frwr_mr_recycle(struct rpcrdma_mr *mr) 92 92 { 93 + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 94 + 93 95 trace_xprtrdma_mr_recycle(mr); 94 96 95 97 if (mr->mr_dir != DMA_NONE) { ··· 107 105 spin_unlock(&r_xprt->rx_buf.rb_lock); 108 106 109 107 frwr_release_mr(mr); 110 - } 111 - 112 - /* MRs are dynamically allocated, so simply clean up and release the MR. 113 - * A replacement MR will subsequently be allocated on demand. 114 - */ 115 - static void 116 - frwr_mr_recycle_worker(struct work_struct *work) 117 - { 118 - struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, 119 - mr_recycle); 120 - 121 - frwr_mr_recycle(mr->mr_xprt, mr); 122 - } 123 - 124 - /* frwr_recycle - Discard MRs 125 - * @req: request to reset 126 - * 127 - * Used after a reconnect. These MRs could be in flight, we can't 128 - * tell. Safe thing to do is release them. 129 - */ 130 - void frwr_recycle(struct rpcrdma_req *req) 131 - { 132 - struct rpcrdma_mr *mr; 133 - 134 - while ((mr = rpcrdma_mr_pop(&req->rl_registered))) 135 - frwr_mr_recycle(mr->mr_xprt, mr); 136 108 } 137 109 138 110 /* frwr_reset - Place MRs back on the free list ··· 142 166 struct ib_mr *frmr; 143 167 int rc; 144 168 145 - /* NB: ib_alloc_mr and device drivers typically allocate 146 - * memory with GFP_KERNEL. 147 - */ 148 169 frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); 149 170 if (IS_ERR(frmr)) 150 171 goto out_mr_err; ··· 153 180 mr->frwr.fr_mr = frmr; 154 181 mr->mr_dir = DMA_NONE; 155 182 INIT_LIST_HEAD(&mr->mr_list); 156 - INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); 157 183 init_completion(&mr->frwr.fr_linv_done); 158 184 159 185 sg_init_table(sg, depth); ··· 396 424 struct ib_send_wr *post_wr; 397 425 struct rpcrdma_mr *mr; 398 426 399 - post_wr = &req->rl_sendctx->sc_wr; 427 + post_wr = &req->rl_wr; 400 428 list_for_each_entry(mr, &req->rl_registered, mr_list) { 401 429 struct rpcrdma_frwr *frwr; 402 430 ··· 412 440 post_wr = &frwr->fr_regwr.wr; 413 441 } 414 442 415 - /* If ib_post_send fails, the next ->send_request for 416 - * @req will queue these MRs for recovery. 417 - */ 418 443 return ib_post_send(ia->ri_id->qp, post_wr, NULL); 419 444 } 420 445 ··· 437 468 static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr) 438 469 { 439 470 if (wc->status != IB_WC_SUCCESS) 440 - rpcrdma_mr_recycle(mr); 471 + frwr_mr_recycle(mr); 441 472 else 442 473 rpcrdma_mr_put(mr); 443 474 } ··· 539 570 */ 540 571 bad_wr = NULL; 541 572 rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); 542 - trace_xprtrdma_post_send(req, rc); 543 573 544 574 /* The final LOCAL_INV WR in the chain is supposed to 545 575 * do the wake. If it was never posted, the wake will ··· 551 583 552 584 /* Recycle MRs in the LOCAL_INV chain that did not get posted. 553 585 */ 586 + trace_xprtrdma_post_linv(req, rc); 554 587 while (bad_wr) { 555 588 frwr = container_of(bad_wr, struct rpcrdma_frwr, 556 589 fr_invwr); ··· 559 590 bad_wr = bad_wr->next; 560 591 561 592 list_del_init(&mr->mr_list); 562 - rpcrdma_mr_recycle(mr); 593 + frwr_mr_recycle(mr); 563 594 } 564 595 } 565 596 ··· 642 673 */ 643 674 bad_wr = NULL; 644 675 rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); 645 - trace_xprtrdma_post_send(req, rc); 646 676 if (!rc) 647 677 return; 648 678 649 679 /* Recycle MRs in the LOCAL_INV chain that did not get posted. 650 680 */ 681 + trace_xprtrdma_post_linv(req, rc); 651 682 while (bad_wr) { 652 683 frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); 653 684 mr = container_of(frwr, struct rpcrdma_mr, frwr); 654 685 bad_wr = bad_wr->next; 655 686 656 - rpcrdma_mr_recycle(mr); 687 + frwr_mr_recycle(mr); 657 688 } 658 689 659 690 /* The final LOCAL_INV WR in the chain is supposed to
+277 -154
net/sunrpc/xprtrdma/rpc_rdma.c
··· 78 78 size += rpcrdma_segment_maxsz * sizeof(__be32); 79 79 size += sizeof(__be32); /* list discriminator */ 80 80 81 - dprintk("RPC: %s: max call header size = %u\n", 82 - __func__, size); 83 81 return size; 84 82 } 85 83 ··· 98 100 size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); 99 101 size += sizeof(__be32); /* list discriminator */ 100 102 101 - dprintk("RPC: %s: max reply header size = %u\n", 102 - __func__, size); 103 103 return size; 104 104 } 105 105 ··· 359 363 out_getmr_err: 360 364 trace_xprtrdma_nomrs(req); 361 365 xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 362 - if (r_xprt->rx_ep.rep_connected != -ENODEV) 363 - schedule_work(&r_xprt->rx_buf.rb_refresh_worker); 366 + rpcrdma_mrs_refresh(r_xprt); 364 367 return ERR_PTR(-EAGAIN); 365 368 } 366 369 ··· 388 393 unsigned int pos; 389 394 int nsegs; 390 395 391 - if (rtype == rpcrdma_noch) 396 + if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped) 392 397 goto done; 393 398 394 399 pos = rqst->rq_snd_buf.head[0].iov_len; ··· 560 565 */ 561 566 void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc) 562 567 { 568 + struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf; 563 569 struct ib_sge *sge; 564 570 565 571 if (!sc->sc_unmap_count) ··· 572 576 */ 573 577 for (sge = &sc->sc_sges[2]; sc->sc_unmap_count; 574 578 ++sge, --sc->sc_unmap_count) 575 - ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length, 579 + ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length, 576 580 DMA_TO_DEVICE); 577 581 578 582 kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done); ··· 585 589 { 586 590 struct rpcrdma_sendctx *sc = req->rl_sendctx; 587 591 struct rpcrdma_regbuf *rb = req->rl_rdmabuf; 588 - struct ib_sge *sge = sc->sc_sges; 592 + struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; 589 593 590 594 if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) 591 - goto out_regbuf; 595 + return false; 592 596 sge->addr = rdmab_addr(rb); 593 597 sge->length = len; 594 598 sge->lkey = rdmab_lkey(rb); 595 599 596 600 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, 597 601 DMA_TO_DEVICE); 598 - sc->sc_wr.num_sge++; 602 + return true; 603 + } 604 + 605 + /* The head iovec is straightforward, as it is usually already 606 + * DMA-mapped. Sync the content that has changed. 607 + */ 608 + static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt, 609 + struct rpcrdma_req *req, unsigned int len) 610 + { 611 + struct rpcrdma_sendctx *sc = req->rl_sendctx; 612 + struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; 613 + struct rpcrdma_regbuf *rb = req->rl_sendbuf; 614 + 615 + if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) 616 + return false; 617 + 618 + sge->addr = rdmab_addr(rb); 619 + sge->length = len; 620 + sge->lkey = rdmab_lkey(rb); 621 + 622 + ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, 623 + DMA_TO_DEVICE); 624 + return true; 625 + } 626 + 627 + /* If there is a page list present, DMA map and prepare an 628 + * SGE for each page to be sent. 629 + */ 630 + static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req, 631 + struct xdr_buf *xdr) 632 + { 633 + struct rpcrdma_sendctx *sc = req->rl_sendctx; 634 + struct rpcrdma_regbuf *rb = req->rl_sendbuf; 635 + unsigned int page_base, len, remaining; 636 + struct page **ppages; 637 + struct ib_sge *sge; 638 + 639 + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 640 + page_base = offset_in_page(xdr->page_base); 641 + remaining = xdr->page_len; 642 + while (remaining) { 643 + sge = &sc->sc_sges[req->rl_wr.num_sge++]; 644 + len = min_t(unsigned int, PAGE_SIZE - page_base, remaining); 645 + sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages, 646 + page_base, len, DMA_TO_DEVICE); 647 + if (ib_dma_mapping_error(rdmab_device(rb), sge->addr)) 648 + goto out_mapping_err; 649 + 650 + sge->length = len; 651 + sge->lkey = rdmab_lkey(rb); 652 + 653 + sc->sc_unmap_count++; 654 + ppages++; 655 + remaining -= len; 656 + page_base = 0; 657 + } 658 + 599 659 return true; 600 660 601 - out_regbuf: 602 - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 661 + out_mapping_err: 662 + trace_xprtrdma_dma_maperr(sge->addr); 603 663 return false; 604 664 } 605 665 606 - /* Prepare the Send SGEs. The head and tail iovec, and each entry 607 - * in the page list, gets its own SGE. 666 + /* The tail iovec may include an XDR pad for the page list, 667 + * as well as additional content, and may not reside in the 668 + * same page as the head iovec. 608 669 */ 609 - static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt, 610 - struct rpcrdma_req *req, 670 + static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req, 611 671 struct xdr_buf *xdr, 612 - enum rpcrdma_chunktype rtype) 672 + unsigned int page_base, unsigned int len) 613 673 { 614 674 struct rpcrdma_sendctx *sc = req->rl_sendctx; 615 - unsigned int sge_no, page_base, len, remaining; 675 + struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; 616 676 struct rpcrdma_regbuf *rb = req->rl_sendbuf; 617 - struct ib_sge *sge = sc->sc_sges; 618 - struct page *page, **ppages; 677 + struct page *page = virt_to_page(xdr->tail[0].iov_base); 619 678 620 - /* The head iovec is straightforward, as it is already 621 - * DMA-mapped. Sync the content that has changed. 622 - */ 623 - if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) 624 - goto out_regbuf; 625 - sc->sc_device = rdmab_device(rb); 626 - sge_no = 1; 627 - sge[sge_no].addr = rdmab_addr(rb); 628 - sge[sge_no].length = xdr->head[0].iov_len; 629 - sge[sge_no].lkey = rdmab_lkey(rb); 630 - ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr, 631 - sge[sge_no].length, DMA_TO_DEVICE); 679 + sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len, 680 + DMA_TO_DEVICE); 681 + if (ib_dma_mapping_error(rdmab_device(rb), sge->addr)) 682 + goto out_mapping_err; 632 683 633 - /* If there is a Read chunk, the page list is being handled 634 - * via explicit RDMA, and thus is skipped here. However, the 635 - * tail iovec may include an XDR pad for the page list, as 636 - * well as additional content, and may not reside in the 637 - * same page as the head iovec. 638 - */ 639 - if (rtype == rpcrdma_readch) { 640 - len = xdr->tail[0].iov_len; 641 - 642 - /* Do not include the tail if it is only an XDR pad */ 643 - if (len < 4) 644 - goto out; 645 - 646 - page = virt_to_page(xdr->tail[0].iov_base); 647 - page_base = offset_in_page(xdr->tail[0].iov_base); 648 - 649 - /* If the content in the page list is an odd length, 650 - * xdr_write_pages() has added a pad at the beginning 651 - * of the tail iovec. Force the tail's non-pad content 652 - * to land at the next XDR position in the Send message. 653 - */ 654 - page_base += len & 3; 655 - len -= len & 3; 656 - goto map_tail; 657 - } 658 - 659 - /* If there is a page list present, temporarily DMA map 660 - * and prepare an SGE for each page to be sent. 661 - */ 662 - if (xdr->page_len) { 663 - ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 664 - page_base = offset_in_page(xdr->page_base); 665 - remaining = xdr->page_len; 666 - while (remaining) { 667 - sge_no++; 668 - if (sge_no > RPCRDMA_MAX_SEND_SGES - 2) 669 - goto out_mapping_overflow; 670 - 671 - len = min_t(u32, PAGE_SIZE - page_base, remaining); 672 - sge[sge_no].addr = 673 - ib_dma_map_page(rdmab_device(rb), *ppages, 674 - page_base, len, DMA_TO_DEVICE); 675 - if (ib_dma_mapping_error(rdmab_device(rb), 676 - sge[sge_no].addr)) 677 - goto out_mapping_err; 678 - sge[sge_no].length = len; 679 - sge[sge_no].lkey = rdmab_lkey(rb); 680 - 681 - sc->sc_unmap_count++; 682 - ppages++; 683 - remaining -= len; 684 - page_base = 0; 685 - } 686 - } 687 - 688 - /* The tail iovec is not always constructed in the same 689 - * page where the head iovec resides (see, for example, 690 - * gss_wrap_req_priv). To neatly accommodate that case, 691 - * DMA map it separately. 692 - */ 693 - if (xdr->tail[0].iov_len) { 694 - page = virt_to_page(xdr->tail[0].iov_base); 695 - page_base = offset_in_page(xdr->tail[0].iov_base); 696 - len = xdr->tail[0].iov_len; 697 - 698 - map_tail: 699 - sge_no++; 700 - sge[sge_no].addr = 701 - ib_dma_map_page(rdmab_device(rb), page, page_base, len, 702 - DMA_TO_DEVICE); 703 - if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr)) 704 - goto out_mapping_err; 705 - sge[sge_no].length = len; 706 - sge[sge_no].lkey = rdmab_lkey(rb); 707 - sc->sc_unmap_count++; 708 - } 709 - 710 - out: 711 - sc->sc_wr.num_sge += sge_no; 712 - if (sc->sc_unmap_count) 713 - kref_get(&req->rl_kref); 684 + sge->length = len; 685 + sge->lkey = rdmab_lkey(rb); 686 + ++sc->sc_unmap_count; 714 687 return true; 715 688 716 - out_regbuf: 717 - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 718 - return false; 719 - 720 - out_mapping_overflow: 721 - rpcrdma_sendctx_unmap(sc); 722 - pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); 723 - return false; 724 - 725 689 out_mapping_err: 726 - rpcrdma_sendctx_unmap(sc); 727 - trace_xprtrdma_dma_maperr(sge[sge_no].addr); 690 + trace_xprtrdma_dma_maperr(sge->addr); 728 691 return false; 692 + } 693 + 694 + /* Copy the tail to the end of the head buffer. 695 + */ 696 + static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt, 697 + struct rpcrdma_req *req, 698 + struct xdr_buf *xdr) 699 + { 700 + unsigned char *dst; 701 + 702 + dst = (unsigned char *)xdr->head[0].iov_base; 703 + dst += xdr->head[0].iov_len + xdr->page_len; 704 + memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len); 705 + r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len; 706 + } 707 + 708 + /* Copy pagelist content into the head buffer. 709 + */ 710 + static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt, 711 + struct rpcrdma_req *req, 712 + struct xdr_buf *xdr) 713 + { 714 + unsigned int len, page_base, remaining; 715 + struct page **ppages; 716 + unsigned char *src, *dst; 717 + 718 + dst = (unsigned char *)xdr->head[0].iov_base; 719 + dst += xdr->head[0].iov_len; 720 + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 721 + page_base = offset_in_page(xdr->page_base); 722 + remaining = xdr->page_len; 723 + while (remaining) { 724 + src = page_address(*ppages); 725 + src += page_base; 726 + len = min_t(unsigned int, PAGE_SIZE - page_base, remaining); 727 + memcpy(dst, src, len); 728 + r_xprt->rx_stats.pullup_copy_count += len; 729 + 730 + ppages++; 731 + dst += len; 732 + remaining -= len; 733 + page_base = 0; 734 + } 735 + } 736 + 737 + /* Copy the contents of @xdr into @rl_sendbuf and DMA sync it. 738 + * When the head, pagelist, and tail are small, a pull-up copy 739 + * is considerably less costly than DMA mapping the components 740 + * of @xdr. 741 + * 742 + * Assumptions: 743 + * - the caller has already verified that the total length 744 + * of the RPC Call body will fit into @rl_sendbuf. 745 + */ 746 + static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt, 747 + struct rpcrdma_req *req, 748 + struct xdr_buf *xdr) 749 + { 750 + if (unlikely(xdr->tail[0].iov_len)) 751 + rpcrdma_pullup_tail_iov(r_xprt, req, xdr); 752 + 753 + if (unlikely(xdr->page_len)) 754 + rpcrdma_pullup_pagelist(r_xprt, req, xdr); 755 + 756 + /* The whole RPC message resides in the head iovec now */ 757 + return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len); 758 + } 759 + 760 + static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt, 761 + struct rpcrdma_req *req, 762 + struct xdr_buf *xdr) 763 + { 764 + struct kvec *tail = &xdr->tail[0]; 765 + 766 + if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) 767 + return false; 768 + if (xdr->page_len) 769 + if (!rpcrdma_prepare_pagelist(req, xdr)) 770 + return false; 771 + if (tail->iov_len) 772 + if (!rpcrdma_prepare_tail_iov(req, xdr, 773 + offset_in_page(tail->iov_base), 774 + tail->iov_len)) 775 + return false; 776 + 777 + if (req->rl_sendctx->sc_unmap_count) 778 + kref_get(&req->rl_kref); 779 + return true; 780 + } 781 + 782 + static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt, 783 + struct rpcrdma_req *req, 784 + struct xdr_buf *xdr) 785 + { 786 + if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) 787 + return false; 788 + 789 + /* If there is a Read chunk, the page list is being handled 790 + * via explicit RDMA, and thus is skipped here. 791 + */ 792 + 793 + /* Do not include the tail if it is only an XDR pad */ 794 + if (xdr->tail[0].iov_len > 3) { 795 + unsigned int page_base, len; 796 + 797 + /* If the content in the page list is an odd length, 798 + * xdr_write_pages() adds a pad at the beginning of 799 + * the tail iovec. Force the tail's non-pad content to 800 + * land at the next XDR position in the Send message. 801 + */ 802 + page_base = offset_in_page(xdr->tail[0].iov_base); 803 + len = xdr->tail[0].iov_len; 804 + page_base += len & 3; 805 + len -= len & 3; 806 + if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len)) 807 + return false; 808 + kref_get(&req->rl_kref); 809 + } 810 + 811 + return true; 729 812 } 730 813 731 814 /** ··· 817 742 * 818 743 * Returns 0 on success; otherwise a negative errno is returned. 819 744 */ 820 - int 821 - rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, 822 - struct rpcrdma_req *req, u32 hdrlen, 823 - struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 745 + inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, 746 + struct rpcrdma_req *req, u32 hdrlen, 747 + struct xdr_buf *xdr, 748 + enum rpcrdma_chunktype rtype) 824 749 { 825 750 int ret; 826 751 827 752 ret = -EAGAIN; 828 753 req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); 829 754 if (!req->rl_sendctx) 830 - goto err; 831 - req->rl_sendctx->sc_wr.num_sge = 0; 755 + goto out_nosc; 832 756 req->rl_sendctx->sc_unmap_count = 0; 833 757 req->rl_sendctx->sc_req = req; 834 758 kref_init(&req->rl_kref); 759 + req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe; 760 + req->rl_wr.sg_list = req->rl_sendctx->sc_sges; 761 + req->rl_wr.num_sge = 0; 762 + req->rl_wr.opcode = IB_WR_SEND; 835 763 836 764 ret = -EIO; 837 765 if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) 838 - goto err; 839 - if (rtype != rpcrdma_areadch) 840 - if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype)) 841 - goto err; 766 + goto out_unmap; 767 + 768 + switch (rtype) { 769 + case rpcrdma_noch_pullup: 770 + if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr)) 771 + goto out_unmap; 772 + break; 773 + case rpcrdma_noch_mapped: 774 + if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr)) 775 + goto out_unmap; 776 + break; 777 + case rpcrdma_readch: 778 + if (!rpcrdma_prepare_readch(r_xprt, req, xdr)) 779 + goto out_unmap; 780 + break; 781 + case rpcrdma_areadch: 782 + break; 783 + default: 784 + goto out_unmap; 785 + } 786 + 842 787 return 0; 843 788 844 - err: 789 + out_unmap: 790 + rpcrdma_sendctx_unmap(req->rl_sendctx); 791 + out_nosc: 845 792 trace_xprtrdma_prepsend_failed(&req->rl_slot, ret); 846 793 return ret; 847 794 } ··· 893 796 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 894 797 struct xdr_stream *xdr = &req->rl_stream; 895 798 enum rpcrdma_chunktype rtype, wtype; 799 + struct xdr_buf *buf = &rqst->rq_snd_buf; 896 800 bool ddp_allowed; 897 801 __be32 *p; 898 802 int ret; ··· 951 853 */ 952 854 if (rpcrdma_args_inline(r_xprt, rqst)) { 953 855 *p++ = rdma_msg; 954 - rtype = rpcrdma_noch; 955 - } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { 856 + rtype = buf->len < rdmab_length(req->rl_sendbuf) ? 857 + rpcrdma_noch_pullup : rpcrdma_noch_mapped; 858 + } else if (ddp_allowed && buf->flags & XDRBUF_WRITE) { 956 859 *p++ = rdma_msg; 957 860 rtype = rpcrdma_readch; 958 861 } else { ··· 961 862 *p++ = rdma_nomsg; 962 863 rtype = rpcrdma_areadch; 963 864 } 964 - 965 - /* If this is a retransmit, discard previously registered 966 - * chunks. Very likely the connection has been replaced, 967 - * so these registrations are invalid and unusable. 968 - */ 969 - frwr_recycle(req); 970 865 971 866 /* This implementation supports the following combinations 972 867 * of chunk lists in one RPC-over-RDMA Call message: ··· 995 902 goto out_err; 996 903 997 904 ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len, 998 - &rqst->rq_snd_buf, rtype); 905 + buf, rtype); 999 906 if (ret) 1000 907 goto out_err; 1001 908 ··· 1007 914 r_xprt->rx_stats.failed_marshal_count++; 1008 915 frwr_reset(req); 1009 916 return ret; 917 + } 918 + 919 + static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt, 920 + struct rpcrdma_buffer *buf, 921 + u32 grant) 922 + { 923 + buf->rb_credits = grant; 924 + xprt->cwnd = grant << RPC_CWNDSHIFT; 925 + } 926 + 927 + static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant) 928 + { 929 + struct rpc_xprt *xprt = &r_xprt->rx_xprt; 930 + 931 + spin_lock(&xprt->transport_lock); 932 + __rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant); 933 + spin_unlock(&xprt->transport_lock); 934 + } 935 + 936 + /** 937 + * rpcrdma_reset_cwnd - Reset the xprt's congestion window 938 + * @r_xprt: controlling transport instance 939 + * 940 + * Prepare @r_xprt for the next connection by reinitializing 941 + * its credit grant to one (see RFC 8166, Section 3.3.3). 942 + */ 943 + void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt) 944 + { 945 + struct rpc_xprt *xprt = &r_xprt->rx_xprt; 946 + 947 + spin_lock(&xprt->transport_lock); 948 + xprt->cong = 0; 949 + __rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1); 950 + spin_unlock(&xprt->transport_lock); 1010 951 } 1011 952 1012 953 /** ··· 1082 955 curlen = rqst->rq_rcv_buf.head[0].iov_len; 1083 956 if (curlen > copy_len) 1084 957 curlen = copy_len; 1085 - trace_xprtrdma_fixup(rqst, copy_len, curlen); 1086 958 srcp += curlen; 1087 959 copy_len -= curlen; 1088 960 ··· 1101 975 if (curlen > pagelist_len) 1102 976 curlen = pagelist_len; 1103 977 1104 - trace_xprtrdma_fixup_pg(rqst, i, srcp, 1105 - copy_len, curlen); 1106 978 destp = kmap_atomic(ppages[i]); 1107 979 memcpy(destp + page_base, srcp, curlen); 1108 980 flush_dcache_page(ppages[i]); ··· 1132 1008 rqst->rq_private_buf.tail[0].iov_base = srcp; 1133 1009 } 1134 1010 1011 + if (fixup_copy_count) 1012 + trace_xprtrdma_fixup(rqst, fixup_copy_count); 1135 1013 return fixup_copy_count; 1136 1014 } 1137 1015 ··· 1482 1356 credits = 1; /* don't deadlock */ 1483 1357 else if (credits > buf->rb_max_requests) 1484 1358 credits = buf->rb_max_requests; 1485 - if (buf->rb_credits != credits) { 1486 - spin_lock(&xprt->transport_lock); 1487 - buf->rb_credits = credits; 1488 - xprt->cwnd = credits << RPC_CWNDSHIFT; 1489 - spin_unlock(&xprt->transport_lock); 1490 - } 1359 + if (buf->rb_credits != credits) 1360 + rpcrdma_update_cwnd(r_xprt, credits); 1361 + rpcrdma_post_recvs(r_xprt, false); 1491 1362 1492 1363 req = rpcr_to_rdmar(rqst); 1493 1364 if (req->rl_reply) {
+10 -23
net/sunrpc/xprtrdma/transport.c
··· 243 243 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 244 244 xprt_clear_connecting(xprt); 245 245 if (r_xprt->rx_ep.rep_connected > 0) { 246 - if (!xprt_test_and_set_connected(xprt)) { 247 - xprt->stat.connect_count++; 248 - xprt->stat.connect_time += (long)jiffies - 249 - xprt->stat.connect_start; 250 - xprt_wake_pending_tasks(xprt, -EAGAIN); 251 - } 252 - } else { 253 - if (xprt_test_and_clear_connected(xprt)) 254 - xprt_wake_pending_tasks(xprt, rc); 246 + xprt->stat.connect_count++; 247 + xprt->stat.connect_time += (long)jiffies - 248 + xprt->stat.connect_start; 249 + xprt_set_connected(xprt); 250 + rc = -EAGAIN; 255 251 } 252 + xprt_wake_pending_tasks(xprt, rc); 256 253 } 257 254 258 255 /** ··· 422 425 return; 423 426 rpcrdma_ep_disconnect(ep, ia); 424 427 425 - /* Prepare @xprt for the next connection by reinitializing 426 - * its credit grant to one (see RFC 8166, Section 3.3.3). 427 - */ 428 - r_xprt->rx_buf.rb_credits = 1; 429 - xprt->cwnd = RPC_CWNDSHIFT; 430 - 431 428 out: 432 429 xprt->reestablish_timeout = 0; 433 430 ++xprt->connect_cookie; ··· 441 450 struct sockaddr *sap = (struct sockaddr *)&xprt->addr; 442 451 char buf[8]; 443 452 444 - dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", 445 - __func__, xprt, 446 - xprt->address_strings[RPC_DISPLAY_ADDR], 447 - xprt->address_strings[RPC_DISPLAY_PORT], 448 - port); 449 - 450 453 rpc_set_port(sap, port); 451 454 452 455 kfree(xprt->address_strings[RPC_DISPLAY_PORT]); ··· 450 465 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); 451 466 snprintf(buf, sizeof(buf), "%4hx", port); 452 467 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); 468 + 469 + trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, 470 + rx_xprt)); 453 471 } 454 472 455 473 /** ··· 524 536 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 525 537 unsigned long delay; 526 538 527 - trace_xprtrdma_op_connect(r_xprt); 528 - 529 539 delay = 0; 530 540 if (r_xprt->rx_ep.rep_connected != 0) { 531 541 delay = xprt_reconnect_delay(xprt); 532 542 xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); 533 543 } 544 + trace_xprtrdma_op_connect(r_xprt, delay); 534 545 queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, 535 546 delay); 536 547 }
+110 -84
net/sunrpc/xprtrdma/verbs.c
··· 74 74 /* 75 75 * internal functions 76 76 */ 77 - static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 77 + static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 78 + struct rpcrdma_sendctx *sc); 79 + static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 78 80 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf); 79 81 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 80 - static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); 81 - static void rpcrdma_mr_free(struct rpcrdma_mr *mr); 82 + static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 82 83 static struct rpcrdma_regbuf * 83 84 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 84 85 gfp_t flags); 85 86 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); 86 87 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); 87 - static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); 88 88 89 89 /* Wait for outstanding transport work to finish. ib_drain_qp 90 90 * handles the drains in the wrong order for us, so open code ··· 125 125 126 126 /** 127 127 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC 128 - * @cq: completion queue (ignored) 128 + * @cq: completion queue 129 129 * @wc: completed WR 130 130 * 131 131 */ ··· 138 138 139 139 /* WARNING: Only wr_cqe and status are reliable at this point */ 140 140 trace_xprtrdma_wc_send(sc, wc); 141 - rpcrdma_sendctx_put_locked(sc); 141 + rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc); 142 142 } 143 143 144 144 /** ··· 170 170 rdmab_addr(rep->rr_rdmabuf), 171 171 wc->byte_len, DMA_FROM_DEVICE); 172 172 173 - rpcrdma_post_recvs(r_xprt, false); 174 173 rpcrdma_reply_handler(rep); 175 174 return; 176 175 ··· 177 178 rpcrdma_recv_buffer_put(rep); 178 179 } 179 180 180 - static void 181 - rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, 182 - struct rdma_conn_param *param) 181 + static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, 182 + struct rdma_conn_param *param) 183 183 { 184 184 const struct rpcrdma_connect_private *pmsg = param->private_data; 185 + struct rpcrdma_ep *ep = &r_xprt->rx_ep; 185 186 unsigned int rsize, wsize; 186 187 187 188 /* Default settings for RPC-over-RDMA Version One */ ··· 197 198 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 198 199 } 199 200 200 - if (rsize < r_xprt->rx_ep.rep_inline_recv) 201 - r_xprt->rx_ep.rep_inline_recv = rsize; 202 - if (wsize < r_xprt->rx_ep.rep_inline_send) 203 - r_xprt->rx_ep.rep_inline_send = wsize; 204 - dprintk("RPC: %s: max send %u, max recv %u\n", __func__, 205 - r_xprt->rx_ep.rep_inline_send, 206 - r_xprt->rx_ep.rep_inline_recv); 201 + if (rsize < ep->rep_inline_recv) 202 + ep->rep_inline_recv = rsize; 203 + if (wsize < ep->rep_inline_send) 204 + ep->rep_inline_send = wsize; 205 + 207 206 rpcrdma_set_max_header_sizes(r_xprt); 208 207 } 209 208 ··· 255 258 case RDMA_CM_EVENT_ESTABLISHED: 256 259 ++xprt->connect_cookie; 257 260 ep->rep_connected = 1; 258 - rpcrdma_update_connect_private(r_xprt, &event->param.conn); 261 + rpcrdma_update_cm_private(r_xprt, &event->param.conn); 262 + trace_xprtrdma_inline_thresh(r_xprt); 259 263 wake_up_all(&ep->rep_connect_wait); 260 264 break; 261 265 case RDMA_CM_EVENT_CONNECT_ERROR: ··· 296 298 struct rdma_cm_id *id; 297 299 int rc; 298 300 299 - trace_xprtrdma_conn_start(xprt); 300 - 301 301 init_completion(&ia->ri_done); 302 302 init_completion(&ia->ri_remove_done); 303 303 ··· 311 315 if (rc) 312 316 goto out; 313 317 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 314 - if (rc < 0) { 315 - trace_xprtrdma_conn_tout(xprt); 318 + if (rc < 0) 316 319 goto out; 317 - } 318 320 319 321 rc = ia->ri_async_rc; 320 322 if (rc) ··· 323 329 if (rc) 324 330 goto out; 325 331 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 326 - if (rc < 0) { 327 - trace_xprtrdma_conn_tout(xprt); 332 + if (rc < 0) 328 333 goto out; 329 - } 330 334 rc = ia->ri_async_rc; 331 335 if (rc) 332 336 goto out; ··· 401 409 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 402 410 struct rpcrdma_req *req; 403 411 404 - cancel_work_sync(&buf->rb_refresh_worker); 405 - 406 412 /* This is similar to rpcrdma_ep_destroy, but: 407 413 * - Don't cancel the connect worker. 408 414 * - Don't call rpcrdma_ep_disconnect, which waits ··· 427 437 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); 428 438 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); 429 439 } 430 - rpcrdma_mrs_destroy(buf); 440 + rpcrdma_mrs_destroy(r_xprt); 431 441 ib_dealloc_pd(ia->ri_pd); 432 442 ia->ri_pd = NULL; 433 443 ··· 512 522 init_waitqueue_head(&ep->rep_connect_wait); 513 523 ep->rep_receive_count = 0; 514 524 515 - sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL, 525 + sendcq = ib_alloc_cq_any(ia->ri_id->device, r_xprt, 516 526 ep->rep_attr.cap.max_send_wr + 1, 517 527 IB_POLL_WORKQUEUE); 518 528 if (IS_ERR(sendcq)) { ··· 620 630 pr_err("rpcrdma: rdma_create_qp returned %d\n", err); 621 631 goto out3; 622 632 } 623 - 624 - rpcrdma_mrs_create(r_xprt); 625 633 return 0; 626 634 627 635 out3: ··· 636 648 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 637 649 struct rdma_cm_id *id, *old; 638 650 int err, rc; 639 - 640 - trace_xprtrdma_reconnect(r_xprt); 641 651 642 652 rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); 643 653 ··· 691 705 memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr)); 692 706 switch (ep->rep_connected) { 693 707 case 0: 694 - dprintk("RPC: %s: connecting...\n", __func__); 695 708 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr); 696 709 if (rc) { 697 710 rc = -ENETUNREACH; ··· 711 726 ep->rep_connected = 0; 712 727 xprt_clear_connected(xprt); 713 728 729 + rpcrdma_reset_cwnd(r_xprt); 714 730 rpcrdma_post_recvs(r_xprt, true); 715 731 716 732 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); ··· 728 742 goto out; 729 743 } 730 744 731 - dprintk("RPC: %s: connected\n", __func__); 745 + rpcrdma_mrs_create(r_xprt); 732 746 733 747 out: 734 748 if (rc) 735 749 ep->rep_connected = rc; 736 750 737 751 out_noupdate: 752 + trace_xprtrdma_connect(r_xprt, rc); 738 753 return rc; 739 754 } 740 755 ··· 744 757 * @ep: endpoint to disconnect 745 758 * @ia: associated interface adapter 746 759 * 747 - * This is separate from destroy to facilitate the ability 748 - * to reconnect without recreating the endpoint. 749 - * 750 - * This call is not reentrant, and must not be made in parallel 751 - * on the same endpoint. 760 + * Caller serializes. Either the transport send lock is held, 761 + * or we're being called to destroy the transport. 752 762 */ 753 763 void 754 764 rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ··· 764 780 trace_xprtrdma_disconnect(r_xprt, rc); 765 781 766 782 rpcrdma_xprt_drain(r_xprt); 783 + rpcrdma_reqs_reset(r_xprt); 784 + rpcrdma_mrs_destroy(r_xprt); 767 785 } 768 786 769 787 /* Fixed-size circular FIFO queue. This implementation is wait-free and ··· 803 817 if (!sc) 804 818 return NULL; 805 819 806 - sc->sc_wr.wr_cqe = &sc->sc_cqe; 807 - sc->sc_wr.sg_list = sc->sc_sges; 808 - sc->sc_wr.opcode = IB_WR_SEND; 809 820 sc->sc_cqe.done = rpcrdma_wc_send; 810 821 return sc; 811 822 } ··· 830 847 if (!sc) 831 848 return -ENOMEM; 832 849 833 - sc->sc_xprt = r_xprt; 834 850 buf->rb_sc_ctxs[i] = sc; 835 851 } 836 852 ··· 892 910 893 911 /** 894 912 * rpcrdma_sendctx_put_locked - Release a send context 913 + * @r_xprt: controlling transport instance 895 914 * @sc: send context to release 896 915 * 897 916 * Usage: Called from Send completion to return a sendctxt ··· 900 917 * 901 918 * The caller serializes calls to this function (per transport). 902 919 */ 903 - static void 904 - rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) 920 + static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 921 + struct rpcrdma_sendctx *sc) 905 922 { 906 - struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; 923 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 907 924 unsigned long next_tail; 908 925 909 926 /* Unmap SGEs of previously completed but unsignaled ··· 921 938 /* Paired with READ_ONCE */ 922 939 smp_store_release(&buf->rb_sc_tail, next_tail); 923 940 924 - xprt_write_space(&sc->sc_xprt->rx_xprt); 941 + xprt_write_space(&r_xprt->rx_xprt); 925 942 } 926 943 927 944 static void ··· 948 965 mr->mr_xprt = r_xprt; 949 966 950 967 spin_lock(&buf->rb_lock); 951 - list_add(&mr->mr_list, &buf->rb_mrs); 968 + rpcrdma_mr_push(mr, &buf->rb_mrs); 952 969 list_add(&mr->mr_all, &buf->rb_all_mrs); 953 970 spin_unlock(&buf->rb_lock); 954 971 } ··· 967 984 968 985 rpcrdma_mrs_create(r_xprt); 969 986 xprt_write_space(&r_xprt->rx_xprt); 987 + } 988 + 989 + /** 990 + * rpcrdma_mrs_refresh - Wake the MR refresh worker 991 + * @r_xprt: controlling transport instance 992 + * 993 + */ 994 + void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) 995 + { 996 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 997 + struct rpcrdma_ep *ep = &r_xprt->rx_ep; 998 + 999 + /* If there is no underlying device, it's no use to 1000 + * wake the refresh worker. 1001 + */ 1002 + if (ep->rep_connected != -ENODEV) { 1003 + /* The work is scheduled on a WQ_MEM_RECLAIM 1004 + * workqueue in order to prevent MR allocation 1005 + * from recursing into NFS during direct reclaim. 1006 + */ 1007 + queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); 1008 + } 970 1009 } 971 1010 972 1011 /** ··· 1045 1040 kfree(req); 1046 1041 out1: 1047 1042 return NULL; 1043 + } 1044 + 1045 + /** 1046 + * rpcrdma_reqs_reset - Reset all reqs owned by a transport 1047 + * @r_xprt: controlling transport instance 1048 + * 1049 + * ASSUMPTION: the rb_allreqs list is stable for the duration, 1050 + * and thus can be walked without holding rb_lock. Eg. the 1051 + * caller is holding the transport send lock to exclude 1052 + * device removal or disconnection. 1053 + */ 1054 + static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) 1055 + { 1056 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1057 + struct rpcrdma_req *req; 1058 + 1059 + list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 1060 + /* Credits are valid only for one connection */ 1061 + req->rl_slot.rq_cong = 0; 1062 + } 1048 1063 } 1049 1064 1050 1065 static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, ··· 1150 1125 INIT_LIST_HEAD(&buf->rb_all_mrs); 1151 1126 INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); 1152 1127 1153 - rpcrdma_mrs_create(r_xprt); 1154 - 1155 1128 INIT_LIST_HEAD(&buf->rb_send_bufs); 1156 1129 INIT_LIST_HEAD(&buf->rb_allreqs); 1157 1130 ··· 1157 1134 for (i = 0; i < buf->rb_max_requests; i++) { 1158 1135 struct rpcrdma_req *req; 1159 1136 1160 - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE, 1137 + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, 1161 1138 GFP_KERNEL); 1162 1139 if (!req) 1163 1140 goto out; 1164 1141 list_add(&req->rl_list, &buf->rb_send_bufs); 1165 1142 } 1166 1143 1167 - buf->rb_credits = 1; 1168 1144 init_llist_head(&buf->rb_free_reps); 1169 1145 1170 1146 rc = rpcrdma_sendctxs_create(r_xprt); ··· 1180 1158 * rpcrdma_req_destroy - Destroy an rpcrdma_req object 1181 1159 * @req: unused object to be destroyed 1182 1160 * 1183 - * This function assumes that the caller prevents concurrent device 1184 - * unload and transport tear-down. 1161 + * Relies on caller holding the transport send lock to protect 1162 + * removing req->rl_all from buf->rb_all_reqs safely. 1185 1163 */ 1186 1164 void rpcrdma_req_destroy(struct rpcrdma_req *req) 1187 1165 { 1166 + struct rpcrdma_mr *mr; 1167 + 1188 1168 list_del(&req->rl_all); 1189 1169 1190 - while (!list_empty(&req->rl_free_mrs)) 1191 - rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs)); 1170 + while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { 1171 + struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 1172 + 1173 + spin_lock(&buf->rb_lock); 1174 + list_del(&mr->mr_all); 1175 + spin_unlock(&buf->rb_lock); 1176 + 1177 + frwr_release_mr(mr); 1178 + } 1192 1179 1193 1180 rpcrdma_regbuf_free(req->rl_recvbuf); 1194 1181 rpcrdma_regbuf_free(req->rl_sendbuf); ··· 1205 1174 kfree(req); 1206 1175 } 1207 1176 1208 - static void 1209 - rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) 1177 + /** 1178 + * rpcrdma_mrs_destroy - Release all of a transport's MRs 1179 + * @r_xprt: controlling transport instance 1180 + * 1181 + * Relies on caller holding the transport send lock to protect 1182 + * removing mr->mr_list from req->rl_free_mrs safely. 1183 + */ 1184 + static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) 1210 1185 { 1211 - struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1212 - rx_buf); 1186 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1213 1187 struct rpcrdma_mr *mr; 1214 - unsigned int count; 1215 1188 1216 - count = 0; 1189 + cancel_work_sync(&buf->rb_refresh_worker); 1190 + 1217 1191 spin_lock(&buf->rb_lock); 1218 1192 while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, 1219 1193 struct rpcrdma_mr, 1220 1194 mr_all)) != NULL) { 1195 + list_del(&mr->mr_list); 1221 1196 list_del(&mr->mr_all); 1222 1197 spin_unlock(&buf->rb_lock); 1223 1198 1224 1199 frwr_release_mr(mr); 1225 - count++; 1200 + 1226 1201 spin_lock(&buf->rb_lock); 1227 1202 } 1228 1203 spin_unlock(&buf->rb_lock); 1229 - r_xprt->rx_stats.mrs_allocated = 0; 1230 1204 } 1231 1205 1232 1206 /** ··· 1245 1209 void 1246 1210 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1247 1211 { 1248 - cancel_work_sync(&buf->rb_refresh_worker); 1249 - 1250 1212 rpcrdma_sendctxs_destroy(buf); 1251 1213 rpcrdma_reps_destroy(buf); 1252 1214 ··· 1256 1222 list_del(&req->rl_list); 1257 1223 rpcrdma_req_destroy(req); 1258 1224 } 1259 - 1260 - rpcrdma_mrs_destroy(buf); 1261 1225 } 1262 1226 1263 1227 /** ··· 1294 1262 } 1295 1263 1296 1264 rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); 1297 - } 1298 - 1299 - static void rpcrdma_mr_free(struct rpcrdma_mr *mr) 1300 - { 1301 - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 1302 - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1303 - 1304 - mr->mr_req = NULL; 1305 - spin_lock(&buf->rb_lock); 1306 - rpcrdma_mr_push(mr, &buf->rb_mrs); 1307 - spin_unlock(&buf->rb_lock); 1308 1265 } 1309 1266 1310 1267 /** ··· 1458 1437 struct rpcrdma_ep *ep, 1459 1438 struct rpcrdma_req *req) 1460 1439 { 1461 - struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; 1440 + struct ib_send_wr *send_wr = &req->rl_wr; 1462 1441 int rc; 1463 1442 1464 1443 if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) { ··· 1476 1455 return 0; 1477 1456 } 1478 1457 1479 - static void 1480 - rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1458 + /** 1459 + * rpcrdma_post_recvs - Refill the Receive Queue 1460 + * @r_xprt: controlling transport instance 1461 + * @temp: mark Receive buffers to be deleted after use 1462 + * 1463 + */ 1464 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1481 1465 { 1482 1466 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1483 1467 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+6 -12
net/sunrpc/xprtrdma/xprt_rdma.h
··· 218 218 /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes 219 219 */ 220 220 struct rpcrdma_req; 221 - struct rpcrdma_xprt; 222 221 struct rpcrdma_sendctx { 223 - struct ib_send_wr sc_wr; 224 222 struct ib_cqe sc_cqe; 225 - struct ib_device *sc_device; 226 - struct rpcrdma_xprt *sc_xprt; 227 223 struct rpcrdma_req *sc_req; 228 224 unsigned int sc_unmap_count; 229 225 struct ib_sge sc_sges[]; ··· 253 257 u32 mr_handle; 254 258 u32 mr_length; 255 259 u64 mr_offset; 256 - struct work_struct mr_recycle; 257 260 struct list_head mr_all; 258 261 }; 259 262 ··· 313 318 struct rpcrdma_rep *rl_reply; 314 319 struct xdr_stream rl_stream; 315 320 struct xdr_buf rl_hdrbuf; 321 + struct ib_send_wr rl_wr; 316 322 struct rpcrdma_sendctx *rl_sendctx; 317 323 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ 318 324 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ ··· 470 474 471 475 int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 472 476 struct rpcrdma_req *); 477 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); 473 478 474 479 /* 475 480 * Buffer calls - xprtrdma/verbs.c ··· 484 487 485 488 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); 486 489 void rpcrdma_mr_put(struct rpcrdma_mr *mr); 487 - 488 - static inline void 489 - rpcrdma_mr_recycle(struct rpcrdma_mr *mr) 490 - { 491 - schedule_work(&mr->mr_recycle); 492 - } 490 + void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt); 493 491 494 492 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 495 493 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, ··· 534 542 /* Memory registration calls xprtrdma/frwr_ops.c 535 543 */ 536 544 bool frwr_is_supported(struct ib_device *device); 537 - void frwr_recycle(struct rpcrdma_req *req); 538 545 void frwr_reset(struct rpcrdma_req *req); 539 546 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); 540 547 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); ··· 554 563 555 564 enum rpcrdma_chunktype { 556 565 rpcrdma_noch = 0, 566 + rpcrdma_noch_pullup, 567 + rpcrdma_noch_mapped, 557 568 rpcrdma_readch, 558 569 rpcrdma_areadch, 559 570 rpcrdma_writech, ··· 569 576 void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc); 570 577 int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); 571 578 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); 579 + void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt); 572 580 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); 573 581 void rpcrdma_reply_handler(struct rpcrdma_rep *rep); 574 582