Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfs-for-4.16-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull more NFS client updates from Trond Myklebust:
"A few bugfixes and some small sunrpc latency/performance improvements
before the merge window closes:

Stable fixes:

- fix an incorrect calculation of the RDMA send scatter-gather
element limit

- fix an Oops when attempting to free resources after RDMA device
removal

Bugfixes:

- SUNRPC: Ensure we always release the TCP socket in a timely fashion
when the connection is shut down.

- SUNRPC: Don't call __UDPX_INC_STATS() from a preemptible context

Latency/Performance:

- SUNRPC: Queue latency-sensitive socket tasks to the less contended
xprtiod queue

- SUNRPC: Make the xprtiod workqueue unbounded.

- SUNRPC: Make the rpciod workqueue unbounded"

* tag 'nfs-for-4.16-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
SUNRPC: Don't call __UDPX_INC_STATS() from a preemptible context
fix parallelism for rpc tasks
Make the xprtiod workqueue unbounded.
SUNRPC: Queue latency-sensitive socket tasks to xprtiod
SUNRPC: Ensure we always close the socket after a connection shuts down
xprtrdma: Fix BUG after a device removal
xprtrdma: Fix calculation of ri_max_send_sges

+36 -23
+3
include/linux/sunrpc/sched.h
··· 229 229 struct rpc_task *, 230 230 rpc_action action, 231 231 int priority); 232 + void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, 233 + struct rpc_wait_queue *queue, 234 + struct rpc_task *task); 232 235 void rpc_wake_up_queued_task(struct rpc_wait_queue *, 233 236 struct rpc_task *); 234 237 void rpc_wake_up(struct rpc_wait_queue *);
+14 -2
net/sunrpc/sched.c
··· 461 461 /* 462 462 * Wake up a task on a specific queue 463 463 */ 464 + void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, 465 + struct rpc_wait_queue *queue, 466 + struct rpc_task *task) 467 + { 468 + spin_lock_bh(&queue->lock); 469 + rpc_wake_up_task_on_wq_queue_locked(wq, queue, task); 470 + spin_unlock_bh(&queue->lock); 471 + } 472 + 473 + /* 474 + * Wake up a task on a specific queue 475 + */ 464 476 void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) 465 477 { 466 478 spin_lock_bh(&queue->lock); ··· 1104 1092 * Create the rpciod thread and wait for it to start. 1105 1093 */ 1106 1094 dprintk("RPC: creating workqueue rpciod\n"); 1107 - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); 1095 + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); 1108 1096 if (!wq) 1109 1097 goto out_failed; 1110 1098 rpciod_workqueue = wq; 1111 1099 /* Note: highpri because network receive is latency sensitive */ 1112 - wq = alloc_workqueue("xprtiod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); 1100 + wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0); 1113 1101 if (!wq) 1114 1102 goto free_rpciod; 1115 1103 xprtiod_workqueue = wq;
+2 -1
net/sunrpc/xprt.c
··· 517 517 if (xprt->snd_task) { 518 518 dprintk("RPC: write space: waking waiting task on " 519 519 "xprt %p\n", xprt); 520 - rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); 520 + rpc_wake_up_queued_task_on_wq(xprtiod_workqueue, 521 + &xprt->pending, xprt->snd_task); 521 522 } 522 523 spin_unlock_bh(&xprt->transport_lock); 523 524 }
+1 -1
net/sunrpc/xprtrdma/rpc_rdma.c
··· 143 143 if (xdr->page_len) { 144 144 remaining = xdr->page_len; 145 145 offset = offset_in_page(xdr->page_base); 146 - count = 0; 146 + count = RPCRDMA_MIN_SEND_SGES; 147 147 while (remaining) { 148 148 remaining -= min_t(unsigned int, 149 149 PAGE_SIZE - offset, remaining);
+4 -4
net/sunrpc/xprtrdma/verbs.c
··· 505 505 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); 506 506 return -ENOMEM; 507 507 } 508 - ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; 508 + ia->ri_max_send_sges = max_sge; 509 509 510 510 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { 511 511 dprintk("RPC: %s: insufficient wqe's available\n", ··· 1502 1502 static void 1503 1503 rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) 1504 1504 { 1505 + if (!rb) 1506 + return; 1507 + 1505 1508 if (!rpcrdma_regbuf_is_mapped(rb)) 1506 1509 return; 1507 1510 ··· 1520 1517 void 1521 1518 rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) 1522 1519 { 1523 - if (!rb) 1524 - return; 1525 - 1526 1520 rpcrdma_dma_unmap_regbuf(rb); 1527 1521 kfree(rb); 1528 1522 }
+12 -15
net/sunrpc/xprtsock.c
··· 807 807 smp_mb__after_atomic(); 808 808 } 809 809 810 - static void xs_sock_mark_closed(struct rpc_xprt *xprt) 811 - { 812 - xs_sock_reset_connection_flags(xprt); 813 - /* Mark transport as closed and wake up all pending tasks */ 814 - xprt_disconnect_done(xprt); 815 - } 816 - 817 810 /** 818 811 * xs_error_report - callback to handle TCP socket state errors 819 812 * @sk: socket ··· 826 833 err = -sk->sk_err; 827 834 if (err == 0) 828 835 goto out; 829 - /* Is this a reset event? */ 830 - if (sk->sk_state == TCP_CLOSE) 831 - xs_sock_mark_closed(xprt); 832 836 dprintk("RPC: xs_error_report client %p, error=%d...\n", 833 837 xprt, -err); 834 838 trace_rpc_socket_error(xprt, sk->sk_socket, err); ··· 1068 1078 1069 1079 /* Suck it into the iovec, verify checksum if not done by hw. */ 1070 1080 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 1071 - __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); 1072 1081 spin_lock(&xprt->recv_lock); 1082 + __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); 1073 1083 goto out_unpin; 1074 1084 } 1075 1085 1076 - __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); 1077 1086 1078 1087 spin_lock_bh(&xprt->transport_lock); 1079 1088 xprt_adjust_cwnd(xprt, task, copied); 1080 1089 spin_unlock_bh(&xprt->transport_lock); 1081 1090 spin_lock(&xprt->recv_lock); 1082 1091 xprt_complete_rqst(task, copied); 1092 + __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); 1083 1093 out_unpin: 1084 1094 xprt_unpin_rqst(rovr); 1085 1095 out_unlock: ··· 1645 1655 if (test_and_clear_bit(XPRT_SOCK_CONNECTING, 1646 1656 &transport->sock_state)) 1647 1657 xprt_clear_connecting(xprt); 1658 + clear_bit(XPRT_CLOSING, &xprt->state); 1648 1659 if (sk->sk_err) 1649 1660 xprt_wake_pending_tasks(xprt, -sk->sk_err); 1650 - xs_sock_mark_closed(xprt); 1661 + /* Trigger the socket release */ 1662 + xs_tcp_force_close(xprt); 1651 1663 } 1652 1664 out: 1653 1665 read_unlock_bh(&sk->sk_callback_lock); ··· 2257 2265 { 2258 2266 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 
xprt); 2259 2267 struct socket *sock = transport->sock; 2268 + int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE; 2260 2269 2261 2270 if (sock == NULL) 2262 2271 return; 2263 - if (xprt_connected(xprt)) { 2272 + switch (skst) { 2273 + default: 2264 2274 kernel_sock_shutdown(sock, SHUT_RDWR); 2265 2275 trace_rpc_socket_shutdown(xprt, sock); 2266 - } else 2276 + break; 2277 + case TCP_CLOSE: 2278 + case TCP_TIME_WAIT: 2267 2279 xs_reset_transport(transport); 2280 + } 2268 2281 } 2269 2282 2270 2283 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,