Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] RPC: separate TCP and UDP write space callbacks

Split the socket write space callback function into a TCP version and a UDP
version, eliminating one dependence on the "xprt->stream" variable.

Keep the common pieces of this path in xprt.c so other transports can use
them too.

Test-plan:
Write-intensive workload on a single mount point.

Version: Thu, 11 Aug 2005 16:07:51 -0400

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

Authored by Chuck Lever and committed by Trond Myklebust
c7b2cae8 55aa4f58

+89 -31
+2
include/linux/sunrpc/xprt.h
··· 240 240 * Transport switch helper functions 241 241 */ 242 242 void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); 243 + void xprt_wait_for_buffer_space(struct rpc_task *task); 244 + void xprt_write_space(struct rpc_xprt *xprt); 243 245 struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); 244 246 void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); 245 247 void xprt_disconnect(struct rpc_xprt *xprt);
+34
net/sunrpc/xprt.c
··· 241 241 rpc_wake_up(&xprt->pending); 242 242 } 243 243 244 + /** 245 + * xprt_wait_for_buffer_space - wait for transport output buffer to clear 246 + * @task: task to be put to sleep 247 + * 248 + */ 249 + void xprt_wait_for_buffer_space(struct rpc_task *task) 250 + { 251 + struct rpc_rqst *req = task->tk_rqstp; 252 + struct rpc_xprt *xprt = req->rq_xprt; 253 + 254 + task->tk_timeout = req->rq_timeout; 255 + rpc_sleep_on(&xprt->pending, task, NULL, NULL); 256 + } 257 + 258 + /** 259 + * xprt_write_space - wake the task waiting for transport output buffer space 260 + * @xprt: transport with waiting tasks 261 + * 262 + * Can be called in a soft IRQ context, so xprt_write_space never sleeps. 263 + */ 264 + void xprt_write_space(struct rpc_xprt *xprt) 265 + { 266 + if (unlikely(xprt->shutdown)) 267 + return; 268 + 269 + spin_lock_bh(&xprt->transport_lock); 270 + if (xprt->snd_task) { 271 + dprintk("RPC: write space: waking waiting task on xprt %p\n", 272 + xprt); 273 + rpc_wake_up_task(xprt->snd_task); 274 + } 275 + spin_unlock_bh(&xprt->transport_lock); 276 + } 277 + 244 278 static void xprt_reset_majortimeo(struct rpc_rqst *req) 245 279 { 246 280 struct rpc_timeout *to = &req->rq_xprt->timeout;
+53 -31
net/sunrpc/xprtsock.c
··· 308 308 309 309 if (status == -EAGAIN) { 310 310 if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { 311 - /* Protect against races with xs_write_space */ 311 + /* Protect against races with write_space */ 312 312 spin_lock_bh(&xprt->transport_lock); 313 313 /* Don't race with disconnect */ 314 314 if (!xprt_connected(xprt)) 315 315 task->tk_status = -ENOTCONN; 316 - else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { 317 - task->tk_timeout = req->rq_timeout; 318 - rpc_sleep_on(&xprt->pending, task, NULL, NULL); 319 - } 316 + else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) 317 + xprt_wait_for_buffer_space(task); 320 318 spin_unlock_bh(&xprt->transport_lock); 321 319 return status; 322 320 } ··· 719 721 } 720 722 721 723 /** 722 - * xs_write_space - callback invoked when socket buffer space becomes 723 - * available 724 + * xs_udp_write_space - callback invoked when socket buffer space 725 + * becomes available 724 726 * @sk: socket whose state has changed 725 727 * 726 728 * Called when more output buffer space is available for this socket. 727 729 * We try not to wake our writers until they can make "significant" 728 - * progress, otherwise we'll waste resources thrashing sock_sendmsg 730 + * progress, otherwise we'll waste resources thrashing kernel_sendmsg 729 731 * with a bunch of small requests. 
730 732 */ 731 - static void xs_write_space(struct sock *sk) 733 + static void xs_udp_write_space(struct sock *sk) 732 734 { 733 - struct rpc_xprt *xprt; 734 - struct socket *sock; 735 - 736 735 read_lock(&sk->sk_callback_lock); 737 - if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) 738 - goto out; 739 - if (xprt->shutdown) 740 - goto out; 741 736 742 - /* Wait until we have enough socket memory */ 743 - if (xprt->stream) { 744 - /* from net/core/stream.c:sk_stream_write_space */ 745 - if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) 737 + /* from net/core/sock.c:sock_def_write_space */ 738 + if (sock_writeable(sk)) { 739 + struct socket *sock; 740 + struct rpc_xprt *xprt; 741 + 742 + if (unlikely(!(sock = sk->sk_socket))) 746 743 goto out; 747 - } else { 748 - /* from net/core/sock.c:sock_def_write_space */ 749 - if (!sock_writeable(sk)) 744 + if (unlikely(!(xprt = xprt_from_sock(sk)))) 750 745 goto out; 746 + if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) 747 + goto out; 748 + 749 + xprt_write_space(xprt); 751 750 } 752 751 753 - if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) 754 - goto out; 752 + out: 753 + read_unlock(&sk->sk_callback_lock); 754 + } 755 755 756 - spin_lock_bh(&xprt->transport_lock); 757 - if (xprt->snd_task) 758 - rpc_wake_up_task(xprt->snd_task); 759 - spin_unlock_bh(&xprt->transport_lock); 760 - out: 756 + /** 757 + * xs_tcp_write_space - callback invoked when socket buffer space 758 + * becomes available 759 + * @sk: socket whose state has changed 760 + * 761 + * Called when more output buffer space is available for this socket. 762 + * We try not to wake our writers until they can make "significant" 763 + * progress, otherwise we'll waste resources thrashing kernel_sendmsg 764 + * with a bunch of small requests. 
765 + */ 766 + static void xs_tcp_write_space(struct sock *sk) 767 + { 768 + read_lock(&sk->sk_callback_lock); 769 + 770 + /* from net/core/stream.c:sk_stream_write_space */ 771 + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 772 + struct socket *sock; 773 + struct rpc_xprt *xprt; 774 + 775 + if (unlikely(!(sock = sk->sk_socket))) 776 + goto out; 777 + if (unlikely(!(xprt = xprt_from_sock(sk)))) 778 + goto out; 779 + if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) 780 + goto out; 781 + 782 + xprt_write_space(xprt); 783 + } 784 + 785 + out: 761 786 read_unlock(&sk->sk_callback_lock); 762 787 } 763 788 ··· 876 855 xprt->old_write_space = sk->sk_write_space; 877 856 if (xprt->prot == IPPROTO_UDP) { 878 857 sk->sk_data_ready = xs_udp_data_ready; 858 + sk->sk_write_space = xs_udp_write_space; 879 859 sk->sk_no_check = UDP_CSUM_NORCV; 880 860 xprt_set_connected(xprt); 881 861 } else { 882 862 tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ 883 863 sk->sk_data_ready = xs_tcp_data_ready; 884 864 sk->sk_state_change = xs_tcp_state_change; 865 + sk->sk_write_space = xs_tcp_write_space; 885 866 xprt_clear_connected(xprt); 886 867 } 887 - sk->sk_write_space = xs_write_space; 888 868 889 869 /* Reset to new socket */ 890 870 xprt->sock = sock;