Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

SUNRPC: switchable buffer allocation

Add RPC client transport switch support for replacing buffer management
on a per-transport basis.

In the current IPv4 socket transport implementation, RPC buffers are
allocated as needed for each RPC message that is sent. Some transport
implementations, however, may choose to use pre-allocated buffers for
encoding, sending, receiving, and unmarshalling RPC messages. For
transports capable of direct data placement, the buffers can be carved
out of a pre-registered area of memory rather than from a slab cache.

Test-plan:
Millions of fsx operations. Performance characterization with "sio" and
"iozone". Use oprofile and other tools to look for significant regressions
in CPU utilization.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

authored by

Chuck Lever and committed by
Trond Myklebust
02107148 03c21733

+49 -36
+1 -2
include/linux/sunrpc/sched.h
··· 52 52 * RPC call state 53 53 */ 54 54 struct rpc_message tk_msg; /* RPC call info */ 55 - __u32 * tk_buffer; /* XDR buffer */ 56 - size_t tk_bufsize; 57 55 __u8 tk_garb_retry; 58 56 __u8 tk_cred_retry; 59 57 ··· 266 268 void rpc_wake_up_status(struct rpc_wait_queue *, int); 267 269 void rpc_delay(struct rpc_task *, unsigned long); 268 270 void * rpc_malloc(struct rpc_task *, size_t); 271 + void rpc_free(struct rpc_task *); 269 272 int rpciod_up(void); 270 273 void rpciod_down(void); 271 274 void rpciod_wake_up(void);
+5 -5
include/linux/sunrpc/xprt.h
··· 79 79 void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ 80 80 struct list_head rq_list; 81 81 82 + __u32 * rq_buffer; /* XDR encode buffer */ 83 + size_t rq_bufsize; 84 + 82 85 struct xdr_buf rq_private_buf; /* The receive buffer 83 86 * used in the softirq. 84 87 */ 85 88 unsigned long rq_majortimeo; /* major timeout alarm */ 86 89 unsigned long rq_timeout; /* Current timeout value */ 87 90 unsigned int rq_retries; /* # of retries */ 88 - /* 89 - * For authentication (e.g. auth_des) 90 - */ 91 - u32 rq_creddata[2]; 92 91 93 92 /* 94 93 * Partial send handling 95 94 */ 96 - 97 95 u32 rq_bytes_sent; /* Bytes we have sent */ 98 96 99 97 unsigned long rq_xtime; /* when transmitted */ ··· 105 107 int (*reserve_xprt)(struct rpc_task *task); 106 108 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 107 109 void (*connect)(struct rpc_task *task); 110 + void * (*buf_alloc)(struct rpc_task *task, size_t size); 111 + void (*buf_free)(struct rpc_task *task); 108 112 int (*send_request)(struct rpc_task *task); 109 113 void (*set_retrans_timeout)(struct rpc_task *task); 110 114 void (*timer)(struct rpc_task *task);
+8 -6
net/sunrpc/clnt.c
··· 644 644 645 645 /* 646 646 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. 647 - * (Note: buffer memory is freed in rpc_task_release). 647 + * (Note: buffer memory is freed in xprt_release). 648 648 */ 649 649 static void 650 650 call_allocate(struct rpc_task *task) 651 651 { 652 + struct rpc_rqst *req = task->tk_rqstp; 653 + struct rpc_xprt *xprt = task->tk_xprt; 652 654 unsigned int bufsiz; 653 655 654 656 dprintk("RPC: %4d call_allocate (status %d)\n", 655 657 task->tk_pid, task->tk_status); 656 658 task->tk_action = call_bind; 657 - if (task->tk_buffer) 659 + if (req->rq_buffer) 658 660 return; 659 661 660 662 /* FIXME: compute buffer requirements more exactly using 661 663 * auth->au_wslack */ 662 664 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; 663 665 664 - if (rpc_malloc(task, bufsiz << 1) != NULL) 666 + if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) 665 667 return; 666 668 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 667 669 ··· 706 704 task->tk_pid, task->tk_status); 707 705 708 706 /* Default buffer setup */ 709 - bufsiz = task->tk_bufsize >> 1; 710 - sndbuf->head[0].iov_base = (void *)task->tk_buffer; 707 + bufsiz = req->rq_bufsize >> 1; 708 + sndbuf->head[0].iov_base = (void *)req->rq_buffer; 711 709 sndbuf->head[0].iov_len = bufsiz; 712 710 sndbuf->tail[0].iov_len = 0; 713 711 sndbuf->page_len = 0; 714 712 sndbuf->len = 0; 715 713 sndbuf->buflen = bufsiz; 716 - rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); 714 + rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); 717 715 rcvbuf->head[0].iov_len = bufsiz; 718 716 rcvbuf->tail[0].iov_len = 0; 719 717 rcvbuf->page_len = 0;
+27 -23
net/sunrpc/sched.c
··· 41 41 42 42 static void __rpc_default_timer(struct rpc_task *task); 43 43 static void rpciod_killall(void); 44 - static void rpc_free(struct rpc_task *task); 45 - 46 44 static void rpc_async_schedule(void *); 47 45 48 46 /* ··· 597 599 WARN_ON(RPC_ASSASSINATED(task)); 598 600 /* Always release the RPC slot and buffer memory */ 599 601 xprt_release(task); 600 - rpc_free(task); 601 602 } 602 603 } 603 604 } ··· 721 724 __rpc_execute((struct rpc_task *)arg); 722 725 } 723 726 724 - /* 725 - * Allocate memory for RPC purposes. 727 + /** 728 + * rpc_malloc - allocate an RPC buffer 729 + * @task: RPC task that will use this buffer 730 + * @size: requested byte size 726 731 * 727 732 * We try to ensure that some NFS reads and writes can always proceed 728 733 * by using a mempool when allocating 'small' buffers. 729 734 * In order to avoid memory starvation triggering more writebacks of 730 735 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. 731 736 */ 732 - void * 733 - rpc_malloc(struct rpc_task *task, size_t size) 737 + void * rpc_malloc(struct rpc_task *task, size_t size) 734 738 { 739 + struct rpc_rqst *req = task->tk_rqstp; 735 740 gfp_t gfp; 736 741 737 742 if (task->tk_flags & RPC_TASK_SWAPPER) ··· 742 743 gfp = GFP_NOFS; 743 744 744 745 if (size > RPC_BUFFER_MAXSIZE) { 745 - task->tk_buffer = kmalloc(size, gfp); 746 - if (task->tk_buffer) 747 - task->tk_bufsize = size; 746 + req->rq_buffer = kmalloc(size, gfp); 747 + if (req->rq_buffer) 748 + req->rq_bufsize = size; 748 749 } else { 749 - task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); 750 - if (task->tk_buffer) 751 - task->tk_bufsize = RPC_BUFFER_MAXSIZE; 750 + req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); 751 + if (req->rq_buffer) 752 + req->rq_bufsize = RPC_BUFFER_MAXSIZE; 752 753 } 753 - return task->tk_buffer; 754 + return req->rq_buffer; 754 755 } 755 756 756 - static void 757 - rpc_free(struct rpc_task *task) 757 + /** 758 + * rpc_free - free buffer allocated via rpc_malloc 
759 + * @task: RPC task with a buffer to be freed 760 + * 761 + */ 762 + void rpc_free(struct rpc_task *task) 758 763 { 759 - if (task->tk_buffer) { 760 - if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) 761 - mempool_free(task->tk_buffer, rpc_buffer_mempool); 764 + struct rpc_rqst *req = task->tk_rqstp; 765 + 766 + if (req->rq_buffer) { 767 + if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) 768 + mempool_free(req->rq_buffer, rpc_buffer_mempool); 762 769 else 763 - kfree(task->tk_buffer); 764 - task->tk_buffer = NULL; 765 - task->tk_bufsize = 0; 770 + kfree(req->rq_buffer); 771 + req->rq_buffer = NULL; 772 + req->rq_bufsize = 0; 766 773 } 767 774 } 768 775 ··· 892 887 xprt_release(task); 893 888 if (task->tk_msg.rpc_cred) 894 889 rpcauth_unbindcred(task); 895 - rpc_free(task); 896 890 if (task->tk_client) { 897 891 rpc_release_client(task->tk_client); 898 892 task->tk_client = NULL;
+3
net/sunrpc/xprt.c
··· 838 838 req->rq_timeout = xprt->timeout.to_initval; 839 839 req->rq_task = task; 840 840 req->rq_xprt = xprt; 841 + req->rq_buffer = NULL; 842 + req->rq_bufsize = 0; 841 843 req->rq_xid = xprt_alloc_xid(xprt); 842 844 req->rq_release_snd_buf = NULL; 843 845 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, ··· 869 867 mod_timer(&xprt->timer, 870 868 xprt->last_used + xprt->idle_timeout); 871 869 spin_unlock_bh(&xprt->transport_lock); 870 + xprt->ops->buf_free(task); 872 871 task->tk_rqstp = NULL; 873 872 if (req->rq_release_snd_buf) 874 873 req->rq_release_snd_buf(req);
+5
net/sunrpc/xprtsock.c
··· 28 28 #include <linux/udp.h> 29 29 #include <linux/tcp.h> 30 30 #include <linux/sunrpc/clnt.h> 31 + #include <linux/sunrpc/sched.h> 31 32 #include <linux/file.h> 32 33 33 34 #include <net/sock.h> ··· 1162 1161 .reserve_xprt = xprt_reserve_xprt_cong, 1163 1162 .release_xprt = xprt_release_xprt_cong, 1164 1163 .connect = xs_connect, 1164 + .buf_alloc = rpc_malloc, 1165 + .buf_free = rpc_free, 1165 1166 .send_request = xs_udp_send_request, 1166 1167 .set_retrans_timeout = xprt_set_retrans_timeout_rtt, 1167 1168 .timer = xs_udp_timer, ··· 1176 1173 .reserve_xprt = xprt_reserve_xprt, 1177 1174 .release_xprt = xprt_release_xprt, 1178 1175 .connect = xs_connect, 1176 + .buf_alloc = rpc_malloc, 1177 + .buf_free = rpc_free, 1179 1178 .send_request = xs_tcp_send_request, 1180 1179 .set_retrans_timeout = xprt_set_retrans_timeout_def, 1181 1180 .close = xs_close,