Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

[PATCH] knfsd: tidy up meaning of 'buffer size' in nfsd/sunrpc

There is some confusion about the meaning of 'bufsz' for a sunrpc server.
In some cases it is the largest message that can be sent or received. In
other cases it is the largest 'payload' that can be included in an NFS
message.

In either case, it is not possible for both the request and the reply to be
this large: only one of the two ever needs to be, while the other fits in a
single page. That suits NFS, where a large reply (e.g. READ) goes with a
small request and a large request (e.g. WRITE) goes with a small reply.

So we remove 'bufsz' and replace it with two numbers: 'max_payload' and
'max_mesg'. max_payload is the size that the server requests. It is used
when checking the maximum payload allowed on a particular connection:
depending on the protocol, a lower limit may apply (UDP is capped more
tightly than TCP).
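
For illustration, the per-connection check ends up looking like the new
svc_max_payload() in the net/sunrpc/svc.c diff below. A minimal userspace
restatement of that logic (the cap values and the is_udp parameter are
assumptions for the example, not the kernel's actual definitions):

/*
 * Illustrative only: standalone restatement of the svc_max_payload()
 * logic introduced by this patch.  The cap values below are placeholders,
 * not the kernel's RPCSVC_MAXPAYLOAD* definitions.
 */
#include <stdio.h>

#define EXAMPLE_MAXPAYLOAD_TCP  (64 * 1024)
#define EXAMPLE_MAXPAYLOAD_UDP  (32 * 1024)

static unsigned int max_payload_for(unsigned int sv_max_payload, int is_udp)
{
        unsigned int max = EXAMPLE_MAXPAYLOAD_TCP;

        if (is_udp)
                max = EXAMPLE_MAXPAYLOAD_UDP;   /* lower protocol limit */
        if (sv_max_payload < max)
                max = sv_max_payload;           /* server asked for less */
        return max;
}

int main(void)
{
        /* Server requested 1MB payloads; UDP still caps the result. */
        printf("tcp: %u\n", max_payload_for(1024 * 1024, 0));
        printf("udp: %u\n", max_payload_for(1024 * 1024, 1));
        return 0;
}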

max_mesg is the largest single message that can be sent or received. It is
calculated as max_payload plus one PAGE_SIZE for overheads, rounded up to a
multiple of PAGE_SIZE. Only one of the request and the reply may be this
size; the other must be at most one page.
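
As a worked example (assuming a 4K PAGE_SIZE and an arbitrary payload
figure; this only sketches the arithmetic added to svc_create() below):

/* Illustrative arithmetic only; mirrors the roundup() added in svc_create(). */
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096u
#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        unsigned int max_payload = 32 * 1024 + 200;     /* hypothetical value */
        unsigned int max_mesg =
                roundup(max_payload + EXAMPLE_PAGE_SIZE, EXAMPLE_PAGE_SIZE);

        /* 32968 + 4096 = 37064, rounded up to 40960 (ten 4K pages) */
        printf("max_payload=%u max_mesg=%u\n", max_payload, max_mesg);
        return 0;
}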

Cc: Greg Banks <gnb@sgi.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by NeilBrown, committed by Linus Torvalds

c6b0a9f8 5842730d

4 files changed, 27 insertions(+), 23 deletions(-)
fs/nfsd/nfssvc.c (+1 -1)

@@ -217,7 +217,7 @@
 
        atomic_set(&nfsd_busy, 0);
        nfsd_serv = svc_create_pooled(&nfsd_program,
-                                     NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize,
+                                     nfsd_max_blksize,
                                      nfsd_last_thread,
                                      nfsd, SIG_NOCLEAN, THIS_MODULE);
        if (nfsd_serv == NULL)
include/linux/sunrpc/svc.h (+2 -1)

@@ -57,7 +57,8 @@
        struct svc_stat *       sv_stats;       /* RPC statistics */
        spinlock_t              sv_lock;
        unsigned int            sv_nrthreads;   /* # of server threads */
-       unsigned int            sv_bufsz;       /* datagram buffer size */
+       unsigned int            sv_max_payload; /* datagram payload size */
+       unsigned int            sv_max_mesg;    /* max_payload + 1 page for overheads */
        unsigned int            sv_xdrsize;     /* XDR buffer size */
 
        struct list_head        sv_permsocks;   /* all permanent sockets */
net/sunrpc/svc.c (+10 -7)

@@ -282,7 +282,10 @@
        serv->sv_program   = prog;
        serv->sv_nrthreads = 1;
        serv->sv_stats     = prog->pg_stats;
-       serv->sv_bufsz     = bufsize? bufsize : 4096;
+       if (bufsize > RPCSVC_MAXPAYLOAD)
+               bufsize = RPCSVC_MAXPAYLOAD;
+       serv->sv_max_payload = bufsize? bufsize : 4096;
+       serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
        serv->sv_shutdown  = shutdown;
        xdrsize = 0;
        while (prog) {
@@ -417,9 +414,9 @@
        int pages;
        int arghi;
 
-       if (size > RPCSVC_MAXPAYLOAD)
-               size = RPCSVC_MAXPAYLOAD;
-       pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
+       pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
+                                      * We assume one is at most one page
+                                      */
        arghi = 0;
        BUG_ON(pages > RPCSVC_MAXPAGES);
        while (pages) {
@@ -466,7 +463,7 @@
 
        if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
            || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
-           || !svc_init_buffer(rqstp, serv->sv_bufsz))
+           || !svc_init_buffer(rqstp, serv->sv_max_mesg))
                goto out_thread;
 
        serv->sv_nrthreads++;
@@ -941,8 +938,8 @@
 
        if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
                max = RPCSVC_MAXPAYLOAD_UDP;
-       if (rqstp->rq_server->sv_bufsz < max)
-               max = rqstp->rq_server->sv_bufsz;
+       if (rqstp->rq_server->sv_max_payload < max)
+               max = rqstp->rq_server->sv_max_payload;
        return max;
 }
 EXPORT_SYMBOL_GPL(svc_max_payload);
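
A quick sanity check on the new page accounting (again assuming a 4K
PAGE_SIZE; the payload figure is only an example): because sv_max_mesg is
always a multiple of PAGE_SIZE, the expression in svc_init_buffer() above and
the one in svc_recv() in the svcsock.c diff below count the same number of
pages.

/*
 * Illustrative check that the two page-count formulas in this patch agree
 * once sv_max_mesg is a PAGE_SIZE multiple.  Constants are assumptions.
 */
#include <assert.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096u
#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        unsigned int max_payload = 32 * 1024;   /* example payload */
        unsigned int max_mesg =
                roundup(max_payload + EXAMPLE_PAGE_SIZE, EXAMPLE_PAGE_SIZE);

        /* as in svc_init_buffer() */
        unsigned int pages_init = max_mesg / EXAMPLE_PAGE_SIZE + 1;
        /* as in svc_recv() */
        unsigned int pages_recv = (max_mesg + EXAMPLE_PAGE_SIZE) / EXAMPLE_PAGE_SIZE;

        assert(pages_init == pages_recv);
        /* 32K payload -> 36K message -> 10 pages (9 for it + 1 for the other side) */
        printf("max_mesg=%u pages=%u\n", max_mesg, pages_init);
        return 0;
}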
net/sunrpc/svcsock.c (+14 -14)

@@ -192,13 +192,13 @@
        svsk->sk_pool = pool;
 
        set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-       if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2
+       if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
             > svc_sock_wspace(svsk))
            && !test_bit(SK_CLOSE, &svsk->sk_flags)
            && !test_bit(SK_CONN, &svsk->sk_flags)) {
                /* Don't enqueue while not enough space for reply */
                dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
-                       svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz,
+                       svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
                        svc_sock_wspace(svsk));
                svsk->sk_pool = NULL;
                clear_bit(SK_BUSY, &svsk->sk_flags);
@@ -220,7 +220,7 @@
                        rqstp, rqstp->rq_sock);
                rqstp->rq_sock = svsk;
                atomic_inc(&svsk->sk_inuse);
-               rqstp->rq_reserved = serv->sv_bufsz;
+               rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
                BUG_ON(svsk->sk_pool != pool);
                wake_up(&rqstp->rq_wait);
@@ -639,8 +639,8 @@
         * which will access the socket.
         */
        svc_sock_setbufsize(svsk->sk_sock,
-                           (serv->sv_nrthreads+3) * serv->sv_bufsz,
-                           (serv->sv_nrthreads+3) * serv->sv_bufsz);
+                           (serv->sv_nrthreads+3) * serv->sv_max_mesg,
+                           (serv->sv_nrthreads+3) * serv->sv_max_mesg);
 
        if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
                svc_sock_received(svsk);
@@ -749,8 +749,8 @@
         * svc_udp_recvfrom will re-adjust if necessary
         */
        svc_sock_setbufsize(svsk->sk_sock,
-                           3 * svsk->sk_server->sv_bufsz,
-                           3 * svsk->sk_server->sv_bufsz);
+                           3 * svsk->sk_server->sv_max_mesg,
+                           3 * svsk->sk_server->sv_max_mesg);
 
        set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
        set_bit(SK_CHNGBUF, &svsk->sk_flags);
@@ -993,7 +993,7 @@
         * as soon a a complete request arrives.
         */
        svc_sock_setbufsize(svsk->sk_sock,
-                           (serv->sv_nrthreads+3) * serv->sv_bufsz,
-                           3 * serv->sv_bufsz);
+                           (serv->sv_nrthreads+3) * serv->sv_max_mesg,
+                           3 * serv->sv_max_mesg);
 
        clear_bit(SK_DATA, &svsk->sk_flags);
@@ -1032,7 +1032,7 @@
                }
                svsk->sk_reclen &= 0x7fffffff;
                dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
-               if (svsk->sk_reclen > serv->sv_bufsz) {
+               if (svsk->sk_reclen > serv->sv_max_mesg) {
                        printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
                               (unsigned long) svsk->sk_reclen);
                        goto err_delete;
@@ -1171,8 +1171,8 @@
         * svc_tcp_recvfrom will re-adjust if necessary
         */
        svc_sock_setbufsize(svsk->sk_sock,
-                           3 * svsk->sk_server->sv_bufsz,
-                           3 * svsk->sk_server->sv_bufsz);
+                           3 * svsk->sk_server->sv_max_mesg,
+                           3 * svsk->sk_server->sv_max_mesg);
 
        set_bit(SK_CHNGBUF, &svsk->sk_flags);
        set_bit(SK_DATA, &svsk->sk_flags);
@@ -1234,7 +1234,7 @@
 
 
        /* now allocate needed pages.  If we get a failure, sleep briefly */
-       pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
+       pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
        for (i=0; i < pages ; i++)
                while (rqstp->rq_pages[i] == NULL) {
                        struct page *p = alloc_page(GFP_KERNEL);
@@ -1263,7 +1263,7 @@
        if ((svsk = svc_sock_dequeue(pool)) != NULL) {
                rqstp->rq_sock = svsk;
                atomic_inc(&svsk->sk_inuse);
-               rqstp->rq_reserved = serv->sv_bufsz;
+               rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
        } else {
                /* No data pending. Go to sleep */
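
The svcsock.c changes above also reserve sv_max_mesg per outstanding request
before queueing work on a socket. A minimal restatement of the write-space
test from svc_sock_enqueue() (field accesses collapsed into plain parameters,
numbers assumed), just to make the arithmetic explicit:

/*
 * Illustrative restatement of the "enough write space for a reply?" test
 * from svc_sock_enqueue(); not the kernel code itself.
 */
#include <stdbool.h>
#include <stdio.h>

static bool enough_wspace(long wspace, unsigned int reserved,
                          unsigned int max_mesg)
{
        /* The reply may need max_mesg bytes on top of what is already
         * reserved; the *2 mirrors the safety margin used in the patch. */
        return (long)(reserved + max_mesg) * 2 <= wspace;
}

int main(void)
{
        unsigned int max_mesg = 36864;  /* e.g. 32K payload + one 4K page */

        printf("%d\n", enough_wspace(200000, 0, max_mesg));    /* 1: enqueue */
        printf("%d\n", enough_wspace(60000, 0, max_mesg));     /* 0: skip */
        return 0;
}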