[PATCH] NFS: large non-page-aligned direct I/O clobbers memory

The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can
allow data->npages to be one larger than rpages. This causes a page
pointer to be written beyond the end of the pagevec in nfs_read_data (or
nfs_write_data).
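
For example (a minimal userspace sketch of the arithmetic, not part of
the patch; the 4096-byte page size and buffer values are hypothetical,
and PAGE_CACHE_* is folded into PAGE_* since they are equal here):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* the two helpers this patch removes */
static unsigned int nfs_max_pages(unsigned int size)
{
	return (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

static int nfs_direct_count_pages(unsigned long user_addr, size_t size)
{
	int page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	return page_count - (user_addr >> PAGE_SHIFT);
}

int main(void)
{
	unsigned int rsize = 32768;		/* 8 pages of data */
	unsigned long user_addr = 0x100;	/* not page aligned */

	/* pagevec gets 8 slots, but the buffer spans 9 pages */
	printf("rpages = %u\n", nfs_max_pages(rsize));
	printf("npages = %d\n", nfs_direct_count_pages(user_addr, rsize));
	return 0;
}

This prints rpages = 8 but npages = 9: get_user_pages() then stores the
ninth page pointer past the end of an 8-entry pagevec.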

Fix this by having nfs_(read|write)data_alloc() take the request length
in bytes, calculate the size of the pagevec array from it, and
initialise data->npages.
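
For the same hypothetical buffer, a sketch of the sizing the allocators
now perform internally on the byte length (pgbase + bytes) passed in by
the direct-I/O paths:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long user_addr = 0x100;	/* same unaligned buffer */
	size_t bytes = 32768;			/* min(rsize, count) */
	size_t pgbase = user_addr & ~PAGE_MASK;	/* offset into first page */

	/* the rounding nfs_readdata_alloc(pgbase + bytes) now does */
	unsigned int pagecount = (pgbase + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;

	printf("pagecount = %u\n", pagecount);	/* 9 */
	return 0;
}

Because data->npages is initialised from the same pagecount that sized
the pagevec, the two can no longer disagree.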

Also get rid of the redundant argument to nfs_commit_alloc().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Trond Myklebust and committed by Linus Torvalds (e9f7bee1, 016eb4a0)

5 files changed, 47 insertions(+), 74 deletions(-)
fs/nfs/direct.c (+14 -36)
···
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
-/*
- * "size" is never larger than rsize or wsize.
- */
-static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
-{
-	int page_count;
-
-	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	page_count -= user_addr >> PAGE_SHIFT;
-	BUG_ON(page_count < 0);
-
-	return page_count;
-}
-
-static inline unsigned int nfs_max_pages(unsigned int size)
-{
-	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
-
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
···
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int rpages = nfs_max_pages(rsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
 
 	get_dreq(dreq);
 
-	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
 
+		pgbase = user_addr & ~PAGE_MASK;
+		bytes = min(rsize,count);
+
 		result = -ENOMEM;
-		data = nfs_readdata_alloc(rpages);
+		data = nfs_readdata_alloc(pgbase + bytes);
 		if (unlikely(!data))
 			break;
 
-		bytes = rsize;
-		if (count < rsize)
-			bytes = count;
-
-		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 1, 0, data->pagevec, NULL);
···
 		started += bytes;
 		user_addr += bytes;
 		pos += bytes;
+		/* FIXME: Remove this unnecessary math from final patch */
 		pgbase += bytes;
 		pgbase &= ~PAGE_MASK;
+		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
 		count -= bytes;
 	} while (count != 0);
···
 
 static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
-	dreq->commit_data = nfs_commit_alloc(0);
+	dreq->commit_data = nfs_commit_alloc();
 	if (dreq->commit_data != NULL)
 		dreq->commit_data->req = (struct nfs_page *) dreq;
 }
···
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int wpages = nfs_max_pages(wsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
 
 	get_dreq(dreq);
 
-	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
 
+		pgbase = user_addr & ~PAGE_MASK;
+		bytes = min(wsize,count);
+
 		result = -ENOMEM;
-		data = nfs_writedata_alloc(wpages);
+		data = nfs_writedata_alloc(pgbase + bytes);
 		if (unlikely(!data))
 			break;
 
-		bytes = wsize;
-		if (count < wsize)
-			bytes = count;
-
-		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 0, 0, data->pagevec, NULL);
···
 		started += bytes;
 		user_addr += bytes;
 		pos += bytes;
+
+		/* FIXME: Remove this useless math from the final patch */
 		pgbase += bytes;
 		pgbase &= ~PAGE_MASK;
+		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
 		count -= bytes;
 	} while (count != 0);
fs/nfs/read.c (+13 -11)
···
 
 #define MIN_POOL_READ	(32)
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(size_t len)
 {
+	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		p->npages = pagecount;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
···
 	int result;
 	struct nfs_read_data *rdata;
 
-	rdata = nfs_readdata_alloc(1);
+	rdata = nfs_readdata_alloc(count);
 	if (!rdata)
 		return -ENOMEM;
 
···
 	struct nfs_page *req = nfs_list_entry(head->next);
 	struct page *page = req->wb_page;
 	struct nfs_read_data *data;
-	unsigned int rsize = NFS_SERVER(inode)->rsize;
-	unsigned int nbytes, offset;
+	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+	unsigned int offset;
 	int requests = 0;
 	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
 
 	nbytes = req->wb_bytes;
-	for(;;) {
-		data = nfs_readdata_alloc(1);
+	do {
+		size_t len = min(nbytes,rsize);
+
+		data = nfs_readdata_alloc(len);
 		if (!data)
 			goto out_bad;
 		INIT_LIST_HEAD(&data->pages);
 		list_add(&data->pages, &list);
 		requests++;
-		if (nbytes <= rsize)
-			break;
-		nbytes -= rsize;
-	}
+		nbytes -= len;
+	} while(nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
 
 	ClearPageError(page);
···
 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
 		return nfs_pagein_multi(head, inode);
 
-	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
+	data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
 	if (!data)
 		goto out_bad;
 
fs/nfs/write.c (+15 -22)
···
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(void)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
-		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!p->pagevec) {
-				mempool_free(p, nfs_commit_mempool);
-				p = NULL;
-			}
-		}
 	}
 	return p;
 }
···
 	mempool_free(p, nfs_commit_mempool);
 }
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
+	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		p->npages = pagecount;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
···
 	int result, written = 0;
 	struct nfs_write_data *wdata;
 
-	wdata = nfs_writedata_alloc(1);
+	wdata = nfs_writedata_alloc(wsize);
 	if (!wdata)
 		return -ENOMEM;
 
···
 	struct nfs_page *req = nfs_list_entry(head->next);
 	struct page *page = req->wb_page;
 	struct nfs_write_data *data;
-	unsigned int wsize = NFS_SERVER(inode)->wsize;
-	unsigned int nbytes, offset;
+	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+	unsigned int offset;
 	int requests = 0;
 	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
 
 	nbytes = req->wb_bytes;
-	for (;;) {
-		data = nfs_writedata_alloc(1);
+	do {
+		size_t len = min(nbytes, wsize);
+
+		data = nfs_writedata_alloc(len);
 		if (!data)
 			goto out_bad;
 		list_add(&data->pages, &list);
 		requests++;
-		if (nbytes <= wsize)
-			break;
-		nbytes -= wsize;
-	}
+		nbytes -= len;
+	} while (nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
 
 	ClearPageError(page);
···
 	struct nfs_write_data *data;
 	unsigned int count;
 
-	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
+	data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
 	if (!data)
 		goto out_bad;
 
···
 	struct nfs_write_data *data;
 	struct nfs_page *req;
 
-	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
+	data = nfs_commit_alloc();
 
 	if (!data)
 		goto out_bad;
include/linux/nfs_fs.h (+3 -3)
···
 extern void nfs_writedata_release(void *);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+struct nfs_write_data *nfs_commit_alloc(void);
 void nfs_commit_free(struct nfs_write_data *p);
 #endif
···
 /*
  * Allocate nfs_write_data structures
  */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern struct nfs_write_data *nfs_writedata_alloc(size_t len);
 
 /*
  * linux/fs/nfs/read.c
···
 /*
  * Allocate nfs_read_data structures
  */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern struct nfs_read_data *nfs_readdata_alloc(size_t len);
 
 /*
  * linux/fs/nfs3proc.c
include/linux/nfs_xdr.h (+2 -2)
···
 	struct list_head	pages;	/* Coalesced read requests */
 	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
-	unsigned int		npages;	/* active pages in pagevec */
+	unsigned int		npages;	/* Max length of pagevec */
 	struct nfs_readargs	args;
 	struct nfs_readres	res;
 #ifdef CONFIG_NFS_V4
···
 	struct list_head	pages;		/* Coalesced requests we wish to flush */
 	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
-	unsigned int		npages;		/* active pages in pagevec */
+	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 #ifdef CONFIG_NFS_V4