[PATCH] NFS: large non-page-aligned direct I/O clobbers memory

The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can
allow data->npages to be one larger than rpages. This causes a page
pointer to be written beyond the end of the pagevec in nfs_read_data (or
nfs_write_data).
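
To make the off-by-one concrete: a buffer of rsize bytes that does not
start on a page boundary straddles one more page than an aligned buffer
does, but the pagevec was sized for the aligned case. Below is a minimal
userspace sketch (not part of the patch; the helper name and the
4096-byte PAGE_SIZE are illustrative, the counting expression is the one
the old nfs_direct_count_pages() used):

#include <stdio.h>

#define PAGE_SIZE  4096UL
#define PAGE_SHIFT 12

/* pages touched by "size" bytes starting at user_addr, computed
 * with the same arithmetic as the old nfs_direct_count_pages() */
static unsigned long pages_spanned(unsigned long user_addr, unsigned long size)
{
        return ((user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT)
                        - (user_addr >> PAGE_SHIFT);
}

int main(void)
{
        unsigned long rsize = 32768;    /* old code sized the pagevec for 8 pages */

        printf("aligned:   %lu\n", pages_spanned(0x10000, rsize));     /* 8 */
        printf("unaligned: %lu\n", pages_spanned(0x10100, rsize));     /* 9 */
        return 0;
}

In the unaligned case get_user_pages() is asked to fill data->npages = 9
page pointers while data->pagevec only has room for 8, so the ninth is
written past the end of the array.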

Fix this by making nfs_(read|write)data_alloc() calculate the size of the
pagevec array, and initialise data->npages.
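
With the allocators doing the sizing, the schedulers only pass the byte
count the request covers, including the offset into the first page. A
sketch of the sizing rule this gives (hypothetical helper, reusing the
PAGE_SIZE/PAGE_SHIFT definitions above; in the patch the rounding lives
in nfs_readdata_alloc()/nfs_writedata_alloc() and the caller passes
pgbase + bytes):

static unsigned int pagecount_for(unsigned long user_addr, unsigned long bytes)
{
        /* offset of the buffer within its first page */
        unsigned long pgbase = user_addr & (PAGE_SIZE - 1);

        /* round pgbase + bytes up to whole pages; this is what
         * (len + PAGE_SIZE - 1) >> PAGE_SHIFT computes for
         * len = pgbase + bytes */
        return (pgbase + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

For the unaligned example above this yields (0x100 + 32768 + 4095) >> 12 = 9,
so the pagevec now has room for every page get_user_pages() will fill,
and data->npages is initialised to match.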

Also get rid of the redundant argument to nfs_commit_alloc().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Trond Myklebust, committed by Linus Torvalds · e9f7bee1 016eb4a0

5 files changed: +47 -74
fs/nfs/direct.c (+14 -36)

···
         return atomic_dec_and_test(&dreq->io_count);
 }
 
-/*
- * "size" is never larger than rsize or wsize.
- */
-static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
-{
-        int page_count;
-
-        page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-        page_count -= user_addr >> PAGE_SHIFT;
-        BUG_ON(page_count < 0);
-
-        return page_count;
-}
-
-static inline unsigned int nfs_max_pages(unsigned int size)
-{
-        return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
-
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
···
         struct nfs_open_context *ctx = dreq->ctx;
         struct inode *inode = ctx->dentry->d_inode;
         size_t rsize = NFS_SERVER(inode)->rsize;
-        unsigned int rpages = nfs_max_pages(rsize);
         unsigned int pgbase;
         int result;
         ssize_t started = 0;
 
         get_dreq(dreq);
 
-        pgbase = user_addr & ~PAGE_MASK;
         do {
                 struct nfs_read_data *data;
                 size_t bytes;
 
+                pgbase = user_addr & ~PAGE_MASK;
+                bytes = min(rsize,count);
+
                 result = -ENOMEM;
-                data = nfs_readdata_alloc(rpages);
+                data = nfs_readdata_alloc(pgbase + bytes);
                 if (unlikely(!data))
                         break;
 
-                bytes = rsize;
-                if (count < rsize)
-                        bytes = count;
-
-                data->npages = nfs_direct_count_pages(user_addr, bytes);
                 down_read(&current->mm->mmap_sem);
                 result = get_user_pages(current, current->mm, user_addr,
                                         data->npages, 1, 0, data->pagevec, NULL);
···
                 started += bytes;
                 user_addr += bytes;
                 pos += bytes;
+                /* FIXME: Remove this unnecessary math from final patch */
                 pgbase += bytes;
                 pgbase &= ~PAGE_MASK;
+                BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
                 count -= bytes;
         } while (count != 0);
···
 
 static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
-        dreq->commit_data = nfs_commit_alloc(0);
+        dreq->commit_data = nfs_commit_alloc();
         if (dreq->commit_data != NULL)
                 dreq->commit_data->req = (struct nfs_page *) dreq;
 }
···
         struct nfs_open_context *ctx = dreq->ctx;
         struct inode *inode = ctx->dentry->d_inode;
         size_t wsize = NFS_SERVER(inode)->wsize;
-        unsigned int wpages = nfs_max_pages(wsize);
         unsigned int pgbase;
         int result;
         ssize_t started = 0;
 
         get_dreq(dreq);
 
-        pgbase = user_addr & ~PAGE_MASK;
         do {
                 struct nfs_write_data *data;
                 size_t bytes;
 
+                pgbase = user_addr & ~PAGE_MASK;
+                bytes = min(wsize,count);
+
                 result = -ENOMEM;
-                data = nfs_writedata_alloc(wpages);
+                data = nfs_writedata_alloc(pgbase + bytes);
                 if (unlikely(!data))
                         break;
 
-                bytes = wsize;
-                if (count < wsize)
-                        bytes = count;
-
-                data->npages = nfs_direct_count_pages(user_addr, bytes);
                 down_read(&current->mm->mmap_sem);
                 result = get_user_pages(current, current->mm, user_addr,
                                         data->npages, 0, 0, data->pagevec, NULL);
···
                 started += bytes;
                 user_addr += bytes;
                 pos += bytes;
+
+                /* FIXME: Remove this useless math from the final patch */
                 pgbase += bytes;
                 pgbase &= ~PAGE_MASK;
+                BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
                 count -= bytes;
         } while (count != 0);
fs/nfs/read.c (+13 -11)

···
 
 #define MIN_POOL_READ   (32)
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(size_t len)
 {
+        unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
         struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
 
         if (p) {
                 memset(p, 0, sizeof(*p));
                 INIT_LIST_HEAD(&p->pages);
+                p->npages = pagecount;
                 if (pagecount <= ARRAY_SIZE(p->page_array))
                         p->pagevec = p->page_array;
                 else {
···
         int             result;
         struct nfs_read_data    *rdata;
 
-        rdata = nfs_readdata_alloc(1);
+        rdata = nfs_readdata_alloc(count);
         if (!rdata)
                 return -ENOMEM;
···
         struct nfs_page *req = nfs_list_entry(head->next);
         struct page *page = req->wb_page;
         struct nfs_read_data *data;
-        unsigned int rsize = NFS_SERVER(inode)->rsize;
-        unsigned int nbytes, offset;
+        size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+        unsigned int offset;
         int requests = 0;
         LIST_HEAD(list);
 
         nfs_list_remove_request(req);
 
         nbytes = req->wb_bytes;
-        for(;;) {
-                data = nfs_readdata_alloc(1);
+        do {
+                size_t len = min(nbytes,rsize);
+
+                data = nfs_readdata_alloc(len);
                 if (!data)
                         goto out_bad;
                 INIT_LIST_HEAD(&data->pages);
                 list_add(&data->pages, &list);
                 requests++;
-                if (nbytes <= rsize)
-                        break;
-                nbytes -= rsize;
-        }
+                nbytes -= len;
+        } while(nbytes != 0);
         atomic_set(&req->wb_complete, requests);
 
         ClearPageError(page);
···
         if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                 return nfs_pagein_multi(head, inode);
 
-        data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
+        data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
         if (!data)
                 goto out_bad;
fs/nfs/write.c (+15 -22)

···
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(void)
 {
         struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
         if (p) {
                 memset(p, 0, sizeof(*p));
                 INIT_LIST_HEAD(&p->pages);
-                if (pagecount <= ARRAY_SIZE(p->page_array))
-                        p->pagevec = p->page_array;
-                else {
-                        p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-                        if (!p->pagevec) {
-                                mempool_free(p, nfs_commit_mempool);
-                                p = NULL;
-                        }
-                }
         }
         return p;
 }
···
         mempool_free(p, nfs_commit_mempool);
 }
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
+        unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
         struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
 
         if (p) {
                 memset(p, 0, sizeof(*p));
                 INIT_LIST_HEAD(&p->pages);
+                p->npages = pagecount;
                 if (pagecount <= ARRAY_SIZE(p->page_array))
                         p->pagevec = p->page_array;
                 else {
···
         int             result, written = 0;
         struct nfs_write_data *wdata;
 
-        wdata = nfs_writedata_alloc(1);
+        wdata = nfs_writedata_alloc(wsize);
         if (!wdata)
                 return -ENOMEM;
···
         struct nfs_page *req = nfs_list_entry(head->next);
         struct page *page = req->wb_page;
         struct nfs_write_data *data;
-        unsigned int wsize = NFS_SERVER(inode)->wsize;
-        unsigned int nbytes, offset;
+        size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+        unsigned int offset;
         int requests = 0;
         LIST_HEAD(list);
 
         nfs_list_remove_request(req);
 
         nbytes = req->wb_bytes;
-        for (;;) {
-                data = nfs_writedata_alloc(1);
+        do {
+                size_t len = min(nbytes, wsize);
+
+                data = nfs_writedata_alloc(len);
                 if (!data)
                         goto out_bad;
                 list_add(&data->pages, &list);
                 requests++;
-                if (nbytes <= wsize)
-                        break;
-                nbytes -= wsize;
-        }
+                nbytes -= len;
+        } while (nbytes != 0);
         atomic_set(&req->wb_complete, requests);
 
         ClearPageError(page);
···
         struct nfs_write_data   *data;
         unsigned int            count;
 
-        data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
+        data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
         if (!data)
                 goto out_bad;
···
         struct nfs_write_data   *data;
         struct nfs_page         *req;
 
-        data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
+        data = nfs_commit_alloc();
 
         if (!data)
                 goto out_bad;
include/linux/nfs_fs.h (+3 -3)

···
 extern void nfs_writedata_release(void *);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+struct nfs_write_data *nfs_commit_alloc(void);
 void nfs_commit_free(struct nfs_write_data *p);
 #endif
···
 /*
  * Allocate nfs_write_data structures
  */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern struct nfs_write_data *nfs_writedata_alloc(size_t len);
 
 /*
  * linux/fs/nfs/read.c
···
 /*
  * Allocate nfs_read_data structures
  */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern struct nfs_read_data *nfs_readdata_alloc(size_t len);
 
 /*
  * linux/fs/nfs3proc.c
include/linux/nfs_xdr.h (+2 -2)

···
         struct list_head        pages;  /* Coalesced read requests */
         struct nfs_page         *req;   /* multi ops per nfs_page */
         struct page             **pagevec;
-        unsigned int            npages; /* active pages in pagevec */
+        unsigned int            npages; /* Max length of pagevec */
         struct nfs_readargs args;
         struct nfs_readres  res;
 #ifdef CONFIG_NFS_V4
···
         struct list_head        pages;          /* Coalesced requests we wish to flush */
         struct nfs_page         *req;           /* multi ops per nfs_page */
         struct page             **pagevec;
-        unsigned int            npages;         /* active pages in pagevec */
+        unsigned int            npages;         /* Max length of pagevec */
         struct nfs_writeargs    args;           /* argument struct */
         struct nfs_writeres     res;            /* result struct */
 #ifdef CONFIG_NFS_V4