Revert "Merge branch 'odirect'"

This reverts commit ccf01ef7aa9c6c293a1c64c27331a2ce227916ec.

No idea how git managed this one: when I asked it to merge the odirect
topic branch it actually generated a patch which reverted the change.

Reverting the 'merge' will once again reveal Chuck's recent NFS/O_DIRECT
work to the world.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

 fs/nfs/direct.c         | +201 -234
 include/linux/nfs_xdr.h |   +2
 2 files changed: +203 -234

fs/nfs/direct.c
(old)
···
 68       struct kref kref; /* release manager */
 69
 70       /* I/O parameters */
 71 -     struct list_head list, /* nfs_read/write_data structs */
 72 -             rewrite_list; /* saved nfs_write_data structs */
 73       struct nfs_open_context *ctx; /* file open context info */
 74       struct kiocb * iocb; /* controlling i/o request */
 75       struct inode * inode; /* target file of i/o */
 76 -     unsigned long user_addr; /* location of user's buffer */
 77 -     size_t user_count; /* total bytes to move */
 78 -     loff_t pos; /* starting offset in file */
 79 -     struct page ** pages; /* pages in our buffer */
 80 -     unsigned int npages; /* count of pages */
 81
 82       /* completion state */
 83       spinlock_t lock; /* protect completion state */
 84 -     int outstanding; /* i/os we're waiting for */
 85       ssize_t count, /* bytes actually processed */
 86               error; /* any reported error */
 87       struct completion completion; /* wait for i/o completion */
 88
 89       /* commit state */
 90       struct nfs_write_data * commit_data; /* special write_data for commits */
 91       int flags;
 92   #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
···
 88       struct nfs_writeverf verf; /* unstable write verifier */
 89   };
 90
 91 - static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
 92   static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
 93
 94   /**
 95    * nfs_direct_IO - NFS address space operation for direct I/O
···
142       return -EINVAL;
143   }
144
145 - static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
146   {
147       int i;
148       for (i = 0; i < npages; i++) {
149           struct page *page = pages[i];
150 -         if (do_dirty && !PageCompound(page))
151               set_page_dirty_lock(page);
152 -         page_cache_release(page);
153       }
154 -     kfree(pages);
155   }
156
157 - static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
158   {
159 -     int result = -ENOMEM;
160 -     unsigned long page_count;
161 -     size_t array_size;
162 -
163 -     page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
164 -     page_count -= user_addr >> PAGE_SHIFT;
165 -
166 -     array_size = (page_count * sizeof(struct page *));
167 -     *pages = kmalloc(array_size, GFP_KERNEL);
168 -     if (*pages) {
169 -         down_read(&current->mm->mmap_sem);
170 -         result = get_user_pages(current, current->mm, user_addr,
171 -                     page_count, (rw == READ), 0,
172 -                     *pages, NULL);
173 -         up_read(&current->mm->mmap_sem);
174 -         if (result != page_count) {
175 -             /*
176 -              * If we got fewer pages than expected from
177 -              * get_user_pages(), the user buffer runs off the
178 -              * end of a mapping; return EFAULT.
179 -              */
180 -             if (result >= 0) {
181 -                 nfs_free_user_pages(*pages, result, 0);
182 -                 result = -EFAULT;
183 -             } else
184 -                 kfree(*pages);
185 -             *pages = NULL;
186 -         }
187 -     }
188 -     return result;
189   }
190
191   static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
···
168           return NULL;
169
170       kref_init(&dreq->kref);
171       init_completion(&dreq->completion);
172 -     INIT_LIST_HEAD(&dreq->list);
173       INIT_LIST_HEAD(&dreq->rewrite_list);
174       dreq->iocb = NULL;
175       dreq->ctx = NULL;
176       spin_lock_init(&dreq->lock);
177 -     dreq->outstanding = 0;
178       dreq->count = 0;
179       dreq->error = 0;
180       dreq->flags = 0;
···
215   }
216
217   /*
218 -  * We must hold a reference to all the pages in this direct read request
219 -  * until the RPCs complete. This could be long *after* we are woken up in
220 -  * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
221 -  *
222 -  * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
223 -  * can't trust the iocb is still valid here if this is a synchronous
224 -  * request. If the waiter is woken prematurely, the iocb is long gone.
225    */
226   static void nfs_direct_complete(struct nfs_direct_req *dreq)
227   {
228 -     nfs_free_user_pages(dreq->pages, dreq->npages, 1);
229 -
230       if (dreq->iocb) {
231           long res = (long) dreq->error;
232           if (!res)
···
232   }
233
234   /*
235 -  * Note we also set the number of requests we have in the dreq when we are
236 -  * done. This prevents races with I/O completion so we will always wait
237 -  * until all requests have been dispatched and completed.
238    */
239 - static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
240 - {
241 -     struct list_head *list;
242 -     struct nfs_direct_req *dreq;
243 -     unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
244 -
245 -     dreq = nfs_direct_req_alloc();
246 -     if (!dreq)
247 -         return NULL;
248 -
249 -     list = &dreq->list;
250 -     for(;;) {
251 -         struct nfs_read_data *data = nfs_readdata_alloc(rpages);
252 -
253 -         if (unlikely(!data)) {
254 -             while (!list_empty(list)) {
255 -                 data = list_entry(list->next,
256 -                         struct nfs_read_data, pages);
257 -                 list_del(&data->pages);
258 -                 nfs_readdata_free(data);
259 -             }
260 -             kref_put(&dreq->kref, nfs_direct_req_release);
261 -             return NULL;
262 -         }
263 -
264 -         INIT_LIST_HEAD(&data->pages);
265 -         list_add(&data->pages, list);
266 -
267 -         data->req = (struct nfs_page *) dreq;
268 -         dreq->outstanding++;
269 -         if (nbytes <= rsize)
270 -             break;
271 -         nbytes -= rsize;
272 -     }
273 -     kref_get(&dreq->kref);
274 -     return dreq;
275 - }
276 -
277   static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
278   {
279       struct nfs_read_data *data = calldata;
···
244       if (nfs_readpage_result(task, data) != 0)
245           return;
246
247       spin_lock(&dreq->lock);
248
249       if (likely(task->tk_status >= 0))
···
254       else
255           dreq->error = task->tk_status;
256
257 -     if (--dreq->outstanding) {
258 -         spin_unlock(&dreq->lock);
259 -         return;
260 -     }
261 -
262       spin_unlock(&dreq->lock);
263 -     nfs_direct_complete(dreq);
264   }
265
266   static const struct rpc_call_ops nfs_read_direct_ops = {
···
266   };
267
268   /*
269 -  * For each nfs_read_data struct that was allocated on the list, dispatch
270 -  * an NFS READ operation
271    */
272 - static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
273   {
274       struct nfs_open_context *ctx = dreq->ctx;
275       struct inode *inode = ctx->dentry->d_inode;
276 -     struct list_head *list = &dreq->list;
277 -     struct page **pages = dreq->pages;
278 -     size_t count = dreq->user_count;
279 -     loff_t pos = dreq->pos;
280       size_t rsize = NFS_SERVER(inode)->rsize;
281 -     unsigned int curpage, pgbase;
282
283 -     curpage = 0;
284 -     pgbase = dreq->user_addr & ~PAGE_MASK;
285       do {
286           struct nfs_read_data *data;
287           size_t bytes;
288
289           bytes = rsize;
290           if (count < rsize)
291               bytes = count;
292
293 -         BUG_ON(list_empty(list));
294 -         data = list_entry(list->next, struct nfs_read_data, pages);
295 -         list_del_init(&data->pages);
296
297           data->inode = inode;
298           data->cred = ctx->cred;
299           data->args.fh = NFS_FH(inode);
300           data->args.context = ctx;
301           data->args.offset = pos;
302           data->args.pgbase = pgbase;
303 -         data->args.pages = &pages[curpage];
304           data->args.count = bytes;
305           data->res.fattr = &data->fattr;
306           data->res.eof = 0;
···
342                 bytes,
343                 (unsigned long long)data->args.offset);
344
345           pos += bytes;
346           pgbase += bytes;
347 -         curpage += pgbase >> PAGE_SHIFT;
348           pgbase &= ~PAGE_MASK;
349
350           count -= bytes;
351       } while (count != 0);
352 -     BUG_ON(!list_empty(list));
353   }
354
355 - static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
356   {
357 -     ssize_t result;
358       sigset_t oldset;
359       struct inode *inode = iocb->ki_filp->f_mapping->host;
360       struct rpc_clnt *clnt = NFS_CLIENT(inode);
361       struct nfs_direct_req *dreq;
362
363 -     dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
364       if (!dreq)
365           return -ENOMEM;
366
367 -     dreq->user_addr = user_addr;
368 -     dreq->user_count = count;
369 -     dreq->pos = pos;
370 -     dreq->pages = pages;
371 -     dreq->npages = nr_pages;
372       dreq->inode = inode;
373       dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
374       if (!is_sync_kiocb(iocb))
···
378
379       nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
380       rpc_clnt_sigmask(clnt, &oldset);
381 -     nfs_direct_read_schedule(dreq);
382 -     result = nfs_direct_wait(dreq);
383       rpc_clnt_sigunmask(clnt, &oldset);
384
385       return result;
···
388
389   static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
390   {
391 -     list_splice_init(&dreq->rewrite_list, &dreq->list);
392 -     while (!list_empty(&dreq->list)) {
393 -         struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
394           list_del(&data->pages);
395           nfs_writedata_release(data);
396       }
397   }
···
399   #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
400   static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
401   {
402 -     struct list_head *pos;
403
404 -     list_splice_init(&dreq->rewrite_list, &dreq->list);
405 -     list_for_each(pos, &dreq->list)
406 -         dreq->outstanding++;
407       dreq->count = 0;
408
409 -     nfs_direct_write_schedule(dreq, FLUSH_STABLE);
410   }
411
412   static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
···
480       data->cred = dreq->ctx->cred;
481
482       data->args.fh = NFS_FH(data->inode);
483 -     data->args.offset = dreq->pos;
484 -     data->args.count = dreq->user_count;
485       data->res.count = 0;
486       data->res.fattr = &data->fattr;
487       data->res.verf = &data->verf;
···
543   }
544   #endif
545
546 - static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
547 - {
548 -     struct list_head *list;
549 -     struct nfs_direct_req *dreq;
550 -     unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
551 -
552 -     dreq = nfs_direct_req_alloc();
553 -     if (!dreq)
554 -         return NULL;
555 -
556 -     list = &dreq->list;
557 -     for(;;) {
558 -         struct nfs_write_data *data = nfs_writedata_alloc(wpages);
559 -
560 -         if (unlikely(!data)) {
561 -             while (!list_empty(list)) {
562 -                 data = list_entry(list->next,
563 -                         struct nfs_write_data, pages);
564 -                 list_del(&data->pages);
565 -                 nfs_writedata_free(data);
566 -             }
567 -             kref_put(&dreq->kref, nfs_direct_req_release);
568 -             return NULL;
569 -         }
570 -
571 -         INIT_LIST_HEAD(&data->pages);
572 -         list_add(&data->pages, list);
573 -
574 -         data->req = (struct nfs_page *) dreq;
575 -         dreq->outstanding++;
576 -         if (nbytes <= wsize)
577 -             break;
578 -         nbytes -= wsize;
579 -     }
580 -
581 -     nfs_alloc_commit_data(dreq);
582 -
583 -     kref_get(&dreq->kref);
584 -     return dreq;
585 - }
586 -
587   static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
588   {
589       struct nfs_write_data *data = calldata;
···
572           }
573       }
574   }
575 -     /* In case we have to resend */
576 -     data->args.stable = NFS_FILE_SYNC;
577
578       spin_unlock(&dreq->lock);
579   }
···
585       struct nfs_write_data *data = calldata;
586       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
587
588 -     spin_lock(&dreq->lock);
589 -     if (--dreq->outstanding) {
590 -         spin_unlock(&dreq->lock);
591 -         return;
592 -     }
593 -     spin_unlock(&dreq->lock);
594 -
595 -     nfs_direct_write_complete(dreq, data->inode);
596   }
597
598   static const struct rpc_call_ops nfs_write_direct_ops = {
···
595   };
596
597   /*
598 -  * For each nfs_write_data struct that was allocated on the list, dispatch
599 -  * an NFS WRITE operation
600    */
601 - static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
602   {
603       struct nfs_open_context *ctx = dreq->ctx;
604       struct inode *inode = ctx->dentry->d_inode;
605 -     struct list_head *list = &dreq->list;
606 -     struct page **pages = dreq->pages;
607 -     size_t count = dreq->user_count;
608 -     loff_t pos = dreq->pos;
609       size_t wsize = NFS_SERVER(inode)->wsize;
610 -     unsigned int curpage, pgbase;
611
612 -     curpage = 0;
613 -     pgbase = dreq->user_addr & ~PAGE_MASK;
614       do {
615           struct nfs_write_data *data;
616           size_t bytes;
617
618           bytes = wsize;
619           if (count < wsize)
620               bytes = count;
621
622 -         BUG_ON(list_empty(list));
623 -         data = list_entry(list->next, struct nfs_write_data, pages);
624           list_move_tail(&data->pages, &dreq->rewrite_list);
625
626           data->inode = inode;
627           data->cred = ctx->cred;
628           data->args.fh = NFS_FH(inode);
629           data->args.context = ctx;
630           data->args.offset = pos;
631           data->args.pgbase = pgbase;
632 -         data->args.pages = &pages[curpage];
633           data->args.count = bytes;
634           data->res.fattr = &data->fattr;
635           data->res.count = bytes;
···
674                 bytes,
675                 (unsigned long long)data->args.offset);
676
677           pos += bytes;
678           pgbase += bytes;
679 -         curpage += pgbase >> PAGE_SHIFT;
680           pgbase &= ~PAGE_MASK;
681
682           count -= bytes;
683       } while (count != 0);
684 -     BUG_ON(!list_empty(list));
685   }
686
687 - static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
688   {
689 -     ssize_t result;
690       sigset_t oldset;
691       struct inode *inode = iocb->ki_filp->f_mapping->host;
692       struct rpc_clnt *clnt = NFS_CLIENT(inode);
···
701       size_t wsize = NFS_SERVER(inode)->wsize;
702       int sync = 0;
703
704 -     dreq = nfs_direct_write_alloc(count, wsize);
705       if (!dreq)
706           return -ENOMEM;
707       if (dreq->commit_data == NULL || count < wsize)
708           sync = FLUSH_STABLE;
709
710 -     dreq->user_addr = user_addr;
711 -     dreq->user_count = count;
712 -     dreq->pos = pos;
713 -     dreq->pages = pages;
714 -     dreq->npages = nr_pages;
715       dreq->inode = inode;
716       dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
717       if (!is_sync_kiocb(iocb))
···
719       nfs_begin_data_update(inode);
720
721       rpc_clnt_sigmask(clnt, &oldset);
722 -     nfs_direct_write_schedule(dreq, sync);
723 -     result = nfs_direct_wait(dreq);
724       rpc_clnt_sigunmask(clnt, &oldset);
725
726       return result;
···
751   ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
752   {
753       ssize_t retval = -EINVAL;
754 -     int page_count;
755 -     struct page **pages;
756       struct file *file = iocb->ki_filp;
757       struct address_space *mapping = file->f_mapping;
758
···
772       if (retval)
773           goto out;
774
775 -     retval = nfs_get_user_pages(READ, (unsigned long) buf,
776 -                     count, &pages);
777 -     if (retval < 0)
778 -         goto out;
779 -     page_count = retval;
780 -
781 -     retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
782 -                     pages, page_count);
783       if (retval > 0)
784           iocb->ki_pos = pos + retval;
785
···
808   ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
809   {
810       ssize_t retval;
811 -     int page_count;
812 -     struct page **pages;
813       struct file *file = iocb->ki_filp;
814       struct address_space *mapping = file->f_mapping;
815
···
835       if (retval)
836           goto out;
837
838 -     retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
839 -                     count, &pages);
840 -     if (retval < 0)
841 -         goto out;
842 -     page_count = retval;
843 -
844 -     retval = nfs_direct_write(iocb, (unsigned long) buf, count,
845 -                     pos, pages, page_count);
846
847       /*
848        * XXX: nfs_end_data_update() already ensures this file's
(new)
···
 68       struct kref kref; /* release manager */
 69
 70       /* I/O parameters */
 71       struct nfs_open_context *ctx; /* file open context info */
 72       struct kiocb * iocb; /* controlling i/o request */
 73       struct inode * inode; /* target file of i/o */
 74
 75       /* completion state */
 76 +     atomic_t io_count; /* i/os we're waiting for */
 77       spinlock_t lock; /* protect completion state */
 78       ssize_t count, /* bytes actually processed */
 79               error; /* any reported error */
 80       struct completion completion; /* wait for i/o completion */
 81
 82       /* commit state */
 83 +     struct list_head rewrite_list; /* saved nfs_write_data structs */
 84       struct nfs_write_data * commit_data; /* special write_data for commits */
 85       int flags;
 86   #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
···
 94       struct nfs_writeverf verf; /* unstable write verifier */
 95   };
 96
 97   static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
 98 + static const struct rpc_call_ops nfs_write_direct_ops;
 99 +
100 + static inline void get_dreq(struct nfs_direct_req *dreq)
101 + {
102 +     atomic_inc(&dreq->io_count);
103 + }
104 +
105 + static inline int put_dreq(struct nfs_direct_req *dreq)
106 + {
107 +     return atomic_dec_and_test(&dreq->io_count);
108 + }
109 +
110 + /*
111 +  * "size" is never larger than rsize or wsize.
112 +  */
113 + static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
114 + {
115 +     int page_count;
116 +
117 +     page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
118 +     page_count -= user_addr >> PAGE_SHIFT;
119 +     BUG_ON(page_count < 0);
120 +
121 +     return page_count;
122 + }
123 +
124 + static inline unsigned int nfs_max_pages(unsigned int size)
125 + {
126 +     return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
127 + }
128
129   /**
130    * nfs_direct_IO - NFS address space operation for direct I/O
···
119       return -EINVAL;
120   }
121
122 + static void nfs_direct_dirty_pages(struct page **pages, int npages)
123   {
124       int i;
125       for (i = 0; i < npages; i++) {
126           struct page *page = pages[i];
127 +         if (!PageCompound(page))
128               set_page_dirty_lock(page);
129       }
130   }
131
132 + static void nfs_direct_release_pages(struct page **pages, int npages)
133   {
134 +     int i;
135 +     for (i = 0; i < npages; i++)
136 +         page_cache_release(pages[i]);
137   }
138
139   static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
···
174           return NULL;
175
176       kref_init(&dreq->kref);
177 +     kref_get(&dreq->kref);
178       init_completion(&dreq->completion);
179       INIT_LIST_HEAD(&dreq->rewrite_list);
180       dreq->iocb = NULL;
181       dreq->ctx = NULL;
182       spin_lock_init(&dreq->lock);
183 +     atomic_set(&dreq->io_count, 0);
184       dreq->count = 0;
185       dreq->error = 0;
186       dreq->flags = 0;
···
221   }
222
223   /*
224 +  * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
225 +  * the iocb is still valid here if this is a synchronous request.
226    */
227   static void nfs_direct_complete(struct nfs_direct_req *dreq)
228   {
229       if (dreq->iocb) {
230           long res = (long) dreq->error;
231           if (!res)
···
245   }
246
247   /*
248 +  * We must hold a reference to all the pages in this direct read request
249 +  * until the RPCs complete. This could be long *after* we are woken up in
250 +  * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
251    */
252   static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
253   {
254       struct nfs_read_data *data = calldata;
···
295       if (nfs_readpage_result(task, data) != 0)
296           return;
297
298 +     nfs_direct_dirty_pages(data->pagevec, data->npages);
299 +     nfs_direct_release_pages(data->pagevec, data->npages);
300 +
301       spin_lock(&dreq->lock);
302
303       if (likely(task->tk_status >= 0))
···
302       else
303           dreq->error = task->tk_status;
304
305       spin_unlock(&dreq->lock);
306 +
307 +     if (put_dreq(dreq))
308 +         nfs_direct_complete(dreq);
309   }
310
311   static const struct rpc_call_ops nfs_read_direct_ops = {
···
317   };
318
319   /*
320 +  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
321 +  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
322 +  * bail and stop sending more reads.  Read length accounting is
323 +  * handled automatically by nfs_direct_read_result().  Otherwise, if
324 +  * no requests have been sent, just return an error.
325    */
326 + static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
327   {
328       struct nfs_open_context *ctx = dreq->ctx;
329       struct inode *inode = ctx->dentry->d_inode;
330       size_t rsize = NFS_SERVER(inode)->rsize;
331 +     unsigned int rpages = nfs_max_pages(rsize);
332 +     unsigned int pgbase;
333 +     int result;
334 +     ssize_t started = 0;
335
336 +     get_dreq(dreq);
337 +
338 +     pgbase = user_addr & ~PAGE_MASK;
339       do {
340           struct nfs_read_data *data;
341           size_t bytes;
342 +
343 +         result = -ENOMEM;
344 +         data = nfs_readdata_alloc(rpages);
345 +         if (unlikely(!data))
346 +             break;
347
348           bytes = rsize;
349           if (count < rsize)
350               bytes = count;
351
352 +         data->npages = nfs_direct_count_pages(user_addr, bytes);
353 +         down_read(&current->mm->mmap_sem);
354 +         result = get_user_pages(current, current->mm, user_addr,
355 +                     data->npages, 1, 0, data->pagevec, NULL);
356 +         up_read(&current->mm->mmap_sem);
357 +         if (unlikely(result < data->npages)) {
358 +             if (result > 0)
359 +                 nfs_direct_release_pages(data->pagevec, result);
360 +             nfs_readdata_release(data);
361 +             break;
362 +         }
363
364 +         get_dreq(dreq);
365 +
366 +         data->req = (struct nfs_page *) dreq;
367           data->inode = inode;
368           data->cred = ctx->cred;
369           data->args.fh = NFS_FH(inode);
370           data->args.context = ctx;
371           data->args.offset = pos;
372           data->args.pgbase = pgbase;
373 +         data->args.pages = data->pagevec;
374           data->args.count = bytes;
375           data->res.fattr = &data->fattr;
376           data->res.eof = 0;
···
374                 bytes,
375                 (unsigned long long)data->args.offset);
376
377 +         started += bytes;
378 +         user_addr += bytes;
379           pos += bytes;
380           pgbase += bytes;
381           pgbase &= ~PAGE_MASK;
382
383           count -= bytes;
384       } while (count != 0);
385 +
386 +     if (put_dreq(dreq))
387 +         nfs_direct_complete(dreq);
388 +
389 +     if (started)
390 +         return 0;
391 +     return result < 0 ? (ssize_t) result : -EFAULT;
392   }
393
394 + static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
395   {
396 +     ssize_t result = 0;
397       sigset_t oldset;
398       struct inode *inode = iocb->ki_filp->f_mapping->host;
399       struct rpc_clnt *clnt = NFS_CLIENT(inode);
400       struct nfs_direct_req *dreq;
401
402 +     dreq = nfs_direct_req_alloc();
403       if (!dreq)
404           return -ENOMEM;
405
406       dreq->inode = inode;
407       dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
408       if (!is_sync_kiocb(iocb))
···
408
409       nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
410       rpc_clnt_sigmask(clnt, &oldset);
411 +     result = nfs_direct_read_schedule(dreq, user_addr, count, pos);
412 +     if (!result)
413 +         result = nfs_direct_wait(dreq);
414       rpc_clnt_sigunmask(clnt, &oldset);
415
416       return result;
···
417
418   static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
419   {
420 +     while (!list_empty(&dreq->rewrite_list)) {
421 +         struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
422           list_del(&data->pages);
423 +         nfs_direct_release_pages(data->pagevec, data->npages);
424           nfs_writedata_release(data);
425       }
426   }
···
428   #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
429   static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
430   {
431 +     struct inode *inode = dreq->inode;
432 +     struct list_head *p;
433 +     struct nfs_write_data *data;
434
435       dreq->count = 0;
436 +     get_dreq(dreq);
437
438 +     list_for_each(p, &dreq->rewrite_list) {
439 +         data = list_entry(p, struct nfs_write_data, pages);
440 +
441 +         get_dreq(dreq);
442 +
443 +         /*
444 +          * Reset data->res.
445 +          */
446 +         nfs_fattr_init(&data->fattr);
447 +         data->res.count = data->args.count;
448 +         memset(&data->verf, 0, sizeof(data->verf));
449 +
450 +         /*
451 +          * Reuse data->task; data->args should not have changed
452 +          * since the original request was sent.
453 +          */
454 +         rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
455 +                 &nfs_write_direct_ops, data);
456 +         NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
457 +
458 +         data->task.tk_priority = RPC_PRIORITY_NORMAL;
459 +         data->task.tk_cookie = (unsigned long) inode;
460 +
461 +         /*
462 +          * We're called via an RPC callback, so BKL is already held.
463 +          */
464 +         rpc_execute(&data->task);
465 +
466 +         dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
467 +                 data->task.tk_pid,
468 +                 inode->i_sb->s_id,
469 +                 (long long)NFS_FILEID(inode),
470 +                 data->args.count,
471 +                 (unsigned long long)data->args.offset);
472 +     }
473 +
474 +     if (put_dreq(dreq))
475 +         nfs_direct_write_complete(dreq, inode);
476   }
477
478   static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
···
472       data->cred = dreq->ctx->cred;
473
474       data->args.fh = NFS_FH(data->inode);
475 +     data->args.offset = 0;
476 +     data->args.count = 0;
477       data->res.count = 0;
478       data->res.fattr = &data->fattr;
479       data->res.verf = &data->verf;
···
535   }
536   #endif
537
538   static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
539   {
540       struct nfs_write_data *data = calldata;
···
605           }
606       }
607   }
608
609       spin_unlock(&dreq->lock);
610   }
···
620       struct nfs_write_data *data = calldata;
621       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
622
623 +     if (put_dreq(dreq))
624 +         nfs_direct_write_complete(dreq, data->inode);
625   }
626
627   static const struct rpc_call_ops nfs_write_direct_ops = {
···
636   };
637
638   /*
639 +  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
640 +  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
641 +  * bail and stop sending more writes.  Write length accounting is
642 +  * handled automatically by nfs_direct_write_result().  Otherwise, if
643 +  * no requests have been sent, just return an error.
644    */
645 + static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
646   {
647       struct nfs_open_context *ctx = dreq->ctx;
648       struct inode *inode = ctx->dentry->d_inode;
649       size_t wsize = NFS_SERVER(inode)->wsize;
650 +     unsigned int wpages = nfs_max_pages(wsize);
651 +     unsigned int pgbase;
652 +     int result;
653 +     ssize_t started = 0;
654
655 +     get_dreq(dreq);
656 +
657 +     pgbase = user_addr & ~PAGE_MASK;
658       do {
659           struct nfs_write_data *data;
660           size_t bytes;
661 +
662 +         result = -ENOMEM;
663 +         data = nfs_writedata_alloc(wpages);
664 +         if (unlikely(!data))
665 +             break;
666
667           bytes = wsize;
668           if (count < wsize)
669               bytes = count;
670
671 +         data->npages = nfs_direct_count_pages(user_addr, bytes);
672 +         down_read(&current->mm->mmap_sem);
673 +         result = get_user_pages(current, current->mm, user_addr,
674 +                     data->npages, 0, 0, data->pagevec, NULL);
675 +         up_read(&current->mm->mmap_sem);
676 +         if (unlikely(result < data->npages)) {
677 +             if (result > 0)
678 +                 nfs_direct_release_pages(data->pagevec, result);
679 +             nfs_writedata_release(data);
680 +             break;
681 +         }
682
683 +         get_dreq(dreq);
684
685           list_move_tail(&data->pages, &dreq->rewrite_list);
686
687 +         data->req = (struct nfs_page *) dreq;
688           data->inode = inode;
689           data->cred = ctx->cred;
690           data->args.fh = NFS_FH(inode);
691           data->args.context = ctx;
692           data->args.offset = pos;
693           data->args.pgbase = pgbase;
694 +         data->args.pages = data->pagevec;
695           data->args.count = bytes;
696           data->res.fattr = &data->fattr;
697           data->res.count = bytes;
···
694                 bytes,
695                 (unsigned long long)data->args.offset);
696
697 +         started += bytes;
698 +         user_addr += bytes;
699           pos += bytes;
700           pgbase += bytes;
701           pgbase &= ~PAGE_MASK;
702
703           count -= bytes;
704       } while (count != 0);
705 +
706 +     if (put_dreq(dreq))
707 +         nfs_direct_write_complete(dreq, inode);
708 +
709 +     if (started)
710 +         return 0;
711 +     return result < 0 ? (ssize_t) result : -EFAULT;
712   }
713
714 + static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
715   {
716 +     ssize_t result = 0;
717       sigset_t oldset;
718       struct inode *inode = iocb->ki_filp->f_mapping->host;
719       struct rpc_clnt *clnt = NFS_CLIENT(inode);
···
714       size_t wsize = NFS_SERVER(inode)->wsize;
715       int sync = 0;
716
717 +     dreq = nfs_direct_req_alloc();
718       if (!dreq)
719           return -ENOMEM;
720 +     nfs_alloc_commit_data(dreq);
721 +
722       if (dreq->commit_data == NULL || count < wsize)
723           sync = FLUSH_STABLE;
724
725       dreq->inode = inode;
726       dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
727       if (!is_sync_kiocb(iocb))
···
735       nfs_begin_data_update(inode);
736
737       rpc_clnt_sigmask(clnt, &oldset);
738 +     result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
739 +     if (!result)
740 +         result = nfs_direct_wait(dreq);
741       rpc_clnt_sigunmask(clnt, &oldset);
742
743       return result;
···
766   ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
767   {
768       ssize_t retval = -EINVAL;
769       struct file *file = iocb->ki_filp;
770       struct address_space *mapping = file->f_mapping;
771
···
789       if (retval)
790           goto out;
791
792 +     retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos);
793       if (retval > 0)
794           iocb->ki_pos = pos + retval;
795
···
832   ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
833   {
834       ssize_t retval;
835       struct file *file = iocb->ki_filp;
836       struct address_space *mapping = file->f_mapping;
837
···
861       if (retval)
862           goto out;
863
864 +     retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos);
865
866       /*
867        * XXX: nfs_end_data_update() already ensures this file's
include/linux/nfs_xdr.h
(old)
···
729       struct list_head pages; /* Coalesced read requests */
730       struct nfs_page *req; /* multi ops per nfs_page */
731       struct page **pagevec;
732       struct nfs_readargs args;
733       struct nfs_readres res;
734   #ifdef CONFIG_NFS_V4
···
748       struct list_head pages; /* Coalesced requests we wish to flush */
749       struct nfs_page *req; /* multi ops per nfs_page */
750       struct page **pagevec;
751       struct nfs_writeargs args; /* argument struct */
752       struct nfs_writeres res; /* result struct */
753   #ifdef CONFIG_NFS_V4
(new)
···
729       struct list_head pages; /* Coalesced read requests */
730       struct nfs_page *req; /* multi ops per nfs_page */
731       struct page **pagevec;
732 +     unsigned int npages; /* active pages in pagevec */
733       struct nfs_readargs args;
734       struct nfs_readres res;
735   #ifdef CONFIG_NFS_V4
···
747       struct list_head pages; /* Coalesced requests we wish to flush */
748       struct nfs_page *req; /* multi ops per nfs_page */
749       struct page **pagevec;
750 +     unsigned int npages; /* active pages in pagevec */
751       struct nfs_writeargs args; /* argument struct */
752       struct nfs_writeres res; /* result struct */
753   #ifdef CONFIG_NFS_V4
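The restored direct.c replaces the old spin-locked "outstanding" counter with an atomic io_count driven by get_dreq()/put_dreq(): the scheduling loop holds one reference of its own, takes another for every RPC it dispatches, and whichever path drops the count to zero calls the completion routine. Below is a minimal user-space sketch of that reference-counting pattern only; it is not the kernel code, and the schedule_chunks()/chunk_done() names and plain C11 atomics are illustrative assumptions.

    #include <stdatomic.h>
    #include <stdio.h>

    struct direct_req {
            atomic_int io_count;   /* outstanding chunks plus the scheduler's own reference */
            int        completed;  /* set when the final reference is dropped */
    };

    static void get_dreq(struct direct_req *dreq)
    {
            atomic_fetch_add(&dreq->io_count, 1);
    }

    /* Returns nonzero when the caller dropped the final reference. */
    static int put_dreq(struct direct_req *dreq)
    {
            return atomic_fetch_sub(&dreq->io_count, 1) == 1;
    }

    static void complete_req(struct direct_req *dreq)
    {
            dreq->completed = 1;    /* analogous to nfs_direct_complete() */
    }

    /* Per-chunk completion callback, analogous to nfs_direct_read_result(). */
    static void chunk_done(struct direct_req *dreq)
    {
            if (put_dreq(dreq))
                    complete_req(dreq);
    }

    /* Dispatch loop, analogous to nfs_direct_read_schedule(). */
    static void schedule_chunks(struct direct_req *dreq, int nchunks)
    {
            get_dreq(dreq);                 /* the scheduler's own reference */
            for (int i = 0; i < nchunks; i++) {
                    get_dreq(dreq);         /* one reference per dispatched chunk */
                    chunk_done(dreq);       /* in the kernel this runs asynchronously */
            }
            if (put_dreq(dreq))             /* drop the scheduler's reference */
                    complete_req(dreq);
    }

    int main(void)
    {
            struct direct_req dreq = { .io_count = 0, .completed = 0 };
            schedule_chunks(&dreq, 3);
            printf("completed = %d\n", dreq.completed);
            return 0;
    }

Because the scheduler keeps a reference of its own for the whole dispatch loop, an RPC that completes while later chunks are still being issued can never drive io_count to zero early; the request is completed exactly once, by whichever context drops the last reference. That is what lets the restored code delete the old "if (--dreq->outstanding)" logic under dreq->lock.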