Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'flexfiles'

* flexfiles: (53 commits)
pnfs: lookup new lseg at lseg boundary
nfs41: .init_read and .init_write can be called with valid pg_lseg
pnfs: Update documentation on the Layout Drivers
pnfs/flexfiles: Add the FlexFile Layout Driver
nfs: count DIO good bytes correctly with mirroring
nfs41: wait for LAYOUTRETURN before retrying LAYOUTGET
nfs: add a helper to set NFS_ODIRECT_RESCHED_WRITES to direct writes
nfs41: add NFS_LAYOUT_RETRY_LAYOUTGET to layout header flags
nfs/flexfiles: send layoutreturn before freeing lseg
nfs41: introduce NFS_LAYOUT_RETURN_BEFORE_CLOSE
nfs41: allow async version layoutreturn
nfs41: add range to layoutreturn args
pnfs: allow LD to ask to resend read through pnfs
nfs: add nfs_pgio_current_mirror helper
nfs: only reset desc->pg_mirror_idx when mirroring is supported
nfs41: add a debug warning if we destroy an unempty layout
pnfs: fail comparison when bucket verifier not set
nfs: mirroring support for direct io
nfs: add mirroring support to pgio layer
pnfs: pass ds_commit_idx through the commit path
...

Conflicts:
fs/nfs/pnfs.c
fs/nfs/pnfs.h

+4266 -1039
+7 -6
Documentation/filesystems/nfs/pnfs.txt
··· 57 57 layout drivers 58 58 -------------- 59 59 60 - PNFS utilizes what is called layout drivers. The STD defines 3 basic 61 - layout types: "files" "objects" and "blocks". For each of these types 62 - there is a layout-driver with a common function-vectors table which 63 - are called by the nfs-client pnfs-core to implement the different layout 64 - types. 60 + PNFS utilizes what is called layout drivers. The STD defines 4 basic 61 + layout types: "files", "objects", "blocks", and "flexfiles". For each 62 + of these types there is a layout-driver with a common function-vectors 63 + table which are called by the nfs-client pnfs-core to implement the 64 + different layout types. 65 65 66 - Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c 66 + Files-layout-driver code is in: fs/nfs/filelayout/.. directory 67 67 Objects-layout-deriver code is in: fs/nfs/objlayout/.. directory 68 68 Blocks-layout-deriver code is in: fs/nfs/blocklayout/.. directory 69 + Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory 69 70 70 71 objects-layout setup 71 72 --------------------
+5
fs/nfs/Kconfig
··· 128 128 depends on NFS_V4_1 && SCSI_OSD_ULD 129 129 default NFS_V4 130 130 131 + config PNFS_FLEXFILE_LAYOUT 132 + tristate 133 + depends on NFS_V4_1 && NFS_V3 134 + default m 135 + 131 136 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN 132 137 string "NFSv4.1 Implementation ID Domain" 133 138 depends on NFS_V4_1
+2 -1
fs/nfs/Makefile
··· 27 27 dns_resolve.o nfs4trace.o 28 28 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o 29 29 nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o 30 - nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o 30 + nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o 31 31 nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o 32 32 33 33 obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ 34 34 obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ 35 35 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ 36 + obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
+2
fs/nfs/blocklayout/blocklayout.c
··· 860 860 .pg_init = bl_pg_init_read, 861 861 .pg_test = bl_pg_test_read, 862 862 .pg_doio = pnfs_generic_pg_readpages, 863 + .pg_cleanup = pnfs_generic_pg_cleanup, 863 864 }; 864 865 865 866 static const struct nfs_pageio_ops bl_pg_write_ops = { 866 867 .pg_init = bl_pg_init_write, 867 868 .pg_test = bl_pg_test_write, 868 869 .pg_doio = pnfs_generic_pg_writepages, 870 + .pg_cleanup = pnfs_generic_pg_cleanup, 869 871 }; 870 872 871 873 static struct pnfs_layoutdriver_type blocklayout_type = {
+95 -17
fs/nfs/direct.c
··· 66 66 /* 67 67 * This represents a set of asynchronous requests that we're waiting on 68 68 */ 69 + struct nfs_direct_mirror { 70 + ssize_t count; 71 + }; 72 + 69 73 struct nfs_direct_req { 70 74 struct kref kref; /* release manager */ 71 75 ··· 82 78 /* completion state */ 83 79 atomic_t io_count; /* i/os we're waiting for */ 84 80 spinlock_t lock; /* protect completion state */ 81 + 82 + struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; 83 + int mirror_count; 84 + 85 85 ssize_t count, /* bytes actually processed */ 86 86 bytes_left, /* bytes left to be sent */ 87 + io_start, /* start of IO */ 87 88 error; /* any reported error */ 88 89 struct completion completion; /* wait for i/o completion */ 89 90 ··· 117 108 return atomic_dec_and_test(&dreq->io_count); 118 109 } 119 110 111 + void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) 112 + { 113 + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 114 + } 115 + EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); 116 + 117 + static void 118 + nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) 119 + { 120 + int i; 121 + ssize_t count; 122 + 123 + WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); 124 + 125 + count = dreq->mirrors[hdr->pgio_mirror_idx].count; 126 + if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { 127 + count = hdr->io_start + hdr->good_bytes - dreq->io_start; 128 + dreq->mirrors[hdr->pgio_mirror_idx].count = count; 129 + } 130 + 131 + /* update the dreq->count by finding the minimum agreed count from all 132 + * mirrors */ 133 + count = dreq->mirrors[0].count; 134 + 135 + for (i = 1; i < dreq->mirror_count; i++) 136 + count = min(count, dreq->mirrors[i].count); 137 + 138 + dreq->count = count; 139 + } 140 + 120 141 /* 121 142 * nfs_direct_select_verf - select the right verifier 122 143 * @dreq - direct request possibly spanning multiple servers 123 144 * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs 124 - * 
@ds_idx - index of data server in data server list, only valid if ds_clp set 145 + * @commit_idx - commit bucket index for the DS 125 146 * 126 147 * returns the correct verifier to use given the role of the server 127 148 */ 128 149 static struct nfs_writeverf * 129 150 nfs_direct_select_verf(struct nfs_direct_req *dreq, 130 151 struct nfs_client *ds_clp, 131 - int ds_idx) 152 + int commit_idx) 132 153 { 133 154 struct nfs_writeverf *verfp = &dreq->verf; 134 155 135 156 #ifdef CONFIG_NFS_V4_1 136 157 if (ds_clp) { 137 158 /* pNFS is in use, use the DS verf */ 138 - if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) 139 - verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; 159 + if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) 160 + verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; 140 161 else 141 162 WARN_ON_ONCE(1); 142 163 } ··· 187 148 { 188 149 struct nfs_writeverf *verfp; 189 150 190 - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, 191 - hdr->ds_idx); 151 + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); 192 152 WARN_ON_ONCE(verfp->committed >= 0); 193 153 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); 194 154 WARN_ON_ONCE(verfp->committed < 0); ··· 207 169 { 208 170 struct nfs_writeverf *verfp; 209 171 210 - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, 211 - hdr->ds_idx); 172 + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); 212 173 if (verfp->committed < 0) { 213 174 nfs_direct_set_hdr_verf(dreq, hdr); 214 175 return 0; ··· 230 193 231 194 verfp = nfs_direct_select_verf(dreq, data->ds_clp, 232 195 data->ds_commit_index); 233 - WARN_ON_ONCE(verfp->committed < 0); 196 + 197 + /* verifier not set so always fail */ 198 + if (verfp->committed < 0) 199 + return 1; 200 + 234 201 return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); 235 202 } 236 203 ··· 290 249 cinfo->completion_ops = &nfs_direct_commit_completion_ops; 291 250 } 292 251 252 + static inline void 
nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, 253 + struct nfs_pageio_descriptor *pgio, 254 + struct nfs_page *req) 255 + { 256 + int mirror_count = 1; 257 + 258 + if (pgio->pg_ops->pg_get_mirror_count) 259 + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); 260 + 261 + dreq->mirror_count = mirror_count; 262 + } 263 + 293 264 static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 294 265 { 295 266 struct nfs_direct_req *dreq; ··· 316 263 INIT_LIST_HEAD(&dreq->mds_cinfo.list); 317 264 dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ 318 265 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); 266 + dreq->mirror_count = 1; 319 267 spin_lock_init(&dreq->lock); 320 268 321 269 return dreq; ··· 423 369 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) 424 370 dreq->error = hdr->error; 425 371 else 426 - dreq->count += hdr->good_bytes; 372 + nfs_direct_good_bytes(dreq, hdr); 373 + 427 374 spin_unlock(&dreq->lock); 428 375 429 376 while (!list_empty(&hdr->pages)) { ··· 602 547 603 548 dreq->inode = inode; 604 549 dreq->bytes_left = count; 550 + dreq->io_start = pos; 605 551 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 606 552 l_ctx = nfs_get_lock_context(dreq->ctx); 607 553 if (IS_ERR(l_ctx)) { ··· 635 579 return result; 636 580 } 637 581 582 + static void 583 + nfs_direct_write_scan_commit_list(struct inode *inode, 584 + struct list_head *list, 585 + struct nfs_commit_info *cinfo) 586 + { 587 + spin_lock(cinfo->lock); 588 + #ifdef CONFIG_NFS_V4_1 589 + if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) 590 + NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 591 + #endif 592 + nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 593 + spin_unlock(cinfo->lock); 594 + } 595 + 638 596 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 639 597 { 640 598 struct nfs_pageio_descriptor desc; ··· 656 586 LIST_HEAD(reqs); 657 587 struct 
nfs_commit_info cinfo; 658 588 LIST_HEAD(failed); 589 + int i; 659 590 660 591 nfs_init_cinfo_from_dreq(&cinfo, dreq); 661 - pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); 662 - spin_lock(cinfo.lock); 663 - nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); 664 - spin_unlock(cinfo.lock); 592 + nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 665 593 666 594 dreq->count = 0; 595 + for (i = 0; i < dreq->mirror_count; i++) 596 + dreq->mirrors[i].count = 0; 667 597 get_dreq(dreq); 668 598 669 599 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, 670 600 &nfs_direct_write_completion_ops); 671 601 desc.pg_dreq = dreq; 602 + 603 + req = nfs_list_entry(reqs.next); 604 + nfs_direct_setup_mirroring(dreq, &desc, req); 672 605 673 606 list_for_each_entry_safe(req, tmp, &reqs, wb_list) { 674 607 if (!nfs_pageio_add_request(&desc, req)) { ··· 719 646 nfs_list_remove_request(req); 720 647 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { 721 648 /* Note the rewrite will go through mds */ 722 - nfs_mark_request_commit(req, NULL, &cinfo); 649 + nfs_mark_request_commit(req, NULL, &cinfo, 0); 723 650 } else 724 651 nfs_release_request(req); 725 652 nfs_unlock_and_release_request(req); ··· 794 721 dreq->error = hdr->error; 795 722 } 796 723 if (dreq->error == 0) { 797 - dreq->count += hdr->good_bytes; 724 + nfs_direct_good_bytes(dreq, hdr); 798 725 if (nfs_write_need_commit(hdr)) { 799 726 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) 800 727 request_commit = true; ··· 818 745 nfs_list_remove_request(req); 819 746 if (request_commit) { 820 747 kref_get(&req->wb_kref); 821 - nfs_mark_request_commit(req, hdr->lseg, &cinfo); 748 + nfs_mark_request_commit(req, hdr->lseg, &cinfo, 749 + hdr->ds_commit_idx); 822 750 } 823 751 nfs_unlock_and_release_request(req); 824 752 } ··· 900 826 result = PTR_ERR(req); 901 827 break; 902 828 } 829 + 830 + nfs_direct_setup_mirroring(dreq, &desc, req); 831 + 903 832 nfs_lock_request(req); 904 833 req->wb_index = pos 
>> PAGE_SHIFT; 905 834 req->wb_offset = pos & ~PAGE_MASK; ··· 1011 934 1012 935 dreq->inode = inode; 1013 936 dreq->bytes_left = count; 937 + dreq->io_start = pos; 1014 938 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 1015 939 l_ctx = nfs_get_lock_context(dreq->ctx); 1016 940 if (IS_ERR(l_ctx)) {
+28 -287
fs/nfs/filelayout/filelayout.c
··· 118 118 } 119 119 } 120 120 121 - static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) 122 - { 123 - if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 124 - return; 125 - pnfs_return_layout(inode); 126 - } 127 - 128 121 static int filelayout_async_handle_error(struct rpc_task *task, 129 122 struct nfs4_state *state, 130 123 struct nfs_client *clp, ··· 200 207 dprintk("%s DS connection error %d\n", __func__, 201 208 task->tk_status); 202 209 nfs4_mark_deviceid_unavailable(devid); 203 - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); 210 + pnfs_error_mark_layout_for_return(inode, lseg); 204 211 rpc_wake_up(&tbl->slot_tbl_waitq); 205 212 /* fall through */ 206 213 default: ··· 332 339 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); 333 340 } 334 341 335 - static void filelayout_read_release(void *data) 336 - { 337 - struct nfs_pgio_header *hdr = data; 338 - struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; 339 - 340 - filelayout_fenceme(lo->plh_inode, lo); 341 - nfs_put_client(hdr->ds_clp); 342 - hdr->mds_ops->rpc_release(data); 343 - } 344 - 345 342 static int filelayout_write_done_cb(struct rpc_task *task, 346 343 struct nfs_pgio_header *hdr) 347 344 { ··· 354 371 return 0; 355 372 } 356 373 357 - /* Fake up some data that will cause nfs_commit_release to retry the writes. 
*/ 358 - static void prepare_to_resend_writes(struct nfs_commit_data *data) 359 - { 360 - struct nfs_page *first = nfs_list_entry(data->pages.next); 361 - 362 - data->task.tk_status = 0; 363 - memcpy(&data->verf.verifier, &first->wb_verf, 364 - sizeof(data->verf.verifier)); 365 - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ 366 - } 367 - 368 374 static int filelayout_commit_done_cb(struct rpc_task *task, 369 375 struct nfs_commit_data *data) 370 376 { ··· 365 393 366 394 switch (err) { 367 395 case -NFS4ERR_RESET_TO_MDS: 368 - prepare_to_resend_writes(data); 396 + pnfs_generic_prepare_to_resend_writes(data); 369 397 return -EAGAIN; 370 398 case -EAGAIN: 371 399 rpc_restart_call_prepare(task); ··· 423 451 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); 424 452 } 425 453 426 - static void filelayout_write_release(void *data) 427 - { 428 - struct nfs_pgio_header *hdr = data; 429 - struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; 430 - 431 - filelayout_fenceme(lo->plh_inode, lo); 432 - nfs_put_client(hdr->ds_clp); 433 - hdr->mds_ops->rpc_release(data); 434 - } 435 - 436 454 static void filelayout_commit_prepare(struct rpc_task *task, void *data) 437 455 { 438 456 struct nfs_commit_data *wdata = data; ··· 433 471 task); 434 472 } 435 473 436 - static void filelayout_write_commit_done(struct rpc_task *task, void *data) 437 - { 438 - struct nfs_commit_data *wdata = data; 439 - 440 - /* Note this may cause RPC to be resent */ 441 - wdata->mds_ops->rpc_call_done(task, data); 442 - } 443 - 444 474 static void filelayout_commit_count_stats(struct rpc_task *task, void *data) 445 475 { 446 476 struct nfs_commit_data *cdata = data; ··· 440 486 rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); 441 487 } 442 488 443 - static void filelayout_commit_release(void *calldata) 444 - { 445 - struct nfs_commit_data *data = calldata; 446 - 447 - data->completion_ops->completion(data); 448 - pnfs_put_lseg(data->lseg); 449 - 
nfs_put_client(data->ds_clp); 450 - nfs_commitdata_release(data); 451 - } 452 - 453 489 static const struct rpc_call_ops filelayout_read_call_ops = { 454 490 .rpc_call_prepare = filelayout_read_prepare, 455 491 .rpc_call_done = filelayout_read_call_done, 456 492 .rpc_count_stats = filelayout_read_count_stats, 457 - .rpc_release = filelayout_read_release, 493 + .rpc_release = pnfs_generic_rw_release, 458 494 }; 459 495 460 496 static const struct rpc_call_ops filelayout_write_call_ops = { 461 497 .rpc_call_prepare = filelayout_write_prepare, 462 498 .rpc_call_done = filelayout_write_call_done, 463 499 .rpc_count_stats = filelayout_write_count_stats, 464 - .rpc_release = filelayout_write_release, 500 + .rpc_release = pnfs_generic_rw_release, 465 501 }; 466 502 467 503 static const struct rpc_call_ops filelayout_commit_call_ops = { 468 504 .rpc_call_prepare = filelayout_commit_prepare, 469 - .rpc_call_done = filelayout_write_commit_done, 505 + .rpc_call_done = pnfs_generic_write_commit_done, 470 506 .rpc_count_stats = filelayout_commit_count_stats, 471 - .rpc_release = filelayout_commit_release, 507 + .rpc_release = pnfs_generic_commit_release, 472 508 }; 473 509 474 510 static enum pnfs_try_status ··· 492 548 /* No multipath support. 
Use first DS */ 493 549 atomic_inc(&ds->ds_clp->cl_count); 494 550 hdr->ds_clp = ds->ds_clp; 495 - hdr->ds_idx = idx; 551 + hdr->ds_commit_idx = idx; 496 552 fh = nfs4_fl_select_ds_fh(lseg, j); 497 553 if (fh) 498 554 hdr->args.fh = fh; ··· 501 557 hdr->mds_offset = offset; 502 558 503 559 /* Perform an asynchronous read to ds */ 504 - nfs_initiate_pgio(ds_clnt, hdr, 505 - &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); 560 + nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, 561 + NFS_PROTO(hdr->inode), &filelayout_read_call_ops, 562 + 0, RPC_TASK_SOFTCONN); 506 563 return PNFS_ATTEMPTED; 507 564 } 508 565 ··· 536 591 hdr->pgio_done_cb = filelayout_write_done_cb; 537 592 atomic_inc(&ds->ds_clp->cl_count); 538 593 hdr->ds_clp = ds->ds_clp; 539 - hdr->ds_idx = idx; 594 + hdr->ds_commit_idx = idx; 540 595 fh = nfs4_fl_select_ds_fh(lseg, j); 541 596 if (fh) 542 597 hdr->args.fh = fh; 543 598 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); 544 599 545 600 /* Perform an asynchronous write */ 546 - nfs_initiate_pgio(ds_clnt, hdr, 547 - &filelayout_write_call_ops, sync, 548 - RPC_TASK_SOFTCONN); 601 + nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, 602 + NFS_PROTO(hdr->inode), &filelayout_write_call_ops, 603 + sync, RPC_TASK_SOFTCONN); 549 604 return PNFS_ATTEMPTED; 550 605 } 551 606 ··· 933 988 .pg_init = filelayout_pg_init_read, 934 989 .pg_test = filelayout_pg_test, 935 990 .pg_doio = pnfs_generic_pg_readpages, 991 + .pg_cleanup = pnfs_generic_pg_cleanup, 936 992 }; 937 993 938 994 static const struct nfs_pageio_ops filelayout_pg_write_ops = { 939 995 .pg_init = filelayout_pg_init_write, 940 996 .pg_test = filelayout_pg_test, 941 997 .pg_doio = pnfs_generic_pg_writepages, 998 + .pg_cleanup = pnfs_generic_pg_cleanup, 942 999 }; 943 1000 944 1001 static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) ··· 951 1004 return j; 952 1005 } 953 1006 954 - /* The generic layer is about to remove the req from the commit list. 
955 - * If this will make the bucket empty, it will need to put the lseg reference. 956 - * Note this is must be called holding the inode (/cinfo) lock 957 - */ 958 - static void 959 - filelayout_clear_request_commit(struct nfs_page *req, 960 - struct nfs_commit_info *cinfo) 961 - { 962 - struct pnfs_layout_segment *freeme = NULL; 963 - 964 - if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 965 - goto out; 966 - cinfo->ds->nwritten--; 967 - if (list_is_singular(&req->wb_list)) { 968 - struct pnfs_commit_bucket *bucket; 969 - 970 - bucket = list_first_entry(&req->wb_list, 971 - struct pnfs_commit_bucket, 972 - written); 973 - freeme = bucket->wlseg; 974 - bucket->wlseg = NULL; 975 - } 976 - out: 977 - nfs_request_remove_commit_list(req, cinfo); 978 - pnfs_put_lseg_locked(freeme); 979 - } 980 - 981 1007 static void 982 1008 filelayout_mark_request_commit(struct nfs_page *req, 983 1009 struct pnfs_layout_segment *lseg, 984 - struct nfs_commit_info *cinfo) 1010 + struct nfs_commit_info *cinfo, 1011 + u32 ds_commit_idx) 985 1012 986 1013 { 987 1014 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); ··· 985 1064 * is normally transferred to the COMMIT call and released 986 1065 * there. 
It could also be released if the last req is pulled 987 1066 * off due to a rewrite, in which case it will be done in 988 - * filelayout_clear_request_commit 1067 + * pnfs_generic_clear_request_commit 989 1068 */ 990 1069 buckets[i].wlseg = pnfs_get_lseg(lseg); 991 1070 } ··· 1059 1138 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1060 1139 if (fh) 1061 1140 data->args.fh = fh; 1062 - return nfs_initiate_commit(ds_clnt, data, 1141 + return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), 1063 1142 &filelayout_commit_call_ops, how, 1064 1143 RPC_TASK_SOFTCONN); 1065 1144 out_err: 1066 - prepare_to_resend_writes(data); 1067 - filelayout_commit_release(data); 1145 + pnfs_generic_prepare_to_resend_writes(data); 1146 + pnfs_generic_commit_release(data); 1068 1147 return -EAGAIN; 1069 - } 1070 - 1071 - static int 1072 - transfer_commit_list(struct list_head *src, struct list_head *dst, 1073 - struct nfs_commit_info *cinfo, int max) 1074 - { 1075 - struct nfs_page *req, *tmp; 1076 - int ret = 0; 1077 - 1078 - list_for_each_entry_safe(req, tmp, src, wb_list) { 1079 - if (!nfs_lock_request(req)) 1080 - continue; 1081 - kref_get(&req->wb_kref); 1082 - if (cond_resched_lock(cinfo->lock)) 1083 - list_safe_reset_next(req, tmp, wb_list); 1084 - nfs_request_remove_commit_list(req, cinfo); 1085 - clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1086 - nfs_list_add_request(req, dst); 1087 - ret++; 1088 - if ((ret == max) && !cinfo->dreq) 1089 - break; 1090 - } 1091 - return ret; 1092 - } 1093 - 1094 - /* Note called with cinfo->lock held. 
*/ 1095 - static int 1096 - filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, 1097 - struct nfs_commit_info *cinfo, 1098 - int max) 1099 - { 1100 - struct list_head *src = &bucket->written; 1101 - struct list_head *dst = &bucket->committing; 1102 - int ret; 1103 - 1104 - ret = transfer_commit_list(src, dst, cinfo, max); 1105 - if (ret) { 1106 - cinfo->ds->nwritten -= ret; 1107 - cinfo->ds->ncommitting += ret; 1108 - bucket->clseg = bucket->wlseg; 1109 - if (list_empty(src)) 1110 - bucket->wlseg = NULL; 1111 - else 1112 - pnfs_get_lseg(bucket->clseg); 1113 - } 1114 - return ret; 1115 - } 1116 - 1117 - /* Move reqs from written to committing lists, returning count of number moved. 1118 - * Note called with cinfo->lock held. 1119 - */ 1120 - static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, 1121 - int max) 1122 - { 1123 - int i, rv = 0, cnt; 1124 - 1125 - for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { 1126 - cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], 1127 - cinfo, max); 1128 - max -= cnt; 1129 - rv += cnt; 1130 - } 1131 - return rv; 1132 - } 1133 - 1134 - /* Pull everything off the committing lists and dump into @dst */ 1135 - static void filelayout_recover_commit_reqs(struct list_head *dst, 1136 - struct nfs_commit_info *cinfo) 1137 - { 1138 - struct pnfs_commit_bucket *b; 1139 - struct pnfs_layout_segment *freeme; 1140 - int i; 1141 - 1142 - restart: 1143 - spin_lock(cinfo->lock); 1144 - for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 1145 - if (transfer_commit_list(&b->written, dst, cinfo, 0)) { 1146 - freeme = b->wlseg; 1147 - b->wlseg = NULL; 1148 - spin_unlock(cinfo->lock); 1149 - pnfs_put_lseg(freeme); 1150 - goto restart; 1151 - } 1152 - } 1153 - cinfo->ds->nwritten = 0; 1154 - spin_unlock(cinfo->lock); 1155 1148 } 1156 1149 1157 1150 /* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest ··· 1098 1263 return NULL; 1099 1264 } 1100 1265 1101 - static 
void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) 1102 - { 1103 - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; 1104 - struct pnfs_commit_bucket *bucket; 1105 - struct pnfs_layout_segment *freeme; 1106 - int i; 1107 - 1108 - for (i = idx; i < fl_cinfo->nbuckets; i++) { 1109 - bucket = &fl_cinfo->buckets[i]; 1110 - if (list_empty(&bucket->committing)) 1111 - continue; 1112 - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); 1113 - spin_lock(cinfo->lock); 1114 - freeme = bucket->clseg; 1115 - bucket->clseg = NULL; 1116 - spin_unlock(cinfo->lock); 1117 - pnfs_put_lseg(freeme); 1118 - } 1119 - } 1120 - 1121 - static unsigned int 1122 - alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) 1123 - { 1124 - struct pnfs_ds_commit_info *fl_cinfo; 1125 - struct pnfs_commit_bucket *bucket; 1126 - struct nfs_commit_data *data; 1127 - int i; 1128 - unsigned int nreq = 0; 1129 - 1130 - fl_cinfo = cinfo->ds; 1131 - bucket = fl_cinfo->buckets; 1132 - for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { 1133 - if (list_empty(&bucket->committing)) 1134 - continue; 1135 - data = nfs_commitdata_alloc(); 1136 - if (!data) 1137 - break; 1138 - data->ds_commit_index = i; 1139 - spin_lock(cinfo->lock); 1140 - data->lseg = bucket->clseg; 1141 - bucket->clseg = NULL; 1142 - spin_unlock(cinfo->lock); 1143 - list_add(&data->pages, list); 1144 - nreq++; 1145 - } 1146 - 1147 - /* Clean up on error */ 1148 - filelayout_retry_commit(cinfo, i); 1149 - /* Caller will clean up entries put on list */ 1150 - return nreq; 1151 - } 1152 - 1153 - /* This follows nfs_commit_list pretty closely */ 1154 1266 static int 1155 1267 filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1156 1268 int how, struct nfs_commit_info *cinfo) 1157 1269 { 1158 - struct nfs_commit_data *data, *tmp; 1159 - LIST_HEAD(list); 1160 - unsigned int nreq = 0; 1161 - 1162 - if (!list_empty(mds_pages)) { 1163 - data = nfs_commitdata_alloc(); 1164 - if (data 
!= NULL) { 1165 - data->lseg = NULL; 1166 - list_add(&data->pages, &list); 1167 - nreq++; 1168 - } else { 1169 - nfs_retry_commit(mds_pages, NULL, cinfo); 1170 - filelayout_retry_commit(cinfo, 0); 1171 - cinfo->completion_ops->error_cleanup(NFS_I(inode)); 1172 - return -ENOMEM; 1173 - } 1174 - } 1175 - 1176 - nreq += alloc_ds_commits(cinfo, &list); 1177 - 1178 - if (nreq == 0) { 1179 - cinfo->completion_ops->error_cleanup(NFS_I(inode)); 1180 - goto out; 1181 - } 1182 - 1183 - atomic_add(nreq, &cinfo->mds->rpcs_out); 1184 - 1185 - list_for_each_entry_safe(data, tmp, &list, pages) { 1186 - list_del_init(&data->pages); 1187 - if (!data->lseg) { 1188 - nfs_init_commit(data, mds_pages, NULL, cinfo); 1189 - nfs_initiate_commit(NFS_CLIENT(inode), data, 1190 - data->mds_ops, how, 0); 1191 - } else { 1192 - struct pnfs_commit_bucket *buckets; 1193 - 1194 - buckets = cinfo->ds->buckets; 1195 - nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); 1196 - filelayout_initiate_commit(data, how); 1197 - } 1198 - } 1199 - out: 1200 - cinfo->ds->ncommitting = 0; 1201 - return PNFS_ATTEMPTED; 1270 + return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, 1271 + filelayout_initiate_commit); 1202 1272 } 1273 + 1203 1274 static struct nfs4_deviceid_node * 1204 1275 filelayout_alloc_deviceid_node(struct nfs_server *server, 1205 1276 struct pnfs_device *pdev, gfp_t gfp_flags) ··· 1162 1421 .pg_write_ops = &filelayout_pg_write_ops, 1163 1422 .get_ds_info = &filelayout_get_ds_info, 1164 1423 .mark_request_commit = filelayout_mark_request_commit, 1165 - .clear_request_commit = filelayout_clear_request_commit, 1166 - .scan_commit_lists = filelayout_scan_commit_lists, 1167 - .recover_commit_reqs = filelayout_recover_commit_reqs, 1424 + .clear_request_commit = pnfs_generic_clear_request_commit, 1425 + .scan_commit_lists = pnfs_generic_scan_commit_lists, 1426 + .recover_commit_reqs = pnfs_generic_recover_commit_reqs, 1168 1427 .search_commit_reqs = 
filelayout_search_commit_reqs, 1169 1428 .commit_pagelist = filelayout_commit_pagelist, 1170 1429 .read_pagelist = filelayout_read_pagelist,
-40
fs/nfs/filelayout/filelayout.h
··· 33 33 #include "../pnfs.h" 34 34 35 35 /* 36 - * Default data server connection timeout and retrans vaules. 37 - * Set by module paramters dataserver_timeo and dataserver_retrans. 38 - */ 39 - #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ 40 - #define NFS4_DEF_DS_RETRANS 5 41 - 42 - /* 43 36 * Field testing shows we need to support up to 4096 stripe indices. 44 37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 45 38 * reasonable. This in turn means we support a maximum of 256 ··· 41 48 #define NFS4_PNFS_MAX_STRIPE_CNT 4096 42 49 #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ 43 50 44 - /* error codes for internal use */ 45 - #define NFS4ERR_RESET_TO_MDS 12001 46 - 47 51 enum stripetype4 { 48 52 STRIPE_SPARSE = 1, 49 53 STRIPE_DENSE = 2 50 - }; 51 - 52 - /* Individual ip address */ 53 - struct nfs4_pnfs_ds_addr { 54 - struct sockaddr_storage da_addr; 55 - size_t da_addrlen; 56 - struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 57 - char *da_remotestr; /* human readable addr+port */ 58 - }; 59 - 60 - struct nfs4_pnfs_ds { 61 - struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 62 - char *ds_remotestr; /* comma sep list of addrs */ 63 - struct list_head ds_addrs; 64 - struct nfs_client *ds_clp; 65 - atomic_t ds_count; 66 - unsigned long ds_state; 67 - #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ 68 54 }; 69 55 70 56 struct nfs4_file_layout_dsaddr { ··· 91 119 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; 92 120 } 93 121 94 - static inline void 95 - filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) 96 - { 97 - u32 *p = (u32 *)&node->deviceid; 98 - 99 - printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", 100 - p[0], p[1], p[2], p[3]); 101 - 102 - set_bit(NFS_DEVICEID_INVALID, &node->flags); 103 - } 104 - 105 122 static inline bool 106 123 filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) 107 124 { ··· 103 142 
extern struct nfs_fh * 104 143 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 105 144 106 - extern void print_ds(struct nfs4_pnfs_ds *ds); 107 145 u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); 108 146 u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); 109 147 struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
+11 -456
fs/nfs/filelayout/filelayoutdev.c
··· 31 31 #include <linux/nfs_fs.h> 32 32 #include <linux/vmalloc.h> 33 33 #include <linux/module.h> 34 - #include <linux/sunrpc/addr.h> 35 34 36 35 #include "../internal.h" 37 36 #include "../nfs4session.h" ··· 40 41 41 42 static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; 42 43 static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; 43 - 44 - /* 45 - * Data server cache 46 - * 47 - * Data servers can be mapped to different device ids. 48 - * nfs4_pnfs_ds reference counting 49 - * - set to 1 on allocation 50 - * - incremented when a device id maps a data server already in the cache. 51 - * - decremented when deviceid is removed from the cache. 52 - */ 53 - static DEFINE_SPINLOCK(nfs4_ds_cache_lock); 54 - static LIST_HEAD(nfs4_data_server_cache); 55 - 56 - /* Debug routines */ 57 - void 58 - print_ds(struct nfs4_pnfs_ds *ds) 59 - { 60 - if (ds == NULL) { 61 - printk("%s NULL device\n", __func__); 62 - return; 63 - } 64 - printk(" ds %s\n" 65 - " ref count %d\n" 66 - " client %p\n" 67 - " cl_exchange_flags %x\n", 68 - ds->ds_remotestr, 69 - atomic_read(&ds->ds_count), ds->ds_clp, 70 - ds->ds_clp ? 
ds->ds_clp->cl_exchange_flags : 0); 71 - } 72 - 73 - static bool 74 - same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) 75 - { 76 - struct sockaddr_in *a, *b; 77 - struct sockaddr_in6 *a6, *b6; 78 - 79 - if (addr1->sa_family != addr2->sa_family) 80 - return false; 81 - 82 - switch (addr1->sa_family) { 83 - case AF_INET: 84 - a = (struct sockaddr_in *)addr1; 85 - b = (struct sockaddr_in *)addr2; 86 - 87 - if (a->sin_addr.s_addr == b->sin_addr.s_addr && 88 - a->sin_port == b->sin_port) 89 - return true; 90 - break; 91 - 92 - case AF_INET6: 93 - a6 = (struct sockaddr_in6 *)addr1; 94 - b6 = (struct sockaddr_in6 *)addr2; 95 - 96 - /* LINKLOCAL addresses must have matching scope_id */ 97 - if (ipv6_addr_src_scope(&a6->sin6_addr) == 98 - IPV6_ADDR_SCOPE_LINKLOCAL && 99 - a6->sin6_scope_id != b6->sin6_scope_id) 100 - return false; 101 - 102 - if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && 103 - a6->sin6_port == b6->sin6_port) 104 - return true; 105 - break; 106 - 107 - default: 108 - dprintk("%s: unhandled address family: %u\n", 109 - __func__, addr1->sa_family); 110 - return false; 111 - } 112 - 113 - return false; 114 - } 115 - 116 - static bool 117 - _same_data_server_addrs_locked(const struct list_head *dsaddrs1, 118 - const struct list_head *dsaddrs2) 119 - { 120 - struct nfs4_pnfs_ds_addr *da1, *da2; 121 - 122 - /* step through both lists, comparing as we go */ 123 - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), 124 - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); 125 - da1 != NULL && da2 != NULL; 126 - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), 127 - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { 128 - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, 129 - (struct sockaddr *)&da2->da_addr)) 130 - return false; 131 - } 132 - if (da1 == NULL && da2 == NULL) 133 - return true; 134 - 135 - return false; 136 - } 137 - 138 - /* 139 - * Lookup DS by addresses. 
nfs4_ds_cache_lock is held 140 - */ 141 - static struct nfs4_pnfs_ds * 142 - _data_server_lookup_locked(const struct list_head *dsaddrs) 143 - { 144 - struct nfs4_pnfs_ds *ds; 145 - 146 - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) 147 - if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) 148 - return ds; 149 - return NULL; 150 - } 151 - 152 - /* 153 - * Create an rpc connection to the nfs4_pnfs_ds data server 154 - * Currently only supports IPv4 and IPv6 addresses 155 - */ 156 - static int 157 - nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) 158 - { 159 - struct nfs_client *clp = ERR_PTR(-EIO); 160 - struct nfs4_pnfs_ds_addr *da; 161 - int status = 0; 162 - 163 - dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, 164 - mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); 165 - 166 - list_for_each_entry(da, &ds->ds_addrs, da_node) { 167 - dprintk("%s: DS %s: trying address %s\n", 168 - __func__, ds->ds_remotestr, da->da_remotestr); 169 - 170 - clp = nfs4_set_ds_client(mds_srv->nfs_client, 171 - (struct sockaddr *)&da->da_addr, 172 - da->da_addrlen, IPPROTO_TCP, 173 - dataserver_timeo, dataserver_retrans); 174 - if (!IS_ERR(clp)) 175 - break; 176 - } 177 - 178 - if (IS_ERR(clp)) { 179 - status = PTR_ERR(clp); 180 - goto out; 181 - } 182 - 183 - status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); 184 - if (status) 185 - goto out_put; 186 - 187 - smp_wmb(); 188 - ds->ds_clp = clp; 189 - dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 190 - out: 191 - return status; 192 - out_put: 193 - nfs_put_client(clp); 194 - goto out; 195 - } 196 - 197 - static void 198 - destroy_ds(struct nfs4_pnfs_ds *ds) 199 - { 200 - struct nfs4_pnfs_ds_addr *da; 201 - 202 - dprintk("--> %s\n", __func__); 203 - ifdebug(FACILITY) 204 - print_ds(ds); 205 - 206 - nfs_put_client(ds->ds_clp); 207 - 208 - while (!list_empty(&ds->ds_addrs)) { 209 - da = list_first_entry(&ds->ds_addrs, 210 - struct 
nfs4_pnfs_ds_addr, 211 - da_node); 212 - list_del_init(&da->da_node); 213 - kfree(da->da_remotestr); 214 - kfree(da); 215 - } 216 - 217 - kfree(ds->ds_remotestr); 218 - kfree(ds); 219 - } 220 44 221 45 void 222 46 nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) ··· 51 229 52 230 for (i = 0; i < dsaddr->ds_num; i++) { 53 231 ds = dsaddr->ds_list[i]; 54 - if (ds != NULL) { 55 - if (atomic_dec_and_lock(&ds->ds_count, 56 - &nfs4_ds_cache_lock)) { 57 - list_del_init(&ds->ds_node); 58 - spin_unlock(&nfs4_ds_cache_lock); 59 - destroy_ds(ds); 60 - } 61 - } 232 + if (ds != NULL) 233 + nfs4_pnfs_ds_put(ds); 62 234 } 63 235 kfree(dsaddr->stripe_indices); 64 236 kfree(dsaddr); 65 - } 66 - 67 - /* 68 - * Create a string with a human readable address and port to avoid 69 - * complicated setup around many dprinks. 70 - */ 71 - static char * 72 - nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) 73 - { 74 - struct nfs4_pnfs_ds_addr *da; 75 - char *remotestr; 76 - size_t len; 77 - char *p; 78 - 79 - len = 3; /* '{', '}' and eol */ 80 - list_for_each_entry(da, dsaddrs, da_node) { 81 - len += strlen(da->da_remotestr) + 1; /* string plus comma */ 82 - } 83 - 84 - remotestr = kzalloc(len, gfp_flags); 85 - if (!remotestr) 86 - return NULL; 87 - 88 - p = remotestr; 89 - *(p++) = '{'; 90 - len--; 91 - list_for_each_entry(da, dsaddrs, da_node) { 92 - size_t ll = strlen(da->da_remotestr); 93 - 94 - if (ll > len) 95 - goto out_err; 96 - 97 - memcpy(p, da->da_remotestr, ll); 98 - p += ll; 99 - len -= ll; 100 - 101 - if (len < 1) 102 - goto out_err; 103 - (*p++) = ','; 104 - len--; 105 - } 106 - if (len < 2) 107 - goto out_err; 108 - *(p++) = '}'; 109 - *p = '\0'; 110 - return remotestr; 111 - out_err: 112 - kfree(remotestr); 113 - return NULL; 114 - } 115 - 116 - static struct nfs4_pnfs_ds * 117 - nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) 118 - { 119 - struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; 120 - char *remotestr; 121 - 122 - if 
(list_empty(dsaddrs)) { 123 - dprintk("%s: no addresses defined\n", __func__); 124 - goto out; 125 - } 126 - 127 - ds = kzalloc(sizeof(*ds), gfp_flags); 128 - if (!ds) 129 - goto out; 130 - 131 - /* this is only used for debugging, so it's ok if its NULL */ 132 - remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); 133 - 134 - spin_lock(&nfs4_ds_cache_lock); 135 - tmp_ds = _data_server_lookup_locked(dsaddrs); 136 - if (tmp_ds == NULL) { 137 - INIT_LIST_HEAD(&ds->ds_addrs); 138 - list_splice_init(dsaddrs, &ds->ds_addrs); 139 - ds->ds_remotestr = remotestr; 140 - atomic_set(&ds->ds_count, 1); 141 - INIT_LIST_HEAD(&ds->ds_node); 142 - ds->ds_clp = NULL; 143 - list_add(&ds->ds_node, &nfs4_data_server_cache); 144 - dprintk("%s add new data server %s\n", __func__, 145 - ds->ds_remotestr); 146 - } else { 147 - kfree(remotestr); 148 - kfree(ds); 149 - atomic_inc(&tmp_ds->ds_count); 150 - dprintk("%s data server %s found, inc'ed ds_count to %d\n", 151 - __func__, tmp_ds->ds_remotestr, 152 - atomic_read(&tmp_ds->ds_count)); 153 - ds = tmp_ds; 154 - } 155 - spin_unlock(&nfs4_ds_cache_lock); 156 - out: 157 - return ds; 158 - } 159 - 160 - /* 161 - * Currently only supports ipv4, ipv6 and one multi-path address. 
162 - */ 163 - static struct nfs4_pnfs_ds_addr * 164 - decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) 165 - { 166 - struct nfs4_pnfs_ds_addr *da = NULL; 167 - char *buf, *portstr; 168 - __be16 port; 169 - int nlen, rlen; 170 - int tmp[2]; 171 - __be32 *p; 172 - char *netid, *match_netid; 173 - size_t len, match_netid_len; 174 - char *startsep = ""; 175 - char *endsep = ""; 176 - 177 - 178 - /* r_netid */ 179 - p = xdr_inline_decode(streamp, 4); 180 - if (unlikely(!p)) 181 - goto out_err; 182 - nlen = be32_to_cpup(p++); 183 - 184 - p = xdr_inline_decode(streamp, nlen); 185 - if (unlikely(!p)) 186 - goto out_err; 187 - 188 - netid = kmalloc(nlen+1, gfp_flags); 189 - if (unlikely(!netid)) 190 - goto out_err; 191 - 192 - netid[nlen] = '\0'; 193 - memcpy(netid, p, nlen); 194 - 195 - /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ 196 - p = xdr_inline_decode(streamp, 4); 197 - if (unlikely(!p)) 198 - goto out_free_netid; 199 - rlen = be32_to_cpup(p); 200 - 201 - p = xdr_inline_decode(streamp, rlen); 202 - if (unlikely(!p)) 203 - goto out_free_netid; 204 - 205 - /* port is ".ABC.DEF", 8 chars max */ 206 - if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { 207 - dprintk("%s: Invalid address, length %d\n", __func__, 208 - rlen); 209 - goto out_free_netid; 210 - } 211 - buf = kmalloc(rlen + 1, gfp_flags); 212 - if (!buf) { 213 - dprintk("%s: Not enough memory\n", __func__); 214 - goto out_free_netid; 215 - } 216 - buf[rlen] = '\0'; 217 - memcpy(buf, p, rlen); 218 - 219 - /* replace port '.' with '-' */ 220 - portstr = strrchr(buf, '.'); 221 - if (!portstr) { 222 - dprintk("%s: Failed finding expected dot in port\n", 223 - __func__); 224 - goto out_free_buf; 225 - } 226 - *portstr = '-'; 227 - 228 - /* find '.' 
between address and port */ 229 - portstr = strrchr(buf, '.'); 230 - if (!portstr) { 231 - dprintk("%s: Failed finding expected dot between address and " 232 - "port\n", __func__); 233 - goto out_free_buf; 234 - } 235 - *portstr = '\0'; 236 - 237 - da = kzalloc(sizeof(*da), gfp_flags); 238 - if (unlikely(!da)) 239 - goto out_free_buf; 240 - 241 - INIT_LIST_HEAD(&da->da_node); 242 - 243 - if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, 244 - sizeof(da->da_addr))) { 245 - dprintk("%s: error parsing address %s\n", __func__, buf); 246 - goto out_free_da; 247 - } 248 - 249 - portstr++; 250 - sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); 251 - port = htons((tmp[0] << 8) | (tmp[1])); 252 - 253 - switch (da->da_addr.ss_family) { 254 - case AF_INET: 255 - ((struct sockaddr_in *)&da->da_addr)->sin_port = port; 256 - da->da_addrlen = sizeof(struct sockaddr_in); 257 - match_netid = "tcp"; 258 - match_netid_len = 3; 259 - break; 260 - 261 - case AF_INET6: 262 - ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; 263 - da->da_addrlen = sizeof(struct sockaddr_in6); 264 - match_netid = "tcp6"; 265 - match_netid_len = 4; 266 - startsep = "["; 267 - endsep = "]"; 268 - break; 269 - 270 - default: 271 - dprintk("%s: unsupported address family: %u\n", 272 - __func__, da->da_addr.ss_family); 273 - goto out_free_da; 274 - } 275 - 276 - if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { 277 - dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", 278 - __func__, netid, match_netid); 279 - goto out_free_da; 280 - } 281 - 282 - /* save human readable address */ 283 - len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; 284 - da->da_remotestr = kzalloc(len, gfp_flags); 285 - 286 - /* NULL is ok, only used for dprintk */ 287 - if (da->da_remotestr) 288 - snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, 289 - buf, endsep, ntohs(port)); 290 - 291 - dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); 292 - kfree(buf); 293 - 
kfree(netid); 294 - return da; 295 - 296 - out_free_da: 297 - kfree(da); 298 - out_free_buf: 299 - dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); 300 - kfree(buf); 301 - out_free_netid: 302 - kfree(netid); 303 - out_err: 304 - return NULL; 305 237 } 306 238 307 239 /* Decode opaque device data and return the result */ ··· 160 584 161 585 mp_count = be32_to_cpup(p); /* multipath count */ 162 586 for (j = 0; j < mp_count; j++) { 163 - da = decode_ds_addr(server->nfs_client->cl_net, 164 - &stream, gfp_flags); 587 + da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, 588 + &stream, gfp_flags); 165 589 if (da) 166 590 list_add_tail(&da->da_node, &dsaddrs); 167 591 } ··· 257 681 return flseg->fh_array[i]; 258 682 } 259 683 260 - static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 261 - { 262 - might_sleep(); 263 - wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING, 264 - nfs_wait_bit_killable, TASK_KILLABLE); 265 - } 266 - 267 - static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 268 - { 269 - smp_mb__before_atomic(); 270 - clear_bit(NFS4DS_CONNECTING, &ds->ds_state); 271 - smp_mb__after_atomic(); 272 - wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); 273 - } 274 - 275 - 684 + /* Upon return, either ds is connected, or ds is NULL */ 276 685 struct nfs4_pnfs_ds * 277 686 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) 278 687 { ··· 265 704 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 266 705 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); 267 706 struct nfs4_pnfs_ds *ret = ds; 707 + struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 268 708 269 709 if (ds == NULL) { 270 710 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 271 711 __func__, ds_idx); 272 - filelayout_mark_devid_invalid(devid); 712 + pnfs_generic_mark_devid_invalid(devid); 273 713 goto out; 274 714 } 275 715 smp_rmb(); 276 716 if (ds->ds_clp) 277 717 goto out_test_devid; 278 718 279 - if 
(test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { 280 - struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); 281 - int err; 719 + nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, 720 + dataserver_retrans, 4, 721 + s->nfs_client->cl_minorversion, 722 + s->nfs_client->cl_rpcclient->cl_auth->au_flavor); 282 723 283 - err = nfs4_ds_connect(s, ds); 284 - if (err) 285 - nfs4_mark_deviceid_unavailable(devid); 286 - nfs4_clear_ds_conn_bit(ds); 287 - } else { 288 - /* Either ds is connected, or ds is NULL */ 289 - nfs4_wait_ds_connect(ds); 290 - } 291 724 out_test_devid: 292 725 if (filelayout_test_devid_unavailable(devid)) 293 726 ret = NULL;
+5
fs/nfs/flexfilelayout/Makefile
··· 1 + # 2 + # Makefile for the pNFS Flexfile Layout Driver kernel module 3 + # 4 + obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += nfs_layout_flexfiles.o 5 + nfs_layout_flexfiles-y := flexfilelayout.o flexfilelayoutdev.o
+1574
fs/nfs/flexfilelayout/flexfilelayout.c
··· 1 + /* 2 + * Module for pnfs flexfile layout driver. 3 + * 4 + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. 5 + * 6 + * Tao Peng <bergwolf@primarydata.com> 7 + */ 8 + 9 + #include <linux/nfs_fs.h> 10 + #include <linux/nfs_page.h> 11 + #include <linux/module.h> 12 + 13 + #include <linux/sunrpc/metrics.h> 14 + #include <linux/nfs_idmap.h> 15 + 16 + #include "flexfilelayout.h" 17 + #include "../nfs4session.h" 18 + #include "../internal.h" 19 + #include "../delegation.h" 20 + #include "../nfs4trace.h" 21 + #include "../iostat.h" 22 + #include "../nfs.h" 23 + 24 + #define NFSDBG_FACILITY NFSDBG_PNFS_LD 25 + 26 + #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) 27 + 28 + static struct pnfs_layout_hdr * 29 + ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) 30 + { 31 + struct nfs4_flexfile_layout *ffl; 32 + 33 + ffl = kzalloc(sizeof(*ffl), gfp_flags); 34 + if (ffl) { 35 + INIT_LIST_HEAD(&ffl->error_list); 36 + return &ffl->generic_hdr; 37 + } else 38 + return NULL; 39 + } 40 + 41 + static void 42 + ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo) 43 + { 44 + struct nfs4_ff_layout_ds_err *err, *n; 45 + 46 + list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list, 47 + list) { 48 + list_del(&err->list); 49 + kfree(err); 50 + } 51 + kfree(FF_LAYOUT_FROM_HDR(lo)); 52 + } 53 + 54 + static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) 55 + { 56 + __be32 *p; 57 + 58 + p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE); 59 + if (unlikely(p == NULL)) 60 + return -ENOBUFS; 61 + memcpy(stateid, p, NFS4_STATEID_SIZE); 62 + dprintk("%s: stateid id= [%x%x%x%x]\n", __func__, 63 + p[0], p[1], p[2], p[3]); 64 + return 0; 65 + } 66 + 67 + static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid) 68 + { 69 + __be32 *p; 70 + 71 + p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); 72 + if (unlikely(!p)) 73 + return -ENOBUFS; 74 + memcpy(devid, p, NFS4_DEVICEID4_SIZE); 75 + nfs4_print_deviceid(devid); 76 + return 
0; 77 + } 78 + 79 + static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh) 80 + { 81 + __be32 *p; 82 + 83 + p = xdr_inline_decode(xdr, 4); 84 + if (unlikely(!p)) 85 + return -ENOBUFS; 86 + fh->size = be32_to_cpup(p++); 87 + if (fh->size > sizeof(struct nfs_fh)) { 88 + printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n", 89 + fh->size); 90 + return -EOVERFLOW; 91 + } 92 + /* fh.data */ 93 + p = xdr_inline_decode(xdr, fh->size); 94 + if (unlikely(!p)) 95 + return -ENOBUFS; 96 + memcpy(&fh->data, p, fh->size); 97 + dprintk("%s: fh len %d\n", __func__, fh->size); 98 + 99 + return 0; 100 + } 101 + 102 + /* 103 + * Currently only stringified uids and gids are accepted. 104 + * I.e., kerberos is not supported to the DSes, so no pricipals. 105 + * 106 + * That means that one common function will suffice, but when 107 + * principals are added, this should be split to accomodate 108 + * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid(). 109 + */ 110 + static int 111 + decode_name(struct xdr_stream *xdr, u32 *id) 112 + { 113 + __be32 *p; 114 + int len; 115 + 116 + /* opaque_length(4)*/ 117 + p = xdr_inline_decode(xdr, 4); 118 + if (unlikely(!p)) 119 + return -ENOBUFS; 120 + len = be32_to_cpup(p++); 121 + if (len < 0) 122 + return -EINVAL; 123 + 124 + dprintk("%s: len %u\n", __func__, len); 125 + 126 + /* opaque body */ 127 + p = xdr_inline_decode(xdr, len); 128 + if (unlikely(!p)) 129 + return -ENOBUFS; 130 + 131 + if (!nfs_map_string_to_numeric((char *)p, len, id)) 132 + return -EINVAL; 133 + 134 + return 0; 135 + } 136 + 137 + static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls) 138 + { 139 + int i; 140 + 141 + if (fls->mirror_array) { 142 + for (i = 0; i < fls->mirror_array_cnt; i++) { 143 + /* normally mirror_ds is freed in 144 + * .free_deviceid_node but we still do it here 145 + * for .alloc_lseg error path */ 146 + if (fls->mirror_array[i]) { 147 + kfree(fls->mirror_array[i]->fh_versions); 148 + 
nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); 149 + kfree(fls->mirror_array[i]); 150 + } 151 + } 152 + kfree(fls->mirror_array); 153 + fls->mirror_array = NULL; 154 + } 155 + } 156 + 157 + static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr) 158 + { 159 + int ret = 0; 160 + 161 + dprintk("--> %s\n", __func__); 162 + 163 + /* FIXME: remove this check when layout segment support is added */ 164 + if (lgr->range.offset != 0 || 165 + lgr->range.length != NFS4_MAX_UINT64) { 166 + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", 167 + __func__); 168 + ret = -EINVAL; 169 + } 170 + 171 + dprintk("--> %s returns %d\n", __func__, ret); 172 + return ret; 173 + } 174 + 175 + static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) 176 + { 177 + if (fls) { 178 + ff_layout_free_mirror_array(fls); 179 + kfree(fls); 180 + } 181 + } 182 + 183 + static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) 184 + { 185 + struct nfs4_ff_layout_mirror *tmp; 186 + int i, j; 187 + 188 + for (i = 0; i < fls->mirror_array_cnt - 1; i++) { 189 + for (j = i + 1; j < fls->mirror_array_cnt; j++) 190 + if (fls->mirror_array[i]->efficiency < 191 + fls->mirror_array[j]->efficiency) { 192 + tmp = fls->mirror_array[i]; 193 + fls->mirror_array[i] = fls->mirror_array[j]; 194 + fls->mirror_array[j] = tmp; 195 + } 196 + } 197 + } 198 + 199 + static struct pnfs_layout_segment * 200 + ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, 201 + struct nfs4_layoutget_res *lgr, 202 + gfp_t gfp_flags) 203 + { 204 + struct pnfs_layout_segment *ret; 205 + struct nfs4_ff_layout_segment *fls = NULL; 206 + struct xdr_stream stream; 207 + struct xdr_buf buf; 208 + struct page *scratch; 209 + u64 stripe_unit; 210 + u32 mirror_array_cnt; 211 + __be32 *p; 212 + int i, rc; 213 + 214 + dprintk("--> %s\n", __func__); 215 + scratch = alloc_page(gfp_flags); 216 + if (!scratch) 217 + return ERR_PTR(-ENOMEM); 218 + 219 + xdr_init_decode_pages(&stream, &buf, 
lgr->layoutp->pages, 220 + lgr->layoutp->len); 221 + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); 222 + 223 + /* stripe unit and mirror_array_cnt */ 224 + rc = -EIO; 225 + p = xdr_inline_decode(&stream, 8 + 4); 226 + if (!p) 227 + goto out_err_free; 228 + 229 + p = xdr_decode_hyper(p, &stripe_unit); 230 + mirror_array_cnt = be32_to_cpup(p++); 231 + dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__, 232 + stripe_unit, mirror_array_cnt); 233 + 234 + if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT || 235 + mirror_array_cnt == 0) 236 + goto out_err_free; 237 + 238 + rc = -ENOMEM; 239 + fls = kzalloc(sizeof(*fls), gfp_flags); 240 + if (!fls) 241 + goto out_err_free; 242 + 243 + fls->mirror_array_cnt = mirror_array_cnt; 244 + fls->stripe_unit = stripe_unit; 245 + fls->mirror_array = kcalloc(fls->mirror_array_cnt, 246 + sizeof(fls->mirror_array[0]), gfp_flags); 247 + if (fls->mirror_array == NULL) 248 + goto out_err_free; 249 + 250 + for (i = 0; i < fls->mirror_array_cnt; i++) { 251 + struct nfs4_deviceid devid; 252 + struct nfs4_deviceid_node *idnode; 253 + u32 ds_count; 254 + u32 fh_count; 255 + int j; 256 + 257 + rc = -EIO; 258 + p = xdr_inline_decode(&stream, 4); 259 + if (!p) 260 + goto out_err_free; 261 + ds_count = be32_to_cpup(p); 262 + 263 + /* FIXME: allow for striping? 
*/ 264 + if (ds_count != 1) 265 + goto out_err_free; 266 + 267 + fls->mirror_array[i] = 268 + kzalloc(sizeof(struct nfs4_ff_layout_mirror), 269 + gfp_flags); 270 + if (fls->mirror_array[i] == NULL) { 271 + rc = -ENOMEM; 272 + goto out_err_free; 273 + } 274 + 275 + spin_lock_init(&fls->mirror_array[i]->lock); 276 + fls->mirror_array[i]->ds_count = ds_count; 277 + 278 + /* deviceid */ 279 + rc = decode_deviceid(&stream, &devid); 280 + if (rc) 281 + goto out_err_free; 282 + 283 + idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), 284 + &devid, lh->plh_lc_cred, 285 + gfp_flags); 286 + /* 287 + * upon success, mirror_ds is allocated by previous 288 + * getdeviceinfo, or newly by .alloc_deviceid_node 289 + * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure 290 + */ 291 + if (idnode) 292 + fls->mirror_array[i]->mirror_ds = 293 + FF_LAYOUT_MIRROR_DS(idnode); 294 + else 295 + goto out_err_free; 296 + 297 + /* efficiency */ 298 + rc = -EIO; 299 + p = xdr_inline_decode(&stream, 4); 300 + if (!p) 301 + goto out_err_free; 302 + fls->mirror_array[i]->efficiency = be32_to_cpup(p); 303 + 304 + /* stateid */ 305 + rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid); 306 + if (rc) 307 + goto out_err_free; 308 + 309 + /* fh */ 310 + p = xdr_inline_decode(&stream, 4); 311 + if (!p) 312 + goto out_err_free; 313 + fh_count = be32_to_cpup(p); 314 + 315 + fls->mirror_array[i]->fh_versions = 316 + kzalloc(fh_count * sizeof(struct nfs_fh), 317 + gfp_flags); 318 + if (fls->mirror_array[i]->fh_versions == NULL) { 319 + rc = -ENOMEM; 320 + goto out_err_free; 321 + } 322 + 323 + for (j = 0; j < fh_count; j++) { 324 + rc = decode_nfs_fh(&stream, 325 + &fls->mirror_array[i]->fh_versions[j]); 326 + if (rc) 327 + goto out_err_free; 328 + } 329 + 330 + fls->mirror_array[i]->fh_versions_cnt = fh_count; 331 + 332 + /* user */ 333 + rc = decode_name(&stream, &fls->mirror_array[i]->uid); 334 + if (rc) 335 + goto out_err_free; 336 + 337 + /* group */ 338 + rc = 
decode_name(&stream, &fls->mirror_array[i]->gid); 339 + if (rc) 340 + goto out_err_free; 341 + 342 + dprintk("%s: uid %d gid %d\n", __func__, 343 + fls->mirror_array[i]->uid, 344 + fls->mirror_array[i]->gid); 345 + } 346 + 347 + ff_layout_sort_mirrors(fls); 348 + rc = ff_layout_check_layout(lgr); 349 + if (rc) 350 + goto out_err_free; 351 + 352 + ret = &fls->generic_hdr; 353 + dprintk("<-- %s (success)\n", __func__); 354 + out_free_page: 355 + __free_page(scratch); 356 + return ret; 357 + out_err_free: 358 + _ff_layout_free_lseg(fls); 359 + ret = ERR_PTR(rc); 360 + dprintk("<-- %s (%d)\n", __func__, rc); 361 + goto out_free_page; 362 + } 363 + 364 + static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) 365 + { 366 + struct pnfs_layout_segment *lseg; 367 + 368 + list_for_each_entry(lseg, &layout->plh_segs, pls_list) 369 + if (lseg->pls_range.iomode == IOMODE_RW) 370 + return true; 371 + 372 + return false; 373 + } 374 + 375 + static void 376 + ff_layout_free_lseg(struct pnfs_layout_segment *lseg) 377 + { 378 + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); 379 + int i; 380 + 381 + dprintk("--> %s\n", __func__); 382 + 383 + for (i = 0; i < fls->mirror_array_cnt; i++) { 384 + if (fls->mirror_array[i]) { 385 + nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); 386 + fls->mirror_array[i]->mirror_ds = NULL; 387 + if (fls->mirror_array[i]->cred) { 388 + put_rpccred(fls->mirror_array[i]->cred); 389 + fls->mirror_array[i]->cred = NULL; 390 + } 391 + } 392 + } 393 + 394 + if (lseg->pls_range.iomode == IOMODE_RW) { 395 + struct nfs4_flexfile_layout *ffl; 396 + struct inode *inode; 397 + 398 + ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout); 399 + inode = ffl->generic_hdr.plh_inode; 400 + spin_lock(&inode->i_lock); 401 + if (!ff_layout_has_rw_segments(lseg->pls_layout)) { 402 + ffl->commit_info.nbuckets = 0; 403 + kfree(ffl->commit_info.buckets); 404 + ffl->commit_info.buckets = NULL; 405 + } 406 + spin_unlock(&inode->i_lock); 407 + } 408 + 
_ff_layout_free_lseg(fls); 409 + } 410 + 411 + /* Return 1 until we have multiple lsegs support */ 412 + static int 413 + ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) 414 + { 415 + return 1; 416 + } 417 + 418 + static int 419 + ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, 420 + struct nfs_commit_info *cinfo, 421 + gfp_t gfp_flags) 422 + { 423 + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); 424 + struct pnfs_commit_bucket *buckets; 425 + int size; 426 + 427 + if (cinfo->ds->nbuckets != 0) { 428 + /* This assumes there is only one RW lseg per file. 429 + * To support multiple lseg per file, we need to 430 + * change struct pnfs_commit_bucket to allow dynamic 431 + * increasing nbuckets. 432 + */ 433 + return 0; 434 + } 435 + 436 + size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg); 437 + 438 + buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), 439 + gfp_flags); 440 + if (!buckets) 441 + return -ENOMEM; 442 + else { 443 + int i; 444 + 445 + spin_lock(cinfo->lock); 446 + if (cinfo->ds->nbuckets != 0) 447 + kfree(buckets); 448 + else { 449 + cinfo->ds->buckets = buckets; 450 + cinfo->ds->nbuckets = size; 451 + for (i = 0; i < size; i++) { 452 + INIT_LIST_HEAD(&buckets[i].written); 453 + INIT_LIST_HEAD(&buckets[i].committing); 454 + /* mark direct verifier as unset */ 455 + buckets[i].direct_verf.committed = 456 + NFS_INVALID_STABLE_HOW; 457 + } 458 + } 459 + spin_unlock(cinfo->lock); 460 + return 0; 461 + } 462 + } 463 + 464 + static struct nfs4_pnfs_ds * 465 + ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, 466 + int *best_idx) 467 + { 468 + struct nfs4_ff_layout_segment *fls; 469 + struct nfs4_pnfs_ds *ds; 470 + int idx; 471 + 472 + fls = FF_LAYOUT_LSEG(pgio->pg_lseg); 473 + /* mirrors are sorted by efficiency */ 474 + for (idx = 0; idx < fls->mirror_array_cnt; idx++) { 475 + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); 476 + if (ds) { 477 + *best_idx = idx; 478 
+ return ds; 479 + } 480 + } 481 + 482 + return NULL; 483 + } 484 + 485 + static void 486 + ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, 487 + struct nfs_page *req) 488 + { 489 + struct nfs_pgio_mirror *pgm; 490 + struct nfs4_ff_layout_mirror *mirror; 491 + struct nfs4_pnfs_ds *ds; 492 + int ds_idx; 493 + 494 + /* Use full layout for now */ 495 + if (!pgio->pg_lseg) 496 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 497 + req->wb_context, 498 + 0, 499 + NFS4_MAX_UINT64, 500 + IOMODE_READ, 501 + GFP_KERNEL); 502 + /* If no lseg, fall back to read through mds */ 503 + if (pgio->pg_lseg == NULL) 504 + goto out_mds; 505 + 506 + ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); 507 + if (!ds) 508 + goto out_mds; 509 + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); 510 + 511 + pgio->pg_mirror_idx = ds_idx; 512 + 513 + /* read always uses only one mirror - idx 0 for pgio layer */ 514 + pgm = &pgio->pg_mirrors[0]; 515 + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; 516 + 517 + return; 518 + out_mds: 519 + pnfs_put_lseg(pgio->pg_lseg); 520 + pgio->pg_lseg = NULL; 521 + nfs_pageio_reset_read_mds(pgio); 522 + } 523 + 524 + static void 525 + ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, 526 + struct nfs_page *req) 527 + { 528 + struct nfs4_ff_layout_mirror *mirror; 529 + struct nfs_pgio_mirror *pgm; 530 + struct nfs_commit_info cinfo; 531 + struct nfs4_pnfs_ds *ds; 532 + int i; 533 + int status; 534 + 535 + if (!pgio->pg_lseg) 536 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 537 + req->wb_context, 538 + 0, 539 + NFS4_MAX_UINT64, 540 + IOMODE_RW, 541 + GFP_NOFS); 542 + /* If no lseg, fall back to write through mds */ 543 + if (pgio->pg_lseg == NULL) 544 + goto out_mds; 545 + 546 + nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); 547 + status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); 548 + if (status < 0) 549 + goto out_mds; 550 + 551 + /* Use a direct mapping of ds_idx to pgio mirror_idx */ 
552 + if (WARN_ON_ONCE(pgio->pg_mirror_count != 553 + FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) 554 + goto out_mds; 555 + 556 + for (i = 0; i < pgio->pg_mirror_count; i++) { 557 + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); 558 + if (!ds) 559 + goto out_mds; 560 + pgm = &pgio->pg_mirrors[i]; 561 + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); 562 + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; 563 + } 564 + 565 + return; 566 + 567 + out_mds: 568 + pnfs_put_lseg(pgio->pg_lseg); 569 + pgio->pg_lseg = NULL; 570 + nfs_pageio_reset_write_mds(pgio); 571 + } 572 + 573 + static unsigned int 574 + ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, 575 + struct nfs_page *req) 576 + { 577 + if (!pgio->pg_lseg) 578 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 579 + req->wb_context, 580 + 0, 581 + NFS4_MAX_UINT64, 582 + IOMODE_RW, 583 + GFP_NOFS); 584 + if (pgio->pg_lseg) 585 + return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); 586 + 587 + /* no lseg means that pnfs is not in use, so no mirroring here */ 588 + pnfs_put_lseg(pgio->pg_lseg); 589 + pgio->pg_lseg = NULL; 590 + nfs_pageio_reset_write_mds(pgio); 591 + return 1; 592 + } 593 + 594 + static const struct nfs_pageio_ops ff_layout_pg_read_ops = { 595 + .pg_init = ff_layout_pg_init_read, 596 + .pg_test = pnfs_generic_pg_test, 597 + .pg_doio = pnfs_generic_pg_readpages, 598 + .pg_cleanup = pnfs_generic_pg_cleanup, 599 + }; 600 + 601 + static const struct nfs_pageio_ops ff_layout_pg_write_ops = { 602 + .pg_init = ff_layout_pg_init_write, 603 + .pg_test = pnfs_generic_pg_test, 604 + .pg_doio = pnfs_generic_pg_writepages, 605 + .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, 606 + .pg_cleanup = pnfs_generic_pg_cleanup, 607 + }; 608 + 609 + static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) 610 + { 611 + struct rpc_task *task = &hdr->task; 612 + 613 + pnfs_layoutcommit_inode(hdr->inode, false); 614 + 615 + if (retry_pnfs) { 616 + dprintk("%s Reset 
task %5u for i/o through pNFS " 617 + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 618 + hdr->task.tk_pid, 619 + hdr->inode->i_sb->s_id, 620 + (unsigned long long)NFS_FILEID(hdr->inode), 621 + hdr->args.count, 622 + (unsigned long long)hdr->args.offset); 623 + 624 + if (!hdr->dreq) { 625 + struct nfs_open_context *ctx; 626 + 627 + ctx = nfs_list_entry(hdr->pages.next)->wb_context; 628 + set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); 629 + hdr->completion_ops->error_cleanup(&hdr->pages); 630 + } else { 631 + nfs_direct_set_resched_writes(hdr->dreq); 632 + /* fake unstable write to let common nfs resend pages */ 633 + hdr->verf.committed = NFS_UNSTABLE; 634 + hdr->good_bytes = 0; 635 + } 636 + return; 637 + } 638 + 639 + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 640 + dprintk("%s Reset task %5u for i/o through MDS " 641 + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 642 + hdr->task.tk_pid, 643 + hdr->inode->i_sb->s_id, 644 + (unsigned long long)NFS_FILEID(hdr->inode), 645 + hdr->args.count, 646 + (unsigned long long)hdr->args.offset); 647 + 648 + task->tk_status = pnfs_write_done_resend_to_mds(hdr); 649 + } 650 + } 651 + 652 + static void ff_layout_reset_read(struct nfs_pgio_header *hdr) 653 + { 654 + struct rpc_task *task = &hdr->task; 655 + 656 + pnfs_layoutcommit_inode(hdr->inode, false); 657 + 658 + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 659 + dprintk("%s Reset task %5u for i/o through MDS " 660 + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 661 + hdr->task.tk_pid, 662 + hdr->inode->i_sb->s_id, 663 + (unsigned long long)NFS_FILEID(hdr->inode), 664 + hdr->args.count, 665 + (unsigned long long)hdr->args.offset); 666 + 667 + task->tk_status = pnfs_read_done_resend_to_mds(hdr); 668 + } 669 + } 670 + 671 + static int ff_layout_async_handle_error_v4(struct rpc_task *task, 672 + struct nfs4_state *state, 673 + struct nfs_client *clp, 674 + struct pnfs_layout_segment *lseg, 675 + int idx) 676 + { 677 + struct 
pnfs_layout_hdr *lo = lseg->pls_layout; 678 + struct inode *inode = lo->plh_inode; 679 + struct nfs_server *mds_server = NFS_SERVER(inode); 680 + 681 + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); 682 + struct nfs_client *mds_client = mds_server->nfs_client; 683 + struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; 684 + 685 + if (task->tk_status >= 0) 686 + return 0; 687 + 688 + switch (task->tk_status) { 689 + /* MDS state errors */ 690 + case -NFS4ERR_DELEG_REVOKED: 691 + case -NFS4ERR_ADMIN_REVOKED: 692 + case -NFS4ERR_BAD_STATEID: 693 + if (state == NULL) 694 + break; 695 + nfs_remove_bad_delegation(state->inode); 696 + case -NFS4ERR_OPENMODE: 697 + if (state == NULL) 698 + break; 699 + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) 700 + goto out_bad_stateid; 701 + goto wait_on_recovery; 702 + case -NFS4ERR_EXPIRED: 703 + if (state != NULL) { 704 + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) 705 + goto out_bad_stateid; 706 + } 707 + nfs4_schedule_lease_recovery(mds_client); 708 + goto wait_on_recovery; 709 + /* DS session errors */ 710 + case -NFS4ERR_BADSESSION: 711 + case -NFS4ERR_BADSLOT: 712 + case -NFS4ERR_BAD_HIGH_SLOT: 713 + case -NFS4ERR_DEADSESSION: 714 + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 715 + case -NFS4ERR_SEQ_FALSE_RETRY: 716 + case -NFS4ERR_SEQ_MISORDERED: 717 + dprintk("%s ERROR %d, Reset session. 
Exchangeid " 718 + "flags 0x%x\n", __func__, task->tk_status, 719 + clp->cl_exchange_flags); 720 + nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); 721 + break; 722 + case -NFS4ERR_DELAY: 723 + case -NFS4ERR_GRACE: 724 + rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); 725 + break; 726 + case -NFS4ERR_RETRY_UNCACHED_REP: 727 + break; 728 + /* Invalidate Layout errors */ 729 + case -NFS4ERR_PNFS_NO_LAYOUT: 730 + case -ESTALE: /* mapped NFS4ERR_STALE */ 731 + case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ 732 + case -EISDIR: /* mapped NFS4ERR_ISDIR */ 733 + case -NFS4ERR_FHEXPIRED: 734 + case -NFS4ERR_WRONG_TYPE: 735 + dprintk("%s Invalid layout error %d\n", __func__, 736 + task->tk_status); 737 + /* 738 + * Destroy layout so new i/o will get a new layout. 739 + * Layout will not be destroyed until all current lseg 740 + * references are put. Mark layout as invalid to resend failed 741 + * i/o and all i/o waiting on the slot table to the MDS until 742 + * layout is destroyed and a new valid layout is obtained. 743 + */ 744 + pnfs_destroy_layout(NFS_I(inode)); 745 + rpc_wake_up(&tbl->slot_tbl_waitq); 746 + goto reset; 747 + /* RPC connection errors */ 748 + case -ECONNREFUSED: 749 + case -EHOSTDOWN: 750 + case -EHOSTUNREACH: 751 + case -ENETUNREACH: 752 + case -EIO: 753 + case -ETIMEDOUT: 754 + case -EPIPE: 755 + dprintk("%s DS connection error %d\n", __func__, 756 + task->tk_status); 757 + nfs4_mark_deviceid_unavailable(devid); 758 + rpc_wake_up(&tbl->slot_tbl_waitq); 759 + /* fall through */ 760 + default: 761 + if (ff_layout_has_available_ds(lseg)) 762 + return -NFS4ERR_RESET_TO_PNFS; 763 + reset: 764 + dprintk("%s Retry through MDS. 
Error %d\n", __func__, 765 + task->tk_status); 766 + return -NFS4ERR_RESET_TO_MDS; 767 + } 768 + out: 769 + task->tk_status = 0; 770 + return -EAGAIN; 771 + out_bad_stateid: 772 + task->tk_status = -EIO; 773 + return 0; 774 + wait_on_recovery: 775 + rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); 776 + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) 777 + rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); 778 + goto out; 779 + } 780 + 781 + /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ 782 + static int ff_layout_async_handle_error_v3(struct rpc_task *task, 783 + struct pnfs_layout_segment *lseg, 784 + int idx) 785 + { 786 + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); 787 + 788 + if (task->tk_status >= 0) 789 + return 0; 790 + 791 + if (task->tk_status != -EJUKEBOX) { 792 + dprintk("%s DS connection error %d\n", __func__, 793 + task->tk_status); 794 + nfs4_mark_deviceid_unavailable(devid); 795 + if (ff_layout_has_available_ds(lseg)) 796 + return -NFS4ERR_RESET_TO_PNFS; 797 + else 798 + return -NFS4ERR_RESET_TO_MDS; 799 + } 800 + 801 + if (task->tk_status == -EJUKEBOX) 802 + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); 803 + task->tk_status = 0; 804 + rpc_restart_call(task); 805 + rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); 806 + return -EAGAIN; 807 + } 808 + 809 + static int ff_layout_async_handle_error(struct rpc_task *task, 810 + struct nfs4_state *state, 811 + struct nfs_client *clp, 812 + struct pnfs_layout_segment *lseg, 813 + int idx) 814 + { 815 + int vers = clp->cl_nfs_mod->rpc_vers->number; 816 + 817 + switch (vers) { 818 + case 3: 819 + return ff_layout_async_handle_error_v3(task, lseg, idx); 820 + case 4: 821 + return ff_layout_async_handle_error_v4(task, state, clp, 822 + lseg, idx); 823 + default: 824 + /* should never happen */ 825 + WARN_ON_ONCE(1); 826 + return 0; 827 + } 828 + } 829 + 830 + static void ff_layout_io_track_ds_error(struct pnfs_layout_segment 
*lseg, 831 + int idx, u64 offset, u64 length, 832 + u32 status, int opnum) 833 + { 834 + struct nfs4_ff_layout_mirror *mirror; 835 + int err; 836 + 837 + mirror = FF_LAYOUT_COMP(lseg, idx); 838 + err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 839 + mirror, offset, length, status, opnum, 840 + GFP_NOIO); 841 + dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status); 842 + } 843 + 844 + /* NFS_PROTO call done callback routines */ 845 + 846 + static int ff_layout_read_done_cb(struct rpc_task *task, 847 + struct nfs_pgio_header *hdr) 848 + { 849 + struct inode *inode; 850 + int err; 851 + 852 + trace_nfs4_pnfs_read(hdr, task->tk_status); 853 + if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status) 854 + hdr->res.op_status = NFS4ERR_NXIO; 855 + if (task->tk_status < 0 && hdr->res.op_status) 856 + ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, 857 + hdr->args.offset, hdr->args.count, 858 + hdr->res.op_status, OP_READ); 859 + err = ff_layout_async_handle_error(task, hdr->args.context->state, 860 + hdr->ds_clp, hdr->lseg, 861 + hdr->pgio_mirror_idx); 862 + 863 + switch (err) { 864 + case -NFS4ERR_RESET_TO_PNFS: 865 + set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 866 + &hdr->lseg->pls_layout->plh_flags); 867 + pnfs_read_resend_pnfs(hdr); 868 + return task->tk_status; 869 + case -NFS4ERR_RESET_TO_MDS: 870 + inode = hdr->lseg->pls_layout->plh_inode; 871 + pnfs_error_mark_layout_for_return(inode, hdr->lseg); 872 + ff_layout_reset_read(hdr); 873 + return task->tk_status; 874 + case -EAGAIN: 875 + rpc_restart_call_prepare(task); 876 + return -EAGAIN; 877 + } 878 + 879 + return 0; 880 + } 881 + 882 + /* 883 + * We reference the rpc_cred of the first WRITE that triggers the need for 884 + * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. 885 + * rfc5661 is not clear about which credential should be used. 
886 + * 887 + * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so 888 + * to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751 889 + * we always send layoutcommit after DS writes. 890 + */ 891 + static void 892 + ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) 893 + { 894 + pnfs_set_layoutcommit(hdr); 895 + dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 896 + (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 897 + } 898 + 899 + static bool 900 + ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) 901 + { 902 + /* No mirroring for now */ 903 + struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx); 904 + 905 + return ff_layout_test_devid_unavailable(node); 906 + } 907 + 908 + static int ff_layout_read_prepare_common(struct rpc_task *task, 909 + struct nfs_pgio_header *hdr) 910 + { 911 + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { 912 + rpc_exit(task, -EIO); 913 + return -EIO; 914 + } 915 + if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { 916 + dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 917 + if (ff_layout_has_available_ds(hdr->lseg)) 918 + pnfs_read_resend_pnfs(hdr); 919 + else 920 + ff_layout_reset_read(hdr); 921 + rpc_exit(task, 0); 922 + return -EAGAIN; 923 + } 924 + hdr->pgio_done_cb = ff_layout_read_done_cb; 925 + 926 + return 0; 927 + } 928 + 929 + /* 930 + * Call ops for the async read/write cases 931 + * In the case of dense layouts, the offset needs to be reset to its 932 + * original value. 
933 + */ 934 + static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data) 935 + { 936 + struct nfs_pgio_header *hdr = data; 937 + 938 + if (ff_layout_read_prepare_common(task, hdr)) 939 + return; 940 + 941 + rpc_call_start(task); 942 + } 943 + 944 + static int ff_layout_setup_sequence(struct nfs_client *ds_clp, 945 + struct nfs4_sequence_args *args, 946 + struct nfs4_sequence_res *res, 947 + struct rpc_task *task) 948 + { 949 + if (ds_clp->cl_session) 950 + return nfs41_setup_sequence(ds_clp->cl_session, 951 + args, 952 + res, 953 + task); 954 + return nfs40_setup_sequence(ds_clp->cl_slot_tbl, 955 + args, 956 + res, 957 + task); 958 + } 959 + 960 + static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) 961 + { 962 + struct nfs_pgio_header *hdr = data; 963 + 964 + if (ff_layout_read_prepare_common(task, hdr)) 965 + return; 966 + 967 + if (ff_layout_setup_sequence(hdr->ds_clp, 968 + &hdr->args.seq_args, 969 + &hdr->res.seq_res, 970 + task)) 971 + return; 972 + 973 + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, 974 + hdr->args.lock_context, FMODE_READ) == -EIO) 975 + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 976 + } 977 + 978 + static void ff_layout_read_call_done(struct rpc_task *task, void *data) 979 + { 980 + struct nfs_pgio_header *hdr = data; 981 + 982 + dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 983 + 984 + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && 985 + task->tk_status == 0) { 986 + nfs4_sequence_done(task, &hdr->res.seq_res); 987 + return; 988 + } 989 + 990 + /* Note this may cause RPC to be resent */ 991 + hdr->mds_ops->rpc_call_done(task, hdr); 992 + } 993 + 994 + static void ff_layout_read_count_stats(struct rpc_task *task, void *data) 995 + { 996 + struct nfs_pgio_header *hdr = data; 997 + 998 + rpc_count_iostats_metrics(task, 999 + &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); 1000 + } 1001 + 1002 + static int ff_layout_write_done_cb(struct rpc_task *task, 
1003 + struct nfs_pgio_header *hdr) 1004 + { 1005 + struct inode *inode; 1006 + int err; 1007 + 1008 + trace_nfs4_pnfs_write(hdr, task->tk_status); 1009 + if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status) 1010 + hdr->res.op_status = NFS4ERR_NXIO; 1011 + if (task->tk_status < 0 && hdr->res.op_status) 1012 + ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, 1013 + hdr->args.offset, hdr->args.count, 1014 + hdr->res.op_status, OP_WRITE); 1015 + err = ff_layout_async_handle_error(task, hdr->args.context->state, 1016 + hdr->ds_clp, hdr->lseg, 1017 + hdr->pgio_mirror_idx); 1018 + 1019 + switch (err) { 1020 + case -NFS4ERR_RESET_TO_PNFS: 1021 + case -NFS4ERR_RESET_TO_MDS: 1022 + inode = hdr->lseg->pls_layout->plh_inode; 1023 + pnfs_error_mark_layout_for_return(inode, hdr->lseg); 1024 + if (err == -NFS4ERR_RESET_TO_PNFS) { 1025 + pnfs_set_retry_layoutget(hdr->lseg->pls_layout); 1026 + ff_layout_reset_write(hdr, true); 1027 + } else { 1028 + pnfs_clear_retry_layoutget(hdr->lseg->pls_layout); 1029 + ff_layout_reset_write(hdr, false); 1030 + } 1031 + return task->tk_status; 1032 + case -EAGAIN: 1033 + rpc_restart_call_prepare(task); 1034 + return -EAGAIN; 1035 + } 1036 + 1037 + if (hdr->res.verf->committed == NFS_FILE_SYNC || 1038 + hdr->res.verf->committed == NFS_DATA_SYNC) 1039 + ff_layout_set_layoutcommit(hdr); 1040 + 1041 + return 0; 1042 + } 1043 + 1044 + static int ff_layout_commit_done_cb(struct rpc_task *task, 1045 + struct nfs_commit_data *data) 1046 + { 1047 + struct inode *inode; 1048 + int err; 1049 + 1050 + trace_nfs4_pnfs_commit_ds(data, task->tk_status); 1051 + if (task->tk_status == -ETIMEDOUT && !data->res.op_status) 1052 + data->res.op_status = NFS4ERR_NXIO; 1053 + if (task->tk_status < 0 && data->res.op_status) 1054 + ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, 1055 + data->args.offset, data->args.count, 1056 + data->res.op_status, OP_COMMIT); 1057 + err = ff_layout_async_handle_error(task, NULL, data->ds_clp, 1058 + 
data->lseg, data->ds_commit_index); 1059 + 1060 + switch (err) { 1061 + case -NFS4ERR_RESET_TO_PNFS: 1062 + case -NFS4ERR_RESET_TO_MDS: 1063 + inode = data->lseg->pls_layout->plh_inode; 1064 + pnfs_error_mark_layout_for_return(inode, data->lseg); 1065 + if (err == -NFS4ERR_RESET_TO_PNFS) 1066 + pnfs_set_retry_layoutget(data->lseg->pls_layout); 1067 + else 1068 + pnfs_clear_retry_layoutget(data->lseg->pls_layout); 1069 + pnfs_generic_prepare_to_resend_writes(data); 1070 + return -EAGAIN; 1071 + case -EAGAIN: 1072 + rpc_restart_call_prepare(task); 1073 + return -EAGAIN; 1074 + } 1075 + 1076 + if (data->verf.committed == NFS_UNSTABLE) 1077 + pnfs_commit_set_layoutcommit(data); 1078 + 1079 + return 0; 1080 + } 1081 + 1082 + static int ff_layout_write_prepare_common(struct rpc_task *task, 1083 + struct nfs_pgio_header *hdr) 1084 + { 1085 + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { 1086 + rpc_exit(task, -EIO); 1087 + return -EIO; 1088 + } 1089 + 1090 + if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { 1091 + bool retry_pnfs; 1092 + 1093 + retry_pnfs = ff_layout_has_available_ds(hdr->lseg); 1094 + dprintk("%s task %u reset io to %s\n", __func__, 1095 + task->tk_pid, retry_pnfs ? 
"pNFS" : "MDS"); 1096 + ff_layout_reset_write(hdr, retry_pnfs); 1097 + rpc_exit(task, 0); 1098 + return -EAGAIN; 1099 + } 1100 + 1101 + return 0; 1102 + } 1103 + 1104 + static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data) 1105 + { 1106 + struct nfs_pgio_header *hdr = data; 1107 + 1108 + if (ff_layout_write_prepare_common(task, hdr)) 1109 + return; 1110 + 1111 + rpc_call_start(task); 1112 + } 1113 + 1114 + static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) 1115 + { 1116 + struct nfs_pgio_header *hdr = data; 1117 + 1118 + if (ff_layout_write_prepare_common(task, hdr)) 1119 + return; 1120 + 1121 + if (ff_layout_setup_sequence(hdr->ds_clp, 1122 + &hdr->args.seq_args, 1123 + &hdr->res.seq_res, 1124 + task)) 1125 + return; 1126 + 1127 + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, 1128 + hdr->args.lock_context, FMODE_WRITE) == -EIO) 1129 + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 1130 + } 1131 + 1132 + static void ff_layout_write_call_done(struct rpc_task *task, void *data) 1133 + { 1134 + struct nfs_pgio_header *hdr = data; 1135 + 1136 + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && 1137 + task->tk_status == 0) { 1138 + nfs4_sequence_done(task, &hdr->res.seq_res); 1139 + return; 1140 + } 1141 + 1142 + /* Note this may cause RPC to be resent */ 1143 + hdr->mds_ops->rpc_call_done(task, hdr); 1144 + } 1145 + 1146 + static void ff_layout_write_count_stats(struct rpc_task *task, void *data) 1147 + { 1148 + struct nfs_pgio_header *hdr = data; 1149 + 1150 + rpc_count_iostats_metrics(task, 1151 + &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); 1152 + } 1153 + 1154 + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) 1155 + { 1156 + rpc_call_start(task); 1157 + } 1158 + 1159 + static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) 1160 + { 1161 + struct nfs_commit_data *wdata = data; 1162 + 1163 + ff_layout_setup_sequence(wdata->ds_clp, 1164 + 
&wdata->args.seq_args, 1165 + &wdata->res.seq_res, 1166 + task); 1167 + } 1168 + 1169 + static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) 1170 + { 1171 + struct nfs_commit_data *cdata = data; 1172 + 1173 + rpc_count_iostats_metrics(task, 1174 + &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); 1175 + } 1176 + 1177 + static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { 1178 + .rpc_call_prepare = ff_layout_read_prepare_v3, 1179 + .rpc_call_done = ff_layout_read_call_done, 1180 + .rpc_count_stats = ff_layout_read_count_stats, 1181 + .rpc_release = pnfs_generic_rw_release, 1182 + }; 1183 + 1184 + static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { 1185 + .rpc_call_prepare = ff_layout_read_prepare_v4, 1186 + .rpc_call_done = ff_layout_read_call_done, 1187 + .rpc_count_stats = ff_layout_read_count_stats, 1188 + .rpc_release = pnfs_generic_rw_release, 1189 + }; 1190 + 1191 + static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { 1192 + .rpc_call_prepare = ff_layout_write_prepare_v3, 1193 + .rpc_call_done = ff_layout_write_call_done, 1194 + .rpc_count_stats = ff_layout_write_count_stats, 1195 + .rpc_release = pnfs_generic_rw_release, 1196 + }; 1197 + 1198 + static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { 1199 + .rpc_call_prepare = ff_layout_write_prepare_v4, 1200 + .rpc_call_done = ff_layout_write_call_done, 1201 + .rpc_count_stats = ff_layout_write_count_stats, 1202 + .rpc_release = pnfs_generic_rw_release, 1203 + }; 1204 + 1205 + static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { 1206 + .rpc_call_prepare = ff_layout_commit_prepare_v3, 1207 + .rpc_call_done = pnfs_generic_write_commit_done, 1208 + .rpc_count_stats = ff_layout_commit_count_stats, 1209 + .rpc_release = pnfs_generic_commit_release, 1210 + }; 1211 + 1212 + static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { 1213 + .rpc_call_prepare = ff_layout_commit_prepare_v4, 1214 + .rpc_call_done = 
pnfs_generic_write_commit_done, 1215 + .rpc_count_stats = ff_layout_commit_count_stats, 1216 + .rpc_release = pnfs_generic_commit_release, 1217 + }; 1218 + 1219 + static enum pnfs_try_status 1220 + ff_layout_read_pagelist(struct nfs_pgio_header *hdr) 1221 + { 1222 + struct pnfs_layout_segment *lseg = hdr->lseg; 1223 + struct nfs4_pnfs_ds *ds; 1224 + struct rpc_clnt *ds_clnt; 1225 + struct rpc_cred *ds_cred; 1226 + loff_t offset = hdr->args.offset; 1227 + u32 idx = hdr->pgio_mirror_idx; 1228 + int vers; 1229 + struct nfs_fh *fh; 1230 + 1231 + dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 1232 + __func__, hdr->inode->i_ino, 1233 + hdr->args.pgbase, (size_t)hdr->args.count, offset); 1234 + 1235 + ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); 1236 + if (!ds) 1237 + goto out_failed; 1238 + 1239 + ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, 1240 + hdr->inode); 1241 + if (IS_ERR(ds_clnt)) 1242 + goto out_failed; 1243 + 1244 + ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); 1245 + if (IS_ERR(ds_cred)) 1246 + goto out_failed; 1247 + 1248 + vers = nfs4_ff_layout_ds_version(lseg, idx); 1249 + 1250 + dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, 1251 + ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); 1252 + 1253 + atomic_inc(&ds->ds_clp->cl_count); 1254 + hdr->ds_clp = ds->ds_clp; 1255 + fh = nfs4_ff_layout_select_ds_fh(lseg, idx); 1256 + if (fh) 1257 + hdr->args.fh = fh; 1258 + 1259 + /* 1260 + * Note that if we ever decide to split across DSes, 1261 + * then we may need to handle dense-like offsets. 1262 + */ 1263 + hdr->args.offset = offset; 1264 + hdr->mds_offset = offset; 1265 + 1266 + /* Perform an asynchronous read to ds */ 1267 + nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, 1268 + vers == 3 ? 
&ff_layout_read_call_ops_v3 : 1269 + &ff_layout_read_call_ops_v4, 1270 + 0, RPC_TASK_SOFTCONN); 1271 + 1272 + return PNFS_ATTEMPTED; 1273 + 1274 + out_failed: 1275 + if (ff_layout_has_available_ds(lseg)) 1276 + return PNFS_TRY_AGAIN; 1277 + return PNFS_NOT_ATTEMPTED; 1278 + } 1279 + 1280 + /* Perform async writes. */ 1281 + static enum pnfs_try_status 1282 + ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) 1283 + { 1284 + struct pnfs_layout_segment *lseg = hdr->lseg; 1285 + struct nfs4_pnfs_ds *ds; 1286 + struct rpc_clnt *ds_clnt; 1287 + struct rpc_cred *ds_cred; 1288 + loff_t offset = hdr->args.offset; 1289 + int vers; 1290 + struct nfs_fh *fh; 1291 + int idx = hdr->pgio_mirror_idx; 1292 + 1293 + ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); 1294 + if (!ds) 1295 + return PNFS_NOT_ATTEMPTED; 1296 + 1297 + ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, 1298 + hdr->inode); 1299 + if (IS_ERR(ds_clnt)) 1300 + return PNFS_NOT_ATTEMPTED; 1301 + 1302 + ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); 1303 + if (IS_ERR(ds_cred)) 1304 + return PNFS_NOT_ATTEMPTED; 1305 + 1306 + vers = nfs4_ff_layout_ds_version(lseg, idx); 1307 + 1308 + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n", 1309 + __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, 1310 + offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), 1311 + vers); 1312 + 1313 + hdr->pgio_done_cb = ff_layout_write_done_cb; 1314 + atomic_inc(&ds->ds_clp->cl_count); 1315 + hdr->ds_clp = ds->ds_clp; 1316 + hdr->ds_commit_idx = idx; 1317 + fh = nfs4_ff_layout_select_ds_fh(lseg, idx); 1318 + if (fh) 1319 + hdr->args.fh = fh; 1320 + 1321 + /* 1322 + * Note that if we ever decide to split across DSes, 1323 + * then we may need to handle dense-like offsets. 1324 + */ 1325 + hdr->args.offset = offset; 1326 + 1327 + /* Perform an asynchronous write */ 1328 + nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, 1329 + vers == 3 ? 
&ff_layout_write_call_ops_v3 : 1330 + &ff_layout_write_call_ops_v4, 1331 + sync, RPC_TASK_SOFTCONN); 1332 + return PNFS_ATTEMPTED; 1333 + } 1334 + 1335 + static void 1336 + ff_layout_mark_request_commit(struct nfs_page *req, 1337 + struct pnfs_layout_segment *lseg, 1338 + struct nfs_commit_info *cinfo, 1339 + u32 ds_commit_idx) 1340 + { 1341 + struct list_head *list; 1342 + struct pnfs_commit_bucket *buckets; 1343 + 1344 + spin_lock(cinfo->lock); 1345 + buckets = cinfo->ds->buckets; 1346 + list = &buckets[ds_commit_idx].written; 1347 + if (list_empty(list)) { 1348 + /* Non-empty buckets hold a reference on the lseg. That ref 1349 + * is normally transferred to the COMMIT call and released 1350 + * there. It could also be released if the last req is pulled 1351 + * off due to a rewrite, in which case it will be done in 1352 + * pnfs_common_clear_request_commit 1353 + */ 1354 + WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL); 1355 + buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg); 1356 + } 1357 + set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1358 + cinfo->ds->nwritten++; 1359 + 1360 + /* nfs_request_add_commit_list(). We need to add req to list without 1361 + * dropping cinfo lock. 
1362 + */ 1363 + set_bit(PG_CLEAN, &(req)->wb_flags); 1364 + nfs_list_add_request(req, list); 1365 + cinfo->mds->ncommit++; 1366 + spin_unlock(cinfo->lock); 1367 + if (!cinfo->dreq) { 1368 + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1369 + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, 1370 + BDI_RECLAIMABLE); 1371 + __mark_inode_dirty(req->wb_context->dentry->d_inode, 1372 + I_DIRTY_DATASYNC); 1373 + } 1374 + } 1375 + 1376 + static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1377 + { 1378 + return i; 1379 + } 1380 + 1381 + static struct nfs_fh * 1382 + select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1383 + { 1384 + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); 1385 + 1386 + /* FIXME: Assume that there is only one NFS version available 1387 + * for the DS. 1388 + */ 1389 + return &flseg->mirror_array[i]->fh_versions[0]; 1390 + } 1391 + 1392 + static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) 1393 + { 1394 + struct pnfs_layout_segment *lseg = data->lseg; 1395 + struct nfs4_pnfs_ds *ds; 1396 + struct rpc_clnt *ds_clnt; 1397 + struct rpc_cred *ds_cred; 1398 + u32 idx; 1399 + int vers; 1400 + struct nfs_fh *fh; 1401 + 1402 + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 1403 + ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); 1404 + if (!ds) 1405 + goto out_err; 1406 + 1407 + ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, 1408 + data->inode); 1409 + if (IS_ERR(ds_clnt)) 1410 + goto out_err; 1411 + 1412 + ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred); 1413 + if (IS_ERR(ds_cred)) 1414 + goto out_err; 1415 + 1416 + vers = nfs4_ff_layout_ds_version(lseg, idx); 1417 + 1418 + dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, 1419 + data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count), 1420 + vers); 1421 + data->commit_done_cb = ff_layout_commit_done_cb; 1422 + data->cred = ds_cred; 1423 + 
atomic_inc(&ds->ds_clp->cl_count); 1424 + data->ds_clp = ds->ds_clp; 1425 + fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 1426 + if (fh) 1427 + data->args.fh = fh; 1428 + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, 1429 + vers == 3 ? &ff_layout_commit_call_ops_v3 : 1430 + &ff_layout_commit_call_ops_v4, 1431 + how, RPC_TASK_SOFTCONN); 1432 + out_err: 1433 + pnfs_generic_prepare_to_resend_writes(data); 1434 + pnfs_generic_commit_release(data); 1435 + return -EAGAIN; 1436 + } 1437 + 1438 + static int 1439 + ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 1440 + int how, struct nfs_commit_info *cinfo) 1441 + { 1442 + return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, 1443 + ff_layout_initiate_commit); 1444 + } 1445 + 1446 + static struct pnfs_ds_commit_info * 1447 + ff_layout_get_ds_info(struct inode *inode) 1448 + { 1449 + struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; 1450 + 1451 + if (layout == NULL) 1452 + return NULL; 1453 + 1454 + return &FF_LAYOUT_FROM_HDR(layout)->commit_info; 1455 + } 1456 + 1457 + static void 1458 + ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d) 1459 + { 1460 + nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, 1461 + id_node)); 1462 + } 1463 + 1464 + static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, 1465 + struct xdr_stream *xdr, 1466 + const struct nfs4_layoutreturn_args *args) 1467 + { 1468 + struct pnfs_layout_hdr *hdr = &flo->generic_hdr; 1469 + __be32 *start; 1470 + int count = 0, ret = 0; 1471 + 1472 + start = xdr_reserve_space(xdr, 4); 1473 + if (unlikely(!start)) 1474 + return -E2BIG; 1475 + 1476 + /* This assume we always return _ALL_ layouts */ 1477 + spin_lock(&hdr->plh_inode->i_lock); 1478 + ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); 1479 + spin_unlock(&hdr->plh_inode->i_lock); 1480 + 1481 + *start = cpu_to_be32(count); 1482 + 1483 + return ret; 1484 + } 1485 + 1486 + /* 
report nothing for now */ 1487 + static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, 1488 + struct xdr_stream *xdr, 1489 + const struct nfs4_layoutreturn_args *args) 1490 + { 1491 + __be32 *p; 1492 + 1493 + p = xdr_reserve_space(xdr, 4); 1494 + if (likely(p)) 1495 + *p = cpu_to_be32(0); 1496 + } 1497 + 1498 + static struct nfs4_deviceid_node * 1499 + ff_layout_alloc_deviceid_node(struct nfs_server *server, 1500 + struct pnfs_device *pdev, gfp_t gfp_flags) 1501 + { 1502 + struct nfs4_ff_layout_ds *dsaddr; 1503 + 1504 + dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags); 1505 + if (!dsaddr) 1506 + return NULL; 1507 + return &dsaddr->id_node; 1508 + } 1509 + 1510 + static void 1511 + ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, 1512 + struct xdr_stream *xdr, 1513 + const struct nfs4_layoutreturn_args *args) 1514 + { 1515 + struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); 1516 + __be32 *start; 1517 + 1518 + dprintk("%s: Begin\n", __func__); 1519 + start = xdr_reserve_space(xdr, 4); 1520 + BUG_ON(!start); 1521 + 1522 + if (ff_layout_encode_ioerr(flo, xdr, args)) 1523 + goto out; 1524 + 1525 + ff_layout_encode_iostats(flo, xdr, args); 1526 + out: 1527 + *start = cpu_to_be32((xdr->p - start - 1) * 4); 1528 + dprintk("%s: Return\n", __func__); 1529 + } 1530 + 1531 + static struct pnfs_layoutdriver_type flexfilelayout_type = { 1532 + .id = LAYOUT_FLEX_FILES, 1533 + .name = "LAYOUT_FLEX_FILES", 1534 + .owner = THIS_MODULE, 1535 + .alloc_layout_hdr = ff_layout_alloc_layout_hdr, 1536 + .free_layout_hdr = ff_layout_free_layout_hdr, 1537 + .alloc_lseg = ff_layout_alloc_lseg, 1538 + .free_lseg = ff_layout_free_lseg, 1539 + .pg_read_ops = &ff_layout_pg_read_ops, 1540 + .pg_write_ops = &ff_layout_pg_write_ops, 1541 + .get_ds_info = ff_layout_get_ds_info, 1542 + .free_deviceid_node = ff_layout_free_deveiceid_node, 1543 + .mark_request_commit = ff_layout_mark_request_commit, 1544 + .clear_request_commit = 
pnfs_generic_clear_request_commit, 1545 + .scan_commit_lists = pnfs_generic_scan_commit_lists, 1546 + .recover_commit_reqs = pnfs_generic_recover_commit_reqs, 1547 + .commit_pagelist = ff_layout_commit_pagelist, 1548 + .read_pagelist = ff_layout_read_pagelist, 1549 + .write_pagelist = ff_layout_write_pagelist, 1550 + .alloc_deviceid_node = ff_layout_alloc_deviceid_node, 1551 + .encode_layoutreturn = ff_layout_encode_layoutreturn, 1552 + }; 1553 + 1554 + static int __init nfs4flexfilelayout_init(void) 1555 + { 1556 + printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n", 1557 + __func__); 1558 + return pnfs_register_layoutdriver(&flexfilelayout_type); 1559 + } 1560 + 1561 + static void __exit nfs4flexfilelayout_exit(void) 1562 + { 1563 + printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n", 1564 + __func__); 1565 + pnfs_unregister_layoutdriver(&flexfilelayout_type); 1566 + } 1567 + 1568 + MODULE_ALIAS("nfs-layouttype4-4"); 1569 + 1570 + MODULE_LICENSE("GPL"); 1571 + MODULE_DESCRIPTION("The NFSv4 flexfile layout driver"); 1572 + 1573 + module_init(nfs4flexfilelayout_init); 1574 + module_exit(nfs4flexfilelayout_exit);
+155
fs/nfs/flexfilelayout/flexfilelayout.h
··· 1 + /* 2 + * NFSv4 flexfile layout driver data structures. 3 + * 4 + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. 5 + * 6 + * Tao Peng <bergwolf@primarydata.com> 7 + */ 8 + 9 + #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H 10 + #define FS_NFS_NFS4FLEXFILELAYOUT_H 11 + 12 + #include "../pnfs.h" 13 + 14 + /* XXX: Let's filter out insanely large mirror count for now to avoid oom 15 + * due to network error etc. */ 16 + #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 17 + 18 + struct nfs4_ff_ds_version { 19 + u32 version; 20 + u32 minor_version; 21 + u32 rsize; 22 + u32 wsize; 23 + bool tightly_coupled; 24 + }; 25 + 26 + /* chained in global deviceid hlist */ 27 + struct nfs4_ff_layout_ds { 28 + struct nfs4_deviceid_node id_node; 29 + u32 ds_versions_cnt; 30 + struct nfs4_ff_ds_version *ds_versions; 31 + struct nfs4_pnfs_ds *ds; 32 + }; 33 + 34 + struct nfs4_ff_layout_ds_err { 35 + struct list_head list; /* linked in mirror error_list */ 36 + u64 offset; 37 + u64 length; 38 + int status; 39 + enum nfs_opnum4 opnum; 40 + nfs4_stateid stateid; 41 + struct nfs4_deviceid deviceid; 42 + }; 43 + 44 + struct nfs4_ff_layout_mirror { 45 + u32 ds_count; 46 + u32 efficiency; 47 + struct nfs4_ff_layout_ds *mirror_ds; 48 + u32 fh_versions_cnt; 49 + struct nfs_fh *fh_versions; 50 + nfs4_stateid stateid; 51 + struct nfs4_string user_name; 52 + struct nfs4_string group_name; 53 + u32 uid; 54 + u32 gid; 55 + struct rpc_cred *cred; 56 + spinlock_t lock; 57 + }; 58 + 59 + struct nfs4_ff_layout_segment { 60 + struct pnfs_layout_segment generic_hdr; 61 + u64 stripe_unit; 62 + u32 mirror_array_cnt; 63 + struct nfs4_ff_layout_mirror **mirror_array; 64 + }; 65 + 66 + struct nfs4_flexfile_layout { 67 + struct pnfs_layout_hdr generic_hdr; 68 + struct pnfs_ds_commit_info commit_info; 69 + struct list_head error_list; /* nfs4_ff_layout_ds_err */ 70 + }; 71 + 72 + static inline struct nfs4_flexfile_layout * 73 + FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) 74 + { 75 + return 
container_of(lo, struct nfs4_flexfile_layout, generic_hdr); 76 + } 77 + 78 + static inline struct nfs4_ff_layout_segment * 79 + FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) 80 + { 81 + return container_of(lseg, 82 + struct nfs4_ff_layout_segment, 83 + generic_hdr); 84 + } 85 + 86 + static inline struct nfs4_deviceid_node * 87 + FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) 88 + { 89 + if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || 90 + FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || 91 + FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) 92 + return NULL; 93 + return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; 94 + } 95 + 96 + static inline struct nfs4_ff_layout_ds * 97 + FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) 98 + { 99 + return container_of(node, struct nfs4_ff_layout_ds, id_node); 100 + } 101 + 102 + static inline struct nfs4_ff_layout_mirror * 103 + FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) 104 + { 105 + if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) 106 + return NULL; 107 + return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; 108 + } 109 + 110 + static inline u32 111 + FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) 112 + { 113 + return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt; 114 + } 115 + 116 + static inline bool 117 + ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) 118 + { 119 + return nfs4_test_deviceid_unavailable(node); 120 + } 121 + 122 + static inline int 123 + nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx) 124 + { 125 + return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version; 126 + } 127 + 128 + struct nfs4_ff_layout_ds * 129 + nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 130 + gfp_t gfp_flags); 131 + void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds); 132 + void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds); 133 + int 
ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, 134 + struct nfs4_ff_layout_mirror *mirror, u64 offset, 135 + u64 length, int status, enum nfs_opnum4 opnum, 136 + gfp_t gfp_flags); 137 + int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, 138 + struct xdr_stream *xdr, int *count, 139 + const struct pnfs_layout_range *range); 140 + struct nfs_fh * 141 + nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); 142 + 143 + struct nfs4_pnfs_ds * 144 + nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, 145 + bool fail_return); 146 + 147 + struct rpc_clnt * 148 + nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, 149 + u32 ds_idx, 150 + struct nfs_client *ds_clp, 151 + struct inode *inode); 152 + struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, 153 + u32 ds_idx, struct rpc_cred *mdscred); 154 + bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); 155 + #endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */
+552
fs/nfs/flexfilelayout/flexfilelayoutdev.c
··· 1 + /* 2 + * Device operations for the pnfs nfs4 file layout driver. 3 + * 4 + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. 5 + * 6 + * Tao Peng <bergwolf@primarydata.com> 7 + */ 8 + 9 + #include <linux/nfs_fs.h> 10 + #include <linux/vmalloc.h> 11 + #include <linux/module.h> 12 + #include <linux/sunrpc/addr.h> 13 + 14 + #include "../internal.h" 15 + #include "../nfs4session.h" 16 + #include "flexfilelayout.h" 17 + 18 + #define NFSDBG_FACILITY NFSDBG_PNFS_LD 19 + 20 + static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; 21 + static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; 22 + 23 + void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) 24 + { 25 + if (mirror_ds) 26 + nfs4_put_deviceid_node(&mirror_ds->id_node); 27 + } 28 + 29 + void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) 30 + { 31 + nfs4_print_deviceid(&mirror_ds->id_node.deviceid); 32 + nfs4_pnfs_ds_put(mirror_ds->ds); 33 + kfree(mirror_ds); 34 + } 35 + 36 + /* Decode opaque device data and construct new_ds using it */ 37 + struct nfs4_ff_layout_ds * 38 + nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 39 + gfp_t gfp_flags) 40 + { 41 + struct xdr_stream stream; 42 + struct xdr_buf buf; 43 + struct page *scratch; 44 + struct list_head dsaddrs; 45 + struct nfs4_pnfs_ds_addr *da; 46 + struct nfs4_ff_layout_ds *new_ds = NULL; 47 + struct nfs4_ff_ds_version *ds_versions = NULL; 48 + u32 mp_count; 49 + u32 version_count; 50 + __be32 *p; 51 + int i, ret = -ENOMEM; 52 + 53 + /* set up xdr stream */ 54 + scratch = alloc_page(gfp_flags); 55 + if (!scratch) 56 + goto out_err; 57 + 58 + new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); 59 + if (!new_ds) 60 + goto out_scratch; 61 + 62 + nfs4_init_deviceid_node(&new_ds->id_node, 63 + server, 64 + &pdev->dev_id); 65 + INIT_LIST_HEAD(&dsaddrs); 66 + 67 + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 68 + xdr_set_scratch_buffer(&stream, 
page_address(scratch), PAGE_SIZE); 69 + 70 + /* multipath count */ 71 + p = xdr_inline_decode(&stream, 4); 72 + if (unlikely(!p)) 73 + goto out_err_drain_dsaddrs; 74 + mp_count = be32_to_cpup(p); 75 + dprintk("%s: multipath ds count %d\n", __func__, mp_count); 76 + 77 + for (i = 0; i < mp_count; i++) { 78 + /* multipath ds */ 79 + da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, 80 + &stream, gfp_flags); 81 + if (da) 82 + list_add_tail(&da->da_node, &dsaddrs); 83 + } 84 + if (list_empty(&dsaddrs)) { 85 + dprintk("%s: no suitable DS addresses found\n", 86 + __func__); 87 + ret = -ENOMEDIUM; 88 + goto out_err_drain_dsaddrs; 89 + } 90 + 91 + /* version count */ 92 + p = xdr_inline_decode(&stream, 4); 93 + if (unlikely(!p)) 94 + goto out_err_drain_dsaddrs; 95 + version_count = be32_to_cpup(p); 96 + dprintk("%s: version count %d\n", __func__, version_count); 97 + 98 + ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), 99 + gfp_flags); 100 + if (!ds_versions) 101 + goto out_scratch; 102 + 103 + for (i = 0; i < version_count; i++) { 104 + /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + 105 + * tightly_coupled(4) */ 106 + p = xdr_inline_decode(&stream, 20); 107 + if (unlikely(!p)) 108 + goto out_err_drain_dsaddrs; 109 + ds_versions[i].version = be32_to_cpup(p++); 110 + ds_versions[i].minor_version = be32_to_cpup(p++); 111 + ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); 112 + ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); 113 + ds_versions[i].tightly_coupled = be32_to_cpup(p); 114 + 115 + if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) 116 + ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; 117 + if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) 118 + ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; 119 + 120 + if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { 121 + dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, 122 + i, ds_versions[i].version, 123 + 
ds_versions[i].minor_version); 124 + ret = -EPROTONOSUPPORT; 125 + goto out_err_drain_dsaddrs; 126 + } 127 + 128 + dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", 129 + __func__, i, ds_versions[i].version, 130 + ds_versions[i].minor_version, 131 + ds_versions[i].rsize, 132 + ds_versions[i].wsize, 133 + ds_versions[i].tightly_coupled); 134 + } 135 + 136 + new_ds->ds_versions = ds_versions; 137 + new_ds->ds_versions_cnt = version_count; 138 + 139 + new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); 140 + if (!new_ds->ds) 141 + goto out_err_drain_dsaddrs; 142 + 143 + /* If DS was already in cache, free ds addrs */ 144 + while (!list_empty(&dsaddrs)) { 145 + da = list_first_entry(&dsaddrs, 146 + struct nfs4_pnfs_ds_addr, 147 + da_node); 148 + list_del_init(&da->da_node); 149 + kfree(da->da_remotestr); 150 + kfree(da); 151 + } 152 + 153 + __free_page(scratch); 154 + return new_ds; 155 + 156 + out_err_drain_dsaddrs: 157 + while (!list_empty(&dsaddrs)) { 158 + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, 159 + da_node); 160 + list_del_init(&da->da_node); 161 + kfree(da->da_remotestr); 162 + kfree(da); 163 + } 164 + 165 + kfree(ds_versions); 166 + out_scratch: 167 + __free_page(scratch); 168 + out_err: 169 + kfree(new_ds); 170 + 171 + dprintk("%s ERROR: returning %d\n", __func__, ret); 172 + return NULL; 173 + } 174 + 175 + static u64 176 + end_offset(u64 start, u64 len) 177 + { 178 + u64 end; 179 + 180 + end = start + len; 181 + return end >= start ? 
end : NFS4_MAX_UINT64; 182 + } 183 + 184 + static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, 185 + u64 offset, u64 length) 186 + { 187 + u64 end; 188 + 189 + end = max_t(u64, end_offset(err->offset, err->length), 190 + end_offset(offset, length)); 191 + err->offset = min_t(u64, err->offset, offset); 192 + err->length = end - err->offset; 193 + } 194 + 195 + static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset, 196 + u64 length, int status, enum nfs_opnum4 opnum, 197 + nfs4_stateid *stateid, 198 + struct nfs4_deviceid *deviceid) 199 + { 200 + return err->status == status && err->opnum == opnum && 201 + nfs4_stateid_match(&err->stateid, stateid) && 202 + !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) && 203 + end_offset(err->offset, err->length) >= offset && 204 + err->offset <= end_offset(offset, length); 205 + } 206 + 207 + static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old, 208 + struct nfs4_ff_layout_ds_err *new) 209 + { 210 + if (!ds_error_can_merge(old, new->offset, new->length, new->status, 211 + new->opnum, &new->stateid, &new->deviceid)) 212 + return false; 213 + 214 + extend_ds_error(old, new->offset, new->length); 215 + return true; 216 + } 217 + 218 + static bool 219 + ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, 220 + struct nfs4_ff_layout_ds_err *dserr) 221 + { 222 + struct nfs4_ff_layout_ds_err *err; 223 + 224 + list_for_each_entry(err, &flo->error_list, list) { 225 + if (merge_ds_error(err, dserr)) { 226 + return true; 227 + } 228 + } 229 + 230 + list_add(&dserr->list, &flo->error_list); 231 + return false; 232 + } 233 + 234 + static bool 235 + ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset, 236 + u64 length, int status, enum nfs_opnum4 opnum, 237 + nfs4_stateid *stateid, struct nfs4_deviceid *deviceid) 238 + { 239 + bool found = false; 240 + struct nfs4_ff_layout_ds_err *err; 241 + 242 + list_for_each_entry(err, &flo->error_list, list) { 243 + if 
(ds_error_can_merge(err, offset, length, status, opnum, 244 + stateid, deviceid)) { 245 + found = true; 246 + extend_ds_error(err, offset, length); 247 + break; 248 + } 249 + } 250 + 251 + return found; 252 + } 253 + 254 + int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, 255 + struct nfs4_ff_layout_mirror *mirror, u64 offset, 256 + u64 length, int status, enum nfs_opnum4 opnum, 257 + gfp_t gfp_flags) 258 + { 259 + struct nfs4_ff_layout_ds_err *dserr; 260 + bool needfree; 261 + 262 + if (status == 0) 263 + return 0; 264 + 265 + if (mirror->mirror_ds == NULL) 266 + return -EINVAL; 267 + 268 + spin_lock(&flo->generic_hdr.plh_inode->i_lock); 269 + if (ff_layout_update_ds_error(flo, offset, length, status, opnum, 270 + &mirror->stateid, 271 + &mirror->mirror_ds->id_node.deviceid)) { 272 + spin_unlock(&flo->generic_hdr.plh_inode->i_lock); 273 + return 0; 274 + } 275 + spin_unlock(&flo->generic_hdr.plh_inode->i_lock); 276 + dserr = kmalloc(sizeof(*dserr), gfp_flags); 277 + if (!dserr) 278 + return -ENOMEM; 279 + 280 + INIT_LIST_HEAD(&dserr->list); 281 + dserr->offset = offset; 282 + dserr->length = length; 283 + dserr->status = status; 284 + dserr->opnum = opnum; 285 + nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); 286 + memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, 287 + NFS4_DEVICEID4_SIZE); 288 + 289 + spin_lock(&flo->generic_hdr.plh_inode->i_lock); 290 + needfree = ff_layout_add_ds_error_locked(flo, dserr); 291 + spin_unlock(&flo->generic_hdr.plh_inode->i_lock); 292 + if (needfree) 293 + kfree(dserr); 294 + 295 + return 0; 296 + } 297 + 298 + /* currently we only support AUTH_NONE and AUTH_SYS */ 299 + static rpc_authflavor_t 300 + nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror) 301 + { 302 + if (mirror->uid == (u32)-1) 303 + return RPC_AUTH_NULL; 304 + return RPC_AUTH_UNIX; 305 + } 306 + 307 + /* fetch cred for NFSv3 DS */ 308 + static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, 
309 + struct nfs4_pnfs_ds *ds) 310 + { 311 + if (ds->ds_clp && !mirror->cred && 312 + mirror->mirror_ds->ds_versions[0].version == 3) { 313 + struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth; 314 + struct rpc_cred *cred; 315 + struct auth_cred acred = { 316 + .uid = make_kuid(&init_user_ns, mirror->uid), 317 + .gid = make_kgid(&init_user_ns, mirror->gid), 318 + }; 319 + 320 + /* AUTH_NULL ignores acred */ 321 + cred = auth->au_ops->lookup_cred(auth, &acred, 0); 322 + if (IS_ERR(cred)) { 323 + dprintk("%s: lookup_cred failed with %ld\n", 324 + __func__, PTR_ERR(cred)); 325 + return PTR_ERR(cred); 326 + } else { 327 + mirror->cred = cred; 328 + } 329 + } 330 + return 0; 331 + } 332 + 333 + struct nfs_fh * 334 + nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) 335 + { 336 + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); 337 + struct nfs_fh *fh = NULL; 338 + struct nfs4_deviceid_node *devid; 339 + 340 + if (mirror == NULL || mirror->mirror_ds == NULL || 341 + mirror->mirror_ds->ds == NULL) { 342 + printk(KERN_ERR "NFS: %s: No data server for mirror offset index %d\n", 343 + __func__, mirror_idx); 344 + if (mirror && mirror->mirror_ds) { 345 + devid = &mirror->mirror_ds->id_node; 346 + pnfs_generic_mark_devid_invalid(devid); 347 + } 348 + goto out; 349 + } 350 + 351 + /* FIXME: For now assume there is only 1 version available for the DS */ 352 + fh = &mirror->fh_versions[0]; 353 + out: 354 + return fh; 355 + } 356 + 357 + /* Upon return, either ds is connected, or ds is NULL */ 358 + struct nfs4_pnfs_ds * 359 + nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, 360 + bool fail_return) 361 + { 362 + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); 363 + struct nfs4_pnfs_ds *ds = NULL; 364 + struct nfs4_deviceid_node *devid; 365 + struct inode *ino = lseg->pls_layout->plh_inode; 366 + struct nfs_server *s = NFS_SERVER(ino); 367 + unsigned int max_payload; 368 + 
rpc_authflavor_t flavor; 369 + 370 + if (mirror == NULL || mirror->mirror_ds == NULL || 371 + mirror->mirror_ds->ds == NULL) { 372 + printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 373 + __func__, ds_idx); 374 + if (mirror && mirror->mirror_ds) { 375 + devid = &mirror->mirror_ds->id_node; 376 + pnfs_generic_mark_devid_invalid(devid); 377 + } 378 + goto out; 379 + } 380 + 381 + devid = &mirror->mirror_ds->id_node; 382 + if (ff_layout_test_devid_unavailable(devid)) 383 + goto out; 384 + 385 + ds = mirror->mirror_ds->ds; 386 + /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ 387 + smp_rmb(); 388 + if (ds->ds_clp) 389 + goto out; 390 + 391 + flavor = nfs4_ff_layout_choose_authflavor(mirror); 392 + 393 + /* FIXME: For now we assume the server sent only one version of NFS 394 + * to use for the DS. 395 + */ 396 + nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, 397 + dataserver_retrans, 398 + mirror->mirror_ds->ds_versions[0].version, 399 + mirror->mirror_ds->ds_versions[0].minor_version, 400 + flavor); 401 + 402 + /* connect success, check rsize/wsize limit */ 403 + if (ds->ds_clp) { 404 + max_payload = 405 + nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), 406 + NULL); 407 + if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) 408 + mirror->mirror_ds->ds_versions[0].rsize = max_payload; 409 + if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) 410 + mirror->mirror_ds->ds_versions[0].wsize = max_payload; 411 + } else { 412 + ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 413 + mirror, lseg->pls_range.offset, 414 + lseg->pls_range.length, NFS4ERR_NXIO, 415 + OP_ILLEGAL, GFP_NOIO); 416 + if (fail_return) { 417 + pnfs_error_mark_layout_for_return(ino, lseg); 418 + if (ff_layout_has_available_ds(lseg)) 419 + pnfs_set_retry_layoutget(lseg->pls_layout); 420 + else 421 + pnfs_clear_retry_layoutget(lseg->pls_layout); 422 + 423 + } else { 424 + if (ff_layout_has_available_ds(lseg)) 425 + 
set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 426 + &lseg->pls_layout->plh_flags); 427 + else { 428 + pnfs_error_mark_layout_for_return(ino, lseg); 429 + pnfs_clear_retry_layoutget(lseg->pls_layout); 430 + } 431 + } 432 + } 433 + 434 + if (ff_layout_update_mirror_cred(mirror, ds)) 435 + ds = NULL; 436 + out: 437 + return ds; 438 + } 439 + 440 + struct rpc_cred * 441 + ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, 442 + struct rpc_cred *mdscred) 443 + { 444 + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); 445 + struct rpc_cred *cred = ERR_PTR(-EINVAL); 446 + 447 + if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true)) 448 + goto out; 449 + 450 + if (mirror && mirror->cred) 451 + cred = mirror->cred; 452 + else 453 + cred = mdscred; 454 + out: 455 + return cred; 456 + } 457 + 458 + /** 459 + * Find or create a DS rpc client with th MDS server rpc client auth flavor 460 + * in the nfs_client cl_ds_clients list. 461 + */ 462 + struct rpc_clnt * 463 + nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, 464 + struct nfs_client *ds_clp, struct inode *inode) 465 + { 466 + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); 467 + 468 + switch (mirror->mirror_ds->ds_versions[0].version) { 469 + case 3: 470 + /* For NFSv3 DS, flavor is set when creating DS connections */ 471 + return ds_clp->cl_rpcclient; 472 + case 4: 473 + return nfs4_find_or_create_ds_client(ds_clp, inode); 474 + default: 475 + BUG(); 476 + } 477 + } 478 + 479 + static bool is_range_intersecting(u64 offset1, u64 length1, 480 + u64 offset2, u64 length2) 481 + { 482 + u64 end1 = end_offset(offset1, length1); 483 + u64 end2 = end_offset(offset2, length2); 484 + 485 + return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && 486 + (end2 == NFS4_MAX_UINT64 || end2 > offset1); 487 + } 488 + 489 + /* called with inode i_lock held */ 490 + int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, 491 + struct xdr_stream *xdr, int 
*count, 492 + const struct pnfs_layout_range *range) 493 + { 494 + struct nfs4_ff_layout_ds_err *err, *n; 495 + __be32 *p; 496 + 497 + list_for_each_entry_safe(err, n, &flo->error_list, list) { 498 + if (!is_range_intersecting(err->offset, err->length, 499 + range->offset, range->length)) 500 + continue; 501 + /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) 502 + * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4) 503 + */ 504 + p = xdr_reserve_space(xdr, 505 + 24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); 506 + if (unlikely(!p)) 507 + return -ENOBUFS; 508 + p = xdr_encode_hyper(p, err->offset); 509 + p = xdr_encode_hyper(p, err->length); 510 + p = xdr_encode_opaque_fixed(p, &err->stateid, 511 + NFS4_STATEID_SIZE); 512 + p = xdr_encode_opaque_fixed(p, &err->deviceid, 513 + NFS4_DEVICEID4_SIZE); 514 + *p++ = cpu_to_be32(err->status); 515 + *p++ = cpu_to_be32(err->opnum); 516 + *count += 1; 517 + list_del(&err->list); 518 + kfree(err); 519 + dprintk("%s: offset %llu length %llu status %d op %d count %d\n", 520 + __func__, err->offset, err->length, err->status, 521 + err->opnum, *count); 522 + } 523 + 524 + return 0; 525 + } 526 + 527 + bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) 528 + { 529 + struct nfs4_ff_layout_mirror *mirror; 530 + struct nfs4_deviceid_node *devid; 531 + int idx; 532 + 533 + for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 534 + mirror = FF_LAYOUT_COMP(lseg, idx); 535 + if (mirror && mirror->mirror_ds) { 536 + devid = &mirror->mirror_ds->id_node; 537 + if (!ff_layout_test_devid_unavailable(devid)) 538 + return true; 539 + } 540 + } 541 + 542 + return false; 543 + } 544 + 545 + module_param(dataserver_retrans, uint, 0644); 546 + MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " 547 + "retries a request before it attempts further " 548 + " recovery action."); 549 + module_param(dataserver_timeo, uint, 0644); 550 + MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a 
second) the " 551 + "NFSv4.1 client waits for a response from a " 552 + " data server before it retries an NFS request.");
+2 -1
fs/nfs/idmap.c
··· 152 152 nfs_fattr_free_group_name(fattr); 153 153 } 154 154 155 - static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) 155 + int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) 156 156 { 157 157 unsigned long val; 158 158 char buf[16]; ··· 166 166 *res = val; 167 167 return 1; 168 168 } 169 + EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric); 169 170 170 171 static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) 171 172 {
+26 -5
fs/nfs/internal.h
··· 6 6 #include <linux/mount.h> 7 7 #include <linux/security.h> 8 8 #include <linux/crc32.h> 9 + #include <linux/nfs_page.h> 9 10 10 11 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) 11 12 ··· 188 187 const struct sockaddr *ds_addr, 189 188 int ds_addrlen, int ds_proto, 190 189 unsigned int ds_timeo, 191 - unsigned int ds_retrans); 190 + unsigned int ds_retrans, 191 + u32 minor_version, 192 + rpc_authflavor_t au_flavor); 192 193 extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, 193 194 struct inode *); 195 + extern struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, 196 + const struct sockaddr *ds_addr, int ds_addrlen, 197 + int ds_proto, unsigned int ds_timeo, 198 + unsigned int ds_retrans, rpc_authflavor_t au_flavor); 194 199 #ifdef CONFIG_PROC_FS 195 200 extern int __init nfs_fs_proc_init(void); 196 201 extern void nfs_fs_proc_exit(void); ··· 249 242 void nfs_pgio_header_free(struct nfs_pgio_header *); 250 243 void nfs_pgio_data_destroy(struct nfs_pgio_header *); 251 244 int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 252 - int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, 253 - const struct rpc_call_ops *, int, int); 245 + int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, 246 + struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, 247 + const struct rpc_call_ops *call_ops, int how, int flags); 254 248 void nfs_free_request(struct nfs_page *req); 249 + struct nfs_pgio_mirror * 250 + nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); 255 251 256 252 static inline void nfs_iocounter_init(struct nfs_io_counter *c) 257 253 { 258 254 c->flags = 0; 259 255 atomic_set(&c->io_count, 0); 256 + } 257 + 258 + static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) 259 + { 260 + WARN_ON_ONCE(desc->pg_mirror_count < 1); 261 + return desc->pg_mirror_count > 1; 260 262 } 261 263 262 264 /* nfs2xdr.c */ 
··· 443 427 extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); 444 428 extern int nfs_initiate_commit(struct rpc_clnt *clnt, 445 429 struct nfs_commit_data *data, 430 + const struct nfs_rpc_ops *nfs_ops, 446 431 const struct rpc_call_ops *call_ops, 447 432 int how, int flags); 448 433 extern void nfs_init_commit(struct nfs_commit_data *data, ··· 457 440 struct nfs_commit_info *cinfo); 458 441 void nfs_mark_request_commit(struct nfs_page *req, 459 442 struct pnfs_layout_segment *lseg, 460 - struct nfs_commit_info *cinfo); 443 + struct nfs_commit_info *cinfo, 444 + u32 ds_commit_idx); 461 445 int nfs_write_need_commit(struct nfs_pgio_header *); 462 446 int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 463 447 int how, struct nfs_commit_info *cinfo); 464 448 void nfs_retry_commit(struct list_head *page_list, 465 449 struct pnfs_layout_segment *lseg, 466 - struct nfs_commit_info *cinfo); 450 + struct nfs_commit_info *cinfo, 451 + u32 ds_commit_idx); 467 452 void nfs_commitdata_release(struct nfs_commit_data *data); 468 453 void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, 469 454 struct nfs_commit_info *cinfo); ··· 476 457 struct nfs_direct_req *dreq); 477 458 int nfs_key_timeout_notify(struct file *filp, struct inode *inode); 478 459 bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); 460 + void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); 479 461 480 462 #ifdef CONFIG_MIGRATION 481 463 extern int nfs_migrate_page(struct address_space *, ··· 500 480 inode_dio_wait(inode); 501 481 } 502 482 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 483 + extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); 503 484 504 485 /* nfs4proc.c */ 505 486 extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
+7 -3
fs/nfs/nfs2xdr.c
··· 481 481 * void; 482 482 * }; 483 483 */ 484 - static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) 484 + static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result, 485 + __u32 *op_status) 485 486 { 486 487 enum nfs_stat status; 487 488 int error; ··· 490 489 error = decode_stat(xdr, &status); 491 490 if (unlikely(error)) 492 491 goto out; 492 + if (op_status) 493 + *op_status = status; 493 494 if (status != NFS_OK) 494 495 goto out_default; 495 496 error = decode_fattr(xdr, result); ··· 811 808 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, 812 809 struct nfs_fattr *result) 813 810 { 814 - return decode_attrstat(xdr, result); 811 + return decode_attrstat(xdr, result, NULL); 815 812 } 816 813 817 814 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, ··· 868 865 error = decode_stat(xdr, &status); 869 866 if (unlikely(error)) 870 867 goto out; 868 + result->op_status = status; 871 869 if (status != NFS_OK) 872 870 goto out_default; 873 871 error = decode_fattr(xdr, result->fattr); ··· 886 882 { 887 883 /* All NFSv2 writes are "file sync" writes */ 888 884 result->verf->committed = NFS_FILE_SYNC; 889 - return decode_attrstat(xdr, result->fattr); 885 + return decode_attrstat(xdr, result->fattr, &result->op_status); 890 886 } 891 887 892 888 /**
+2
fs/nfs/nfs3_fs.h
··· 30 30 struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, 31 31 struct nfs_fattr *, rpc_authflavor_t); 32 32 33 + /* nfs3super.c */ 34 + extern struct nfs_subversion nfs_v3; 33 35 34 36 #endif /* __LINUX_FS_NFS_NFS3_FS_H */
+41
fs/nfs/nfs3client.c
··· 1 1 #include <linux/nfs_fs.h> 2 2 #include <linux/nfs_mount.h> 3 + #include <linux/sunrpc/addr.h> 3 4 #include "internal.h" 4 5 #include "nfs3_fs.h" 5 6 ··· 65 64 nfs_init_server_aclclient(server); 66 65 return server; 67 66 } 67 + 68 + /* 69 + * Set up a pNFS Data Server client over NFSv3. 70 + * 71 + * Return any existing nfs_client that matches server address,port,version 72 + * and minorversion. 73 + * 74 + * For a new nfs_client, use a soft mount (default), a low retrans and a 75 + * low timeout interval so that if a connection is lost, we retry through 76 + * the MDS. 77 + */ 78 + struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, 79 + const struct sockaddr *ds_addr, int ds_addrlen, 80 + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, 81 + rpc_authflavor_t au_flavor) 82 + { 83 + struct nfs_client_initdata cl_init = { 84 + .addr = ds_addr, 85 + .addrlen = ds_addrlen, 86 + .nfs_mod = &nfs_v3, 87 + .proto = ds_proto, 88 + .net = mds_clp->cl_net, 89 + }; 90 + struct rpc_timeout ds_timeout; 91 + struct nfs_client *clp; 92 + char buf[INET6_ADDRSTRLEN + 1]; 93 + 94 + /* fake a hostname because lockd wants it */ 95 + if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) 96 + return ERR_PTR(-EINVAL); 97 + cl_init.hostname = buf; 98 + 99 + /* Use the MDS nfs_client cl_ipaddr. */ 100 + nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); 101 + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, 102 + au_flavor); 103 + 104 + return clp; 105 + } 106 + EXPORT_SYMBOL_GPL(nfs3_set_ds_client);
+9
fs/nfs/nfs3proc.c
··· 800 800 { 801 801 struct inode *inode = hdr->inode; 802 802 803 + if (hdr->pgio_done_cb != NULL) 804 + return hdr->pgio_done_cb(task, hdr); 805 + 803 806 if (nfs3_async_handle_jukebox(task, inode)) 804 807 return -EAGAIN; 805 808 ··· 828 825 { 829 826 struct inode *inode = hdr->inode; 830 827 828 + if (hdr->pgio_done_cb != NULL) 829 + return hdr->pgio_done_cb(task, hdr); 830 + 831 831 if (nfs3_async_handle_jukebox(task, inode)) 832 832 return -EAGAIN; 833 833 if (task->tk_status >= 0) ··· 851 845 852 846 static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) 853 847 { 848 + if (data->commit_done_cb != NULL) 849 + return data->commit_done_cb(task, data); 850 + 854 851 if (nfs3_async_handle_jukebox(task, data->inode)) 855 852 return -EAGAIN; 856 853 nfs_refresh_inode(data->inode, data->res.fattr);
+1 -1
fs/nfs/nfs3super.c
··· 7 7 #include "nfs3_fs.h" 8 8 #include "nfs.h" 9 9 10 - static struct nfs_subversion nfs_v3 = { 10 + struct nfs_subversion nfs_v3 = { 11 11 .owner = THIS_MODULE, 12 12 .nfs_fs = &nfs_fs_type, 13 13 .rpc_vers = &nfs_version3,
+3
fs/nfs/nfs3xdr.c
··· 1636 1636 error = decode_post_op_attr(xdr, result->fattr); 1637 1637 if (unlikely(error)) 1638 1638 goto out; 1639 + result->op_status = status; 1639 1640 if (status != NFS3_OK) 1640 1641 goto out_status; 1641 1642 error = decode_read3resok(xdr, result); ··· 1709 1708 error = decode_wcc_data(xdr, result->fattr); 1710 1709 if (unlikely(error)) 1711 1710 goto out; 1711 + result->op_status = status; 1712 1712 if (status != NFS3_OK) 1713 1713 goto out_status; 1714 1714 error = decode_write3resok(xdr, result); ··· 2325 2323 error = decode_wcc_data(xdr, result->fattr); 2326 2324 if (unlikely(error)) 2327 2325 goto out; 2326 + result->op_status = status; 2328 2327 if (status != NFS3_OK) 2329 2328 goto out_status; 2330 2329 error = decode_writeverf3(xdr, &result->verf->verifier);
+6
fs/nfs/nfs4_fs.h
··· 446 446 extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 447 447 extern void nfs_release_seqid(struct nfs_seqid *seqid); 448 448 extern void nfs_free_seqid(struct nfs_seqid *seqid); 449 + extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, 450 + struct nfs4_sequence_args *args, 451 + struct nfs4_sequence_res *res, 452 + struct rpc_task *task); 453 + extern int nfs4_sequence_done(struct rpc_task *task, 454 + struct nfs4_sequence_res *res); 449 455 450 456 extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); 451 457
+4 -3
fs/nfs/nfs4client.c
··· 849 849 */ 850 850 struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, 851 851 const struct sockaddr *ds_addr, int ds_addrlen, 852 - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) 852 + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, 853 + u32 minor_version, rpc_authflavor_t au_flavor) 853 854 { 854 855 struct nfs_client_initdata cl_init = { 855 856 .addr = ds_addr, 856 857 .addrlen = ds_addrlen, 857 858 .nfs_mod = &nfs_v4, 858 859 .proto = ds_proto, 859 - .minorversion = mds_clp->cl_minorversion, 860 + .minorversion = minor_version, 860 861 .net = mds_clp->cl_net, 861 862 }; 862 863 struct rpc_timeout ds_timeout; ··· 875 874 */ 876 875 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); 877 876 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, 878 - mds_clp->cl_rpcclient->cl_auth->au_flavor); 877 + au_flavor); 879 878 880 879 dprintk("<-- %s %p\n", __func__, clp); 881 880 return clp;
+31 -17
fs/nfs/nfs4proc.c
··· 495 495 args->sa_privileged = 1; 496 496 } 497 497 498 - static int nfs40_setup_sequence(const struct nfs_server *server, 499 - struct nfs4_sequence_args *args, 500 - struct nfs4_sequence_res *res, 501 - struct rpc_task *task) 498 + int nfs40_setup_sequence(struct nfs4_slot_table *tbl, 499 + struct nfs4_sequence_args *args, 500 + struct nfs4_sequence_res *res, 501 + struct rpc_task *task) 502 502 { 503 - struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl; 504 503 struct nfs4_slot *slot; 505 504 506 505 /* slot already allocated? */ ··· 534 535 spin_unlock(&tbl->slot_tbl_lock); 535 536 return -EAGAIN; 536 537 } 538 + EXPORT_SYMBOL_GPL(nfs40_setup_sequence); 537 539 538 540 static int nfs40_sequence_done(struct rpc_task *task, 539 541 struct nfs4_sequence_res *res) ··· 694 694 } 695 695 EXPORT_SYMBOL_GPL(nfs41_sequence_done); 696 696 697 - static int nfs4_sequence_done(struct rpc_task *task, 698 - struct nfs4_sequence_res *res) 697 + int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) 699 698 { 700 699 if (res->sr_slot == NULL) 701 700 return 1; ··· 702 703 return nfs40_sequence_done(task, res); 703 704 return nfs41_sequence_done(task, res); 704 705 } 706 + EXPORT_SYMBOL_GPL(nfs4_sequence_done); 705 707 706 708 int nfs41_setup_sequence(struct nfs4_session *session, 707 709 struct nfs4_sequence_args *args, ··· 777 777 int ret = 0; 778 778 779 779 if (!session) 780 - return nfs40_setup_sequence(server, args, res, task); 780 + return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, 781 + args, res, task); 781 782 782 783 dprintk("--> %s clp %p session %p sr_slot %u\n", 783 784 __func__, session->clp, session, res->sr_slot ? 
··· 819 818 struct nfs4_sequence_res *res, 820 819 struct rpc_task *task) 821 820 { 822 - return nfs40_setup_sequence(server, args, res, task); 821 + return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, 822 + args, res, task); 823 823 } 824 824 825 - static int nfs4_sequence_done(struct rpc_task *task, 826 - struct nfs4_sequence_res *res) 825 + int nfs4_sequence_done(struct rpc_task *task, 826 + struct nfs4_sequence_res *res) 827 827 { 828 828 return nfs40_sequence_done(task, res); 829 829 } 830 + EXPORT_SYMBOL_GPL(nfs4_sequence_done); 830 831 831 832 #endif /* !CONFIG_NFS_V4_1 */ 832 833 ··· 1715 1712 { 1716 1713 struct nfs4_opendata *data = calldata; 1717 1714 1718 - nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, 1719 - &data->c_res.seq_res, task); 1715 + nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl, 1716 + &data->c_arg.seq_args, &data->c_res.seq_res, task); 1720 1717 } 1721 1718 1722 1719 static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) ··· 5997 5994 { 5998 5995 struct nfs_release_lockowner_data *data = calldata; 5999 5996 struct nfs_server *server = data->server; 6000 - nfs40_setup_sequence(server, &data->args.seq_args, 6001 - &data->res.seq_res, task); 5997 + nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, 5998 + &data->args.seq_args, &data->res.seq_res, task); 6002 5999 data->args.lock_owner.clientid = server->nfs_client->cl_clientid; 6003 6000 data->timestamp = jiffies; 6004 6001 } ··· 7560 7557 return; 7561 7558 if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, 7562 7559 NFS_I(lgp->args.inode)->layout, 7560 + &lgp->args.range, 7563 7561 lgp->args.ctx->state)) { 7564 7562 rpc_exit(task, NFS4_OK); 7565 7563 } ··· 7816 7812 spin_lock(&lo->plh_inode->i_lock); 7817 7813 if (lrp->res.lrs_present) 7818 7814 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); 7815 + pnfs_clear_layoutreturn_waitbit(lo); 7816 + clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); 7817 + 
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); 7819 7818 lo->plh_block_lgets--; 7820 7819 spin_unlock(&lo->plh_inode->i_lock); 7821 7820 pnfs_put_layout_hdr(lrp->args.layout); ··· 7832 7825 .rpc_release = nfs4_layoutreturn_release, 7833 7826 }; 7834 7827 7835 - int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) 7828 + int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) 7836 7829 { 7837 7830 struct rpc_task *task; 7838 7831 struct rpc_message msg = { ··· 7846 7839 .rpc_message = &msg, 7847 7840 .callback_ops = &nfs4_layoutreturn_call_ops, 7848 7841 .callback_data = lrp, 7842 + .flags = RPC_TASK_ASYNC, 7849 7843 }; 7850 - int status; 7844 + int status = 0; 7851 7845 7852 7846 dprintk("--> %s\n", __func__); 7853 7847 nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); 7854 7848 task = rpc_run_task(&task_setup_data); 7855 7849 if (IS_ERR(task)) 7856 7850 return PTR_ERR(task); 7851 + if (sync == false) 7852 + goto out; 7853 + status = nfs4_wait_for_completion_rpc_task(task); 7854 + if (status != 0) 7855 + goto out; 7857 7856 status = task->tk_status; 7858 7857 trace_nfs4_layoutreturn(lrp->args.inode, status); 7858 + out: 7859 7859 dprintk("<-- %s status=%d\n", __func__, status); 7860 7860 rpc_put_task(task); 7861 7861 return status;
+3
fs/nfs/nfs4super.c
··· 346 346 347 347 static void __exit exit_nfs_v4(void) 348 348 { 349 + /* Not called in the _init(), conditionally loaded */ 350 + nfs4_pnfs_v3_ds_connect_unload(); 351 + 349 352 unregister_nfs_version(&nfs_v4); 350 353 nfs4_unregister_sysctl(); 351 354 nfs_idmap_quit();
+6 -3
fs/nfs/nfs4xdr.c
··· 2011 2011 p = reserve_space(xdr, 16); 2012 2012 *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ 2013 2013 *p++ = cpu_to_be32(args->layout_type); 2014 - *p++ = cpu_to_be32(IOMODE_ANY); 2014 + *p++ = cpu_to_be32(args->range.iomode); 2015 2015 *p = cpu_to_be32(RETURN_FILE); 2016 2016 p = reserve_space(xdr, 16); 2017 - p = xdr_encode_hyper(p, 0); 2018 - p = xdr_encode_hyper(p, NFS4_MAX_UINT64); 2017 + p = xdr_encode_hyper(p, args->range.offset); 2018 + p = xdr_encode_hyper(p, args->range.length); 2019 2019 spin_lock(&args->inode->i_lock); 2020 2020 encode_nfs4_stateid(xdr, &args->stateid); 2021 2021 spin_unlock(&args->inode->i_lock); ··· 6566 6566 int status; 6567 6567 6568 6568 status = decode_compound_hdr(xdr, &hdr); 6569 + res->op_status = hdr.status; 6569 6570 if (status) 6570 6571 goto out; 6571 6572 status = decode_sequence(xdr, &res->seq_res, rqstp); ··· 6592 6591 int status; 6593 6592 6594 6593 status = decode_compound_hdr(xdr, &hdr); 6594 + res->op_status = hdr.status; 6595 6595 if (status) 6596 6596 goto out; 6597 6597 status = decode_sequence(xdr, &res->seq_res, rqstp); ··· 6622 6620 int status; 6623 6621 6624 6622 status = decode_compound_hdr(xdr, &hdr); 6623 + res->op_status = hdr.status; 6625 6624 if (status) 6626 6625 goto out; 6627 6626 status = decode_sequence(xdr, &res->seq_res, rqstp);
+4 -1
fs/nfs/objlayout/objio_osd.c
··· 537 537 static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, 538 538 struct nfs_page *prev, struct nfs_page *req) 539 539 { 540 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio); 540 541 unsigned int size; 541 542 542 543 size = pnfs_generic_pg_test(pgio, prev, req); 543 544 544 - if (!size || pgio->pg_count + req->wb_bytes > 545 + if (!size || mirror->pg_count + req->wb_bytes > 545 546 (unsigned long)pgio->pg_layout_private) 546 547 return 0; 547 548 ··· 608 607 .pg_init = objio_init_read, 609 608 .pg_test = objio_pg_test, 610 609 .pg_doio = pnfs_generic_pg_readpages, 610 + .pg_cleanup = pnfs_generic_pg_cleanup, 611 611 }; 612 612 613 613 static const struct nfs_pageio_ops objio_pg_write_ops = { 614 614 .pg_init = objio_init_write, 615 615 .pg_test = objio_pg_test, 616 616 .pg_doio = pnfs_generic_pg_writepages, 617 + .pg_cleanup = pnfs_generic_pg_cleanup, 617 618 }; 618 619 619 620 static struct pnfs_layoutdriver_type objlayout_type = {
+245 -49
fs/nfs/pagelist.c
··· 42 42 return p->pagevec != NULL; 43 43 } 44 44 45 + struct nfs_pgio_mirror * 46 + nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) 47 + { 48 + return nfs_pgio_has_mirroring(desc) ? 49 + &desc->pg_mirrors[desc->pg_mirror_idx] : 50 + &desc->pg_mirrors[0]; 51 + } 52 + EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); 53 + 45 54 void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, 46 55 struct nfs_pgio_header *hdr, 47 56 void (*release)(struct nfs_pgio_header *hdr)) 48 57 { 49 - hdr->req = nfs_list_entry(desc->pg_list.next); 58 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 59 + 60 + 61 + hdr->req = nfs_list_entry(mirror->pg_list.next); 50 62 hdr->inode = desc->pg_inode; 51 63 hdr->cred = hdr->req->wb_context->cred; 52 64 hdr->io_start = req_offset(hdr->req); 53 - hdr->good_bytes = desc->pg_count; 65 + hdr->good_bytes = mirror->pg_count; 54 66 hdr->dreq = desc->pg_dreq; 55 67 hdr->layout_private = desc->pg_layout_private; 56 68 hdr->release = release; 57 69 hdr->completion_ops = desc->pg_completion_ops; 58 70 if (hdr->completion_ops->init_hdr) 59 71 hdr->completion_ops->init_hdr(hdr); 72 + 73 + hdr->pgio_mirror_idx = desc->pg_mirror_idx; 60 74 } 61 75 EXPORT_SYMBOL_GPL(nfs_pgheader_init); 62 76 ··· 494 480 size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, 495 481 struct nfs_page *prev, struct nfs_page *req) 496 482 { 497 - if (desc->pg_count > desc->pg_bsize) { 483 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 484 + 485 + 486 + if (mirror->pg_count > mirror->pg_bsize) { 498 487 /* should never happen */ 499 488 WARN_ON_ONCE(1); 500 489 return 0; ··· 507 490 * Limit the request size so that we can still allocate a page array 508 491 * for it without upsetting the slab allocator. 
509 492 */ 510 - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * 493 + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * 511 494 sizeof(struct page) > PAGE_SIZE) 512 495 return 0; 513 496 514 - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); 497 + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); 515 498 } 516 499 EXPORT_SYMBOL_GPL(nfs_generic_pg_test); 517 500 ··· 614 597 } 615 598 616 599 int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, 600 + struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, 617 601 const struct rpc_call_ops *call_ops, int how, int flags) 618 602 { 619 603 struct rpc_task *task; 620 604 struct rpc_message msg = { 621 605 .rpc_argp = &hdr->args, 622 606 .rpc_resp = &hdr->res, 623 - .rpc_cred = hdr->cred, 607 + .rpc_cred = cred, 624 608 }; 625 609 struct rpc_task_setup task_setup_data = { 626 610 .rpc_client = clnt, ··· 634 616 }; 635 617 int ret = 0; 636 618 637 - hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); 619 + hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); 638 620 639 621 dprintk("NFS: %5u initiated pgio call " 640 622 "(req %s/%llu, %u bytes @ offset %llu)\n", ··· 668 650 static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, 669 651 struct nfs_pgio_header *hdr) 670 652 { 653 + struct nfs_pgio_mirror *mirror; 654 + u32 midx; 655 + 671 656 set_bit(NFS_IOHDR_REDO, &hdr->flags); 672 657 nfs_pgio_data_destroy(hdr); 673 658 hdr->completion_ops->completion(hdr); 674 - desc->pg_completion_ops->error_cleanup(&desc->pg_list); 659 + /* TODO: Make sure it's right to clean up all mirrors here 660 + * and not just hdr->pgio_mirror_idx */ 661 + for (midx = 0; midx < desc->pg_mirror_count; midx++) { 662 + mirror = &desc->pg_mirrors[midx]; 663 + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 664 + } 675 665 return -ENOMEM; 676 666 } 677 667 ··· 694 668 hdr->rw_ops->rw_release(hdr); 695 669 nfs_pgio_data_destroy(hdr); 
696 670 hdr->completion_ops->completion(hdr); 671 + } 672 + 673 + static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, 674 + unsigned int bsize) 675 + { 676 + INIT_LIST_HEAD(&mirror->pg_list); 677 + mirror->pg_bytes_written = 0; 678 + mirror->pg_count = 0; 679 + mirror->pg_bsize = bsize; 680 + mirror->pg_base = 0; 681 + mirror->pg_recoalesce = 0; 697 682 } 698 683 699 684 /** ··· 723 686 size_t bsize, 724 687 int io_flags) 725 688 { 726 - INIT_LIST_HEAD(&desc->pg_list); 727 - desc->pg_bytes_written = 0; 728 - desc->pg_count = 0; 729 - desc->pg_bsize = bsize; 730 - desc->pg_base = 0; 689 + struct nfs_pgio_mirror *new; 690 + int i; 691 + 731 692 desc->pg_moreio = 0; 732 - desc->pg_recoalesce = 0; 733 693 desc->pg_inode = inode; 734 694 desc->pg_ops = pg_ops; 735 695 desc->pg_completion_ops = compl_ops; ··· 736 702 desc->pg_lseg = NULL; 737 703 desc->pg_dreq = NULL; 738 704 desc->pg_layout_private = NULL; 705 + desc->pg_bsize = bsize; 706 + 707 + desc->pg_mirror_count = 1; 708 + desc->pg_mirror_idx = 0; 709 + 710 + if (pg_ops->pg_get_mirror_count) { 711 + /* until we have a request, we don't have an lseg and no 712 + * idea how many mirrors there will be */ 713 + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, 714 + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); 715 + desc->pg_mirrors_dynamic = new; 716 + desc->pg_mirrors = new; 717 + 718 + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) 719 + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); 720 + } else { 721 + desc->pg_mirrors_dynamic = NULL; 722 + desc->pg_mirrors = desc->pg_mirrors_static; 723 + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); 724 + } 739 725 } 740 726 EXPORT_SYMBOL_GPL(nfs_pageio_init); 741 727 ··· 791 737 int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, 792 738 struct nfs_pgio_header *hdr) 793 739 { 740 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 741 + 794 742 struct nfs_page *req; 795 743 struct page **pages, 796 744 *last_page; 797 - 
struct list_head *head = &desc->pg_list; 745 + struct list_head *head = &mirror->pg_list; 798 746 struct nfs_commit_info cinfo; 799 747 unsigned int pagecount, pageused; 800 748 801 - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); 749 + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); 802 750 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) 803 751 return nfs_pgio_error(desc, hdr); 804 752 ··· 828 772 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 829 773 830 774 /* Set up the argument struct */ 831 - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); 775 + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); 832 776 desc->pg_rpc_callops = &nfs_pgio_common_ops; 833 777 return 0; 834 778 } ··· 836 780 837 781 static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) 838 782 { 783 + struct nfs_pgio_mirror *mirror; 839 784 struct nfs_pgio_header *hdr; 840 785 int ret; 841 786 787 + mirror = nfs_pgio_current_mirror(desc); 788 + 842 789 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 843 790 if (!hdr) { 844 - desc->pg_completion_ops->error_cleanup(&desc->pg_list); 791 + /* TODO: make sure this is right with mirroring - or 792 + * should it back out all mirrors? 
*/ 793 + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 845 794 return -ENOMEM; 846 795 } 847 796 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); 848 797 ret = nfs_generic_pgio(desc, hdr); 849 798 if (ret == 0) 850 799 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), 851 - hdr, desc->pg_rpc_callops, 800 + hdr, 801 + hdr->cred, 802 + NFS_PROTO(hdr->inode), 803 + desc->pg_rpc_callops, 852 804 desc->pg_ioflags, 0); 853 805 return ret; 806 + } 807 + 808 + /* 809 + * nfs_pageio_setup_mirroring - determine if mirroring is to be used 810 + * by calling the pg_get_mirror_count op 811 + */ 812 + static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, 813 + struct nfs_page *req) 814 + { 815 + int mirror_count = 1; 816 + 817 + if (!pgio->pg_ops->pg_get_mirror_count) 818 + return 0; 819 + 820 + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); 821 + 822 + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) 823 + return -EINVAL; 824 + 825 + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) 826 + return -EINVAL; 827 + 828 + pgio->pg_mirror_count = mirror_count; 829 + 830 + return 0; 831 + } 832 + 833 + /* 834 + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) 835 + */ 836 + void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) 837 + { 838 + pgio->pg_mirror_count = 1; 839 + pgio->pg_mirror_idx = 0; 840 + } 841 + 842 + static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) 843 + { 844 + pgio->pg_mirror_count = 1; 845 + pgio->pg_mirror_idx = 0; 846 + pgio->pg_mirrors = pgio->pg_mirrors_static; 847 + kfree(pgio->pg_mirrors_dynamic); 848 + pgio->pg_mirrors_dynamic = NULL; 854 849 } 855 850 856 851 static bool nfs_match_open_context(const struct nfs_open_context *ctx1, ··· 970 863 static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, 971 864 struct nfs_page *req) 972 865 { 866 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 867 + 973 
868 struct nfs_page *prev = NULL; 974 - if (desc->pg_count != 0) { 975 - prev = nfs_list_entry(desc->pg_list.prev); 869 + 870 + if (mirror->pg_count != 0) { 871 + prev = nfs_list_entry(mirror->pg_list.prev); 976 872 } else { 977 873 if (desc->pg_ops->pg_init) 978 874 desc->pg_ops->pg_init(desc, req); 979 - desc->pg_base = req->wb_pgbase; 875 + mirror->pg_base = req->wb_pgbase; 980 876 } 981 877 if (!nfs_can_coalesce_requests(prev, req, desc)) 982 878 return 0; 983 879 nfs_list_remove_request(req); 984 - nfs_list_add_request(req, &desc->pg_list); 985 - desc->pg_count += req->wb_bytes; 880 + nfs_list_add_request(req, &mirror->pg_list); 881 + mirror->pg_count += req->wb_bytes; 986 882 return 1; 987 883 } 988 884 ··· 994 884 */ 995 885 static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 996 886 { 997 - if (!list_empty(&desc->pg_list)) { 887 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 888 + 889 + 890 + if (!list_empty(&mirror->pg_list)) { 998 891 int error = desc->pg_ops->pg_doio(desc); 999 892 if (error < 0) 1000 893 desc->pg_error = error; 1001 894 else 1002 - desc->pg_bytes_written += desc->pg_count; 895 + mirror->pg_bytes_written += mirror->pg_count; 1003 896 } 1004 - if (list_empty(&desc->pg_list)) { 1005 - desc->pg_count = 0; 1006 - desc->pg_base = 0; 897 + if (list_empty(&mirror->pg_list)) { 898 + mirror->pg_count = 0; 899 + mirror->pg_base = 0; 1007 900 } 1008 901 } 1009 902 ··· 1024 911 static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 1025 912 struct nfs_page *req) 1026 913 { 914 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 915 + 1027 916 struct nfs_page *subreq; 1028 917 unsigned int bytes_left = 0; 1029 918 unsigned int offset, pgbase; ··· 1049 934 nfs_pageio_doio(desc); 1050 935 if (desc->pg_error < 0) 1051 936 return 0; 1052 - if (desc->pg_recoalesce) 937 + if (mirror->pg_recoalesce) 1053 938 return 0; 1054 939 /* retry add_request for this subreq */ 1055 940 
nfs_page_group_lock(req, false); ··· 1087 972 1088 973 static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) 1089 974 { 975 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 1090 976 LIST_HEAD(head); 1091 977 1092 978 do { 1093 - list_splice_init(&desc->pg_list, &head); 1094 - desc->pg_bytes_written -= desc->pg_count; 1095 - desc->pg_count = 0; 1096 - desc->pg_base = 0; 1097 - desc->pg_recoalesce = 0; 979 + list_splice_init(&mirror->pg_list, &head); 980 + mirror->pg_bytes_written -= mirror->pg_count; 981 + mirror->pg_count = 0; 982 + mirror->pg_base = 0; 983 + mirror->pg_recoalesce = 0; 984 + 1098 985 desc->pg_moreio = 0; 1099 986 1100 987 while (!list_empty(&head)) { ··· 1110 993 return 0; 1111 994 break; 1112 995 } 1113 - } while (desc->pg_recoalesce); 996 + } while (mirror->pg_recoalesce); 1114 997 return 1; 1115 998 } 1116 999 1117 - int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 1000 + static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, 1118 1001 struct nfs_page *req) 1119 1002 { 1120 1003 int ret; ··· 1127 1010 break; 1128 1011 ret = nfs_do_recoalesce(desc); 1129 1012 } while (ret); 1013 + 1130 1014 return ret; 1015 + } 1016 + 1017 + int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 1018 + struct nfs_page *req) 1019 + { 1020 + u32 midx; 1021 + unsigned int pgbase, offset, bytes; 1022 + struct nfs_page *dupreq, *lastreq; 1023 + 1024 + pgbase = req->wb_pgbase; 1025 + offset = req->wb_offset; 1026 + bytes = req->wb_bytes; 1027 + 1028 + nfs_pageio_setup_mirroring(desc, req); 1029 + 1030 + for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1031 + if (midx) { 1032 + nfs_page_group_lock(req, false); 1033 + 1034 + /* find the last request */ 1035 + for (lastreq = req->wb_head; 1036 + lastreq->wb_this_page != req->wb_head; 1037 + lastreq = lastreq->wb_this_page) 1038 + ; 1039 + 1040 + dupreq = nfs_create_request(req->wb_context, 1041 + req->wb_page, lastreq, pgbase, bytes); 1042 + 
1043 + if (IS_ERR(dupreq)) { 1044 + nfs_page_group_unlock(req); 1045 + return 0; 1046 + } 1047 + 1048 + nfs_lock_request(dupreq); 1049 + nfs_page_group_unlock(req); 1050 + dupreq->wb_offset = offset; 1051 + dupreq->wb_index = req->wb_index; 1052 + } else 1053 + dupreq = req; 1054 + 1055 + if (nfs_pgio_has_mirroring(desc)) 1056 + desc->pg_mirror_idx = midx; 1057 + if (!nfs_pageio_add_request_mirror(desc, dupreq)) 1058 + return 0; 1059 + } 1060 + 1061 + return 1; 1062 + } 1063 + 1064 + /* 1065 + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an 1066 + * nfs_pageio_descriptor 1067 + * @desc: pointer to io descriptor 1068 + */ 1069 + static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, 1070 + u32 mirror_idx) 1071 + { 1072 + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; 1073 + u32 restore_idx = desc->pg_mirror_idx; 1074 + 1075 + if (nfs_pgio_has_mirroring(desc)) 1076 + desc->pg_mirror_idx = mirror_idx; 1077 + for (;;) { 1078 + nfs_pageio_doio(desc); 1079 + if (!mirror->pg_recoalesce) 1080 + break; 1081 + if (!nfs_do_recoalesce(desc)) 1082 + break; 1083 + } 1084 + desc->pg_mirror_idx = restore_idx; 1131 1085 } 1132 1086 1133 1087 /* ··· 1234 1046 EXPORT_SYMBOL_GPL(nfs_pageio_resend); 1235 1047 1236 1048 /** 1237 - * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor 1049 + * nfs_pageio_complete - Complete I/O then cleanup an nfs_pageio_descriptor 1238 1050 * @desc: pointer to io descriptor 1239 1051 */ 1240 1052 void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) 1241 1053 { 1242 - for (;;) { 1243 - nfs_pageio_doio(desc); 1244 - if (!desc->pg_recoalesce) 1245 - break; 1246 - if (!nfs_do_recoalesce(desc)) 1247 - break; 1248 - } 1054 + u32 midx; 1055 + 1056 + for (midx = 0; midx < desc->pg_mirror_count; midx++) 1057 + nfs_pageio_complete_mirror(desc, midx); 1058 + 1059 + if (desc->pg_ops->pg_cleanup) 1060 + desc->pg_ops->pg_cleanup(desc); 1061 + nfs_pageio_cleanup_mirroring(desc); 1249 1062 
} 1250 1063 1251 1064 /** ··· 1262 1073 */ 1263 1074 void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) 1264 1075 { 1265 - if (!list_empty(&desc->pg_list)) { 1266 - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); 1267 - if (index != prev->wb_index + 1) 1268 - nfs_pageio_complete(desc); 1076 + struct nfs_pgio_mirror *mirror; 1077 + struct nfs_page *prev; 1078 + u32 midx; 1079 + 1080 + for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1081 + mirror = &desc->pg_mirrors[midx]; 1082 + if (!list_empty(&mirror->pg_list)) { 1083 + prev = nfs_list_entry(mirror->pg_list.prev); 1084 + if (index != prev->wb_index + 1) 1085 + nfs_pageio_complete_mirror(desc, midx); 1086 + } 1269 1087 } 1270 1088 } 1271 1089
+380 -89
fs/nfs/pnfs.c
··· 51 51 */ 52 52 static LIST_HEAD(pnfs_modules_tbl); 53 53 54 + static int 55 + pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 56 + enum pnfs_iomode iomode, bool sync); 57 + 54 58 /* Return the registered pnfs layout driver module matching given id */ 55 59 static struct pnfs_layoutdriver_type * 56 60 find_pnfs_driver_locked(u32 id) ··· 243 239 struct inode *inode = lo->plh_inode; 244 240 245 241 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 242 + if (!list_empty(&lo->plh_segs)) 243 + WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); 246 244 pnfs_detach_layout_hdr(lo); 247 245 spin_unlock(&inode->i_lock); 248 246 pnfs_free_layout_hdr(lo); ··· 344 338 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 345 339 } 346 340 341 + /* Return true if layoutreturn is needed */ 342 + static bool 343 + pnfs_layout_need_return(struct pnfs_layout_hdr *lo, 344 + struct pnfs_layout_segment *lseg) 345 + { 346 + struct pnfs_layout_segment *s; 347 + 348 + if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) 349 + return false; 350 + 351 + list_for_each_entry(s, &lo->plh_segs, pls_list) 352 + if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) 353 + return false; 354 + 355 + return true; 356 + } 357 + 358 + static void pnfs_layoutreturn_free_lseg(struct work_struct *work) 359 + { 360 + struct pnfs_layout_segment *lseg; 361 + struct pnfs_layout_hdr *lo; 362 + struct inode *inode; 363 + 364 + lseg = container_of(work, struct pnfs_layout_segment, pls_work); 365 + WARN_ON(atomic_read(&lseg->pls_refcount)); 366 + lo = lseg->pls_layout; 367 + inode = lo->plh_inode; 368 + 369 + spin_lock(&inode->i_lock); 370 + if (pnfs_layout_need_return(lo, lseg)) { 371 + nfs4_stateid stateid; 372 + enum pnfs_iomode iomode; 373 + 374 + stateid = lo->plh_stateid; 375 + iomode = lo->plh_return_iomode; 376 + /* decreased in pnfs_send_layoutreturn() */ 377 + lo->plh_block_lgets++; 378 + lo->plh_return_iomode = 0; 379 + spin_unlock(&inode->i_lock); 380 + 
381 + pnfs_send_layoutreturn(lo, stateid, iomode, true); 382 + spin_lock(&inode->i_lock); 383 + } else 384 + /* match pnfs_get_layout_hdr #2 in pnfs_put_lseg */ 385 + pnfs_put_layout_hdr(lo); 386 + pnfs_layout_remove_lseg(lo, lseg); 387 + spin_unlock(&inode->i_lock); 388 + pnfs_free_lseg(lseg); 389 + /* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */ 390 + pnfs_put_layout_hdr(lo); 391 + } 392 + 393 + static void 394 + pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg) 395 + { 396 + INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg); 397 + queue_work(nfsiod_workqueue, &lseg->pls_work); 398 + } 399 + 347 400 void 348 401 pnfs_put_lseg(struct pnfs_layout_segment *lseg) 349 402 { ··· 419 354 inode = lo->plh_inode; 420 355 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 421 356 pnfs_get_layout_hdr(lo); 422 - pnfs_layout_remove_lseg(lo, lseg); 423 - spin_unlock(&inode->i_lock); 424 - pnfs_free_lseg(lseg); 425 - pnfs_put_layout_hdr(lo); 357 + if (pnfs_layout_need_return(lo, lseg)) { 358 + spin_unlock(&inode->i_lock); 359 + /* hdr reference dropped in nfs4_layoutreturn_release */ 360 + pnfs_get_layout_hdr(lo); 361 + pnfs_layoutreturn_free_lseg_async(lseg); 362 + } else { 363 + pnfs_layout_remove_lseg(lo, lseg); 364 + spin_unlock(&inode->i_lock); 365 + pnfs_free_lseg(lseg); 366 + pnfs_put_layout_hdr(lo); 367 + } 426 368 } 427 369 } 428 370 EXPORT_SYMBOL_GPL(pnfs_put_lseg); ··· 616 544 pnfs_get_layout_hdr(lo); 617 545 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 618 546 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 547 + pnfs_clear_retry_layoutget(lo); 619 548 spin_unlock(&nfsi->vfs_inode.i_lock); 620 549 pnfs_free_lseg_list(&tmp_list); 621 550 pnfs_put_layout_hdr(lo); ··· 814 741 return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); 815 742 } 816 743 744 + static bool 745 + pnfs_layout_returning(const struct pnfs_layout_hdr *lo, 746 + struct pnfs_layout_range *range) 747 + { 748 + return test_bit(NFS_LAYOUT_RETURN, 
&lo->plh_flags) && 749 + (lo->plh_return_iomode == IOMODE_ANY || 750 + lo->plh_return_iomode == range->iomode); 751 + } 752 + 817 753 /* lget is set to 1 if called from inside send_layoutget call chain */ 818 754 static bool 819 - pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) 755 + pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, 756 + struct pnfs_layout_range *range, int lget) 820 757 { 821 758 return lo->plh_block_lgets || 822 759 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 823 760 (list_empty(&lo->plh_segs) && 824 - (atomic_read(&lo->plh_outstanding) > lget)); 761 + (atomic_read(&lo->plh_outstanding) > lget)) || 762 + pnfs_layout_returning(lo, range); 825 763 } 826 764 827 765 int 828 766 pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 767 + struct pnfs_layout_range *range, 829 768 struct nfs4_state *open_state) 830 769 { 831 770 int status = 0; 832 771 833 772 dprintk("--> %s\n", __func__); 834 773 spin_lock(&lo->plh_inode->i_lock); 835 - if (pnfs_layoutgets_blocked(lo, 1)) { 774 + if (pnfs_layoutgets_blocked(lo, range, 1)) { 836 775 status = -EAGAIN; 837 776 } else if (!nfs4_valid_open_stateid(open_state)) { 838 777 status = -EBADF; ··· 911 826 pnfs_layout_io_set_failed(lo, range->iomode); 912 827 } 913 828 return NULL; 914 - } 829 + } else 830 + pnfs_layout_clear_fail_bit(lo, 831 + pnfs_iomode_to_fail_bit(range->iomode)); 915 832 916 833 return lseg; 917 834 } ··· 933 846 } 934 847 } 935 848 849 + void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) 850 + { 851 + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); 852 + smp_mb__after_atomic(); 853 + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 854 + } 855 + 856 + static int 857 + pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 858 + enum pnfs_iomode iomode, bool sync) 859 + { 860 + struct inode *ino = lo->plh_inode; 861 + struct nfs4_layoutreturn *lrp; 862 + int status = 0; 863 + 864 + lrp = 
kzalloc(sizeof(*lrp), GFP_KERNEL); 865 + if (unlikely(lrp == NULL)) { 866 + status = -ENOMEM; 867 + spin_lock(&ino->i_lock); 868 + lo->plh_block_lgets--; 869 + pnfs_clear_layoutreturn_waitbit(lo); 870 + rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); 871 + spin_unlock(&ino->i_lock); 872 + pnfs_put_layout_hdr(lo); 873 + goto out; 874 + } 875 + 876 + lrp->args.stateid = stateid; 877 + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 878 + lrp->args.inode = ino; 879 + lrp->args.range.iomode = iomode; 880 + lrp->args.range.offset = 0; 881 + lrp->args.range.length = NFS4_MAX_UINT64; 882 + lrp->args.layout = lo; 883 + lrp->clp = NFS_SERVER(ino)->nfs_client; 884 + lrp->cred = lo->plh_lc_cred; 885 + 886 + status = nfs4_proc_layoutreturn(lrp, sync); 887 + out: 888 + dprintk("<-- %s status: %d\n", __func__, status); 889 + return status; 890 + } 891 + 936 892 /* 937 893 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 938 894 * when the layout segment list is empty. ··· 990 860 struct pnfs_layout_hdr *lo = NULL; 991 861 struct nfs_inode *nfsi = NFS_I(ino); 992 862 LIST_HEAD(tmp_list); 993 - struct nfs4_layoutreturn *lrp; 994 863 nfs4_stateid stateid; 995 864 int status = 0, empty; 996 865 ··· 1031 902 spin_unlock(&ino->i_lock); 1032 903 pnfs_free_lseg_list(&tmp_list); 1033 904 1034 - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 1035 - if (unlikely(lrp == NULL)) { 1036 - status = -ENOMEM; 1037 - spin_lock(&ino->i_lock); 1038 - lo->plh_block_lgets--; 1039 - spin_unlock(&ino->i_lock); 1040 - pnfs_put_layout_hdr(lo); 1041 - goto out; 1042 - } 1043 - 1044 - lrp->args.stateid = stateid; 1045 - lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 1046 - lrp->args.inode = ino; 1047 - lrp->args.layout = lo; 1048 - lrp->clp = NFS_SERVER(ino)->nfs_client; 1049 - lrp->cred = lo->plh_lc_cred; 1050 - 1051 - status = nfs4_proc_layoutreturn(lrp); 905 + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1052 906 out: 1053 907 dprintk("<-- %s status: 
%d\n", __func__, status); 1054 908 return status; ··· 1072 960 struct nfs4_state *state; 1073 961 struct pnfs_layout_hdr *lo; 1074 962 struct pnfs_layout_segment *lseg, *tmp; 963 + nfs4_stateid stateid; 1075 964 LIST_HEAD(tmp_list); 1076 - bool found = false; 965 + bool found = false, layoutreturn = false; 1077 966 1078 967 spin_lock(&ino->i_lock); 1079 968 lo = nfsi->layout; ··· 1093 980 goto out_noroc; 1094 981 } 1095 982 983 + goto out_noroc; 984 + pnfs_clear_retry_layoutget(lo); 1096 985 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) 1097 986 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 1098 987 mark_lseg_invalid(lseg, &tmp_list); ··· 1109 994 return true; 1110 995 1111 996 out_noroc: 997 + if (lo) { 998 + stateid = lo->plh_stateid; 999 + layoutreturn = 1000 + test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1001 + &lo->plh_flags); 1002 + if (layoutreturn) { 1003 + lo->plh_block_lgets++; 1004 + pnfs_get_layout_hdr(lo); 1005 + } 1006 + } 1112 1007 spin_unlock(&ino->i_lock); 1008 + if (layoutreturn) 1009 + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1113 1010 return false; 1114 1011 } 1115 1012 ··· 1156 1029 struct nfs_inode *nfsi = NFS_I(ino); 1157 1030 struct pnfs_layout_hdr *lo; 1158 1031 struct pnfs_layout_segment *lseg; 1032 + nfs4_stateid stateid; 1159 1033 u32 current_seqid; 1160 - bool found = false; 1034 + bool found = false, layoutreturn = false; 1161 1035 1162 1036 spin_lock(&ino->i_lock); 1163 1037 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) ··· 1175 1047 */ 1176 1048 *barrier = current_seqid + atomic_read(&lo->plh_outstanding); 1177 1049 out: 1050 + if (!found) { 1051 + stateid = lo->plh_stateid; 1052 + layoutreturn = 1053 + test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1054 + &lo->plh_flags); 1055 + if (layoutreturn) { 1056 + lo->plh_block_lgets++; 1057 + pnfs_get_layout_hdr(lo); 1058 + } 1059 + } 1178 1060 spin_unlock(&ino->i_lock); 1061 + if (layoutreturn) { 1062 + 
rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); 1063 + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); 1064 + } 1179 1065 return found; 1180 1066 } 1181 1067 ··· 1336 1194 1337 1195 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 1338 1196 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 1197 + !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && 1339 1198 pnfs_lseg_range_match(&lseg->pls_range, range)) { 1340 1199 ret = pnfs_get_lseg(lseg); 1341 1200 break; ··· 1425 1282 return ret; 1426 1283 } 1427 1284 1285 + /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ 1286 + static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) 1287 + { 1288 + if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) 1289 + return 1; 1290 + return nfs_wait_bit_killable(key); 1291 + } 1292 + 1293 + static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 1294 + { 1295 + /* 1296 + * send layoutcommit as it can hold up layoutreturn due to lseg 1297 + * reference 1298 + */ 1299 + pnfs_layoutcommit_inode(lo->plh_inode, false); 1300 + return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 1301 + pnfs_layoutget_retry_bit_wait, 1302 + TASK_UNINTERRUPTIBLE); 1303 + } 1304 + 1305 + static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) 1306 + { 1307 + unsigned long *bitlock = &lo->plh_flags; 1308 + 1309 + clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); 1310 + smp_mb__after_atomic(); 1311 + wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); 1312 + } 1313 + 1428 1314 /* 1429 1315 * Layout segment is retreived from the server if not cached. 1430 1316 * The appropriate layout segment is referenced and returned to the caller. 
··· 1484 1312 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) 1485 1313 goto out; 1486 1314 1315 + lookup_again: 1316 + first = false; 1487 1317 spin_lock(&ino->i_lock); 1488 1318 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1489 1319 if (lo == NULL) { ··· 1500 1326 } 1501 1327 1502 1328 /* if LAYOUTGET already failed once we don't try again */ 1503 - if (pnfs_layout_io_test_failed(lo, iomode)) 1329 + if (pnfs_layout_io_test_failed(lo, iomode) && 1330 + !pnfs_should_retry_layoutget(lo)) 1504 1331 goto out_unlock; 1505 1332 1506 - /* Check to see if the layout for the given range already exists */ 1507 - lseg = pnfs_find_lseg(lo, &arg); 1508 - if (lseg) 1509 - goto out_unlock; 1333 + first = list_empty(&lo->plh_segs); 1334 + if (first) { 1335 + /* The first layoutget for the file. Need to serialize per 1336 + * RFC 5661 Errata 3208. 1337 + */ 1338 + if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, 1339 + &lo->plh_flags)) { 1340 + spin_unlock(&ino->i_lock); 1341 + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, 1342 + TASK_UNINTERRUPTIBLE); 1343 + pnfs_put_layout_hdr(lo); 1344 + goto lookup_again; 1345 + } 1346 + } else { 1347 + /* Check to see if the layout for the given range 1348 + * already exists 1349 + */ 1350 + lseg = pnfs_find_lseg(lo, &arg); 1351 + if (lseg) 1352 + goto out_unlock; 1353 + } 1510 1354 1511 - if (pnfs_layoutgets_blocked(lo, 0)) 1355 + /* 1356 + * Because we free lsegs before sending LAYOUTRETURN, we need to wait 1357 + * for LAYOUTRETURN even if first is true. 
1358 + */ 1359 + if (!lseg && pnfs_should_retry_layoutget(lo) && 1360 + test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 1361 + spin_unlock(&ino->i_lock); 1362 + dprintk("%s wait for layoutreturn\n", __func__); 1363 + if (pnfs_prepare_to_retry_layoutget(lo)) { 1364 + if (first) 1365 + pnfs_clear_first_layoutget(lo); 1366 + pnfs_put_layout_hdr(lo); 1367 + dprintk("%s retrying\n", __func__); 1368 + goto lookup_again; 1369 + } 1370 + goto out_put_layout_hdr; 1371 + } 1372 + 1373 + if (pnfs_layoutgets_blocked(lo, &arg, 0)) 1512 1374 goto out_unlock; 1513 1375 atomic_inc(&lo->plh_outstanding); 1514 - 1515 - first = list_empty(&lo->plh_layouts) ? true : false; 1516 1376 spin_unlock(&ino->i_lock); 1517 1377 1518 - if (first) { 1378 + if (list_empty(&lo->plh_layouts)) { 1519 1379 /* The lo must be on the clp list if there is any 1520 1380 * chance of a CB_LAYOUTRECALL(FILE) coming in. 1521 1381 */ 1522 1382 spin_lock(&clp->cl_lock); 1523 - list_add_tail(&lo->plh_layouts, &server->layouts); 1383 + if (list_empty(&lo->plh_layouts)) 1384 + list_add_tail(&lo->plh_layouts, &server->layouts); 1524 1385 spin_unlock(&clp->cl_lock); 1525 1386 } 1526 1387 ··· 1568 1359 arg.length = PAGE_CACHE_ALIGN(arg.length); 1569 1360 1570 1361 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 1362 + pnfs_clear_retry_layoutget(lo); 1571 1363 atomic_dec(&lo->plh_outstanding); 1572 1364 out_put_layout_hdr: 1365 + if (first) 1366 + pnfs_clear_first_layoutget(lo); 1573 1367 pnfs_put_layout_hdr(lo); 1574 1368 out: 1575 1369 dprintk("%s: inode %s/%llu pNFS layout segment %s for " ··· 1621 1409 goto out_forget_reply; 1622 1410 } 1623 1411 1624 - if (pnfs_layoutgets_blocked(lo, 1)) { 1412 + if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) { 1625 1413 dprintk("%s forget reply due to state\n", __func__); 1626 1414 goto out_forget_reply; 1627 1415 } ··· 1668 1456 goto out; 1669 1457 } 1670 1458 1459 + static void 1460 + pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1461 + struct list_head 
*tmp_list, 1462 + struct pnfs_layout_range *return_range) 1463 + { 1464 + struct pnfs_layout_segment *lseg, *next; 1465 + 1466 + dprintk("%s:Begin lo %p\n", __func__, lo); 1467 + 1468 + if (list_empty(&lo->plh_segs)) 1469 + return; 1470 + 1471 + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1472 + if (should_free_lseg(&lseg->pls_range, return_range)) { 1473 + dprintk("%s: marking lseg %p iomode %d " 1474 + "offset %llu length %llu\n", __func__, 1475 + lseg, lseg->pls_range.iomode, 1476 + lseg->pls_range.offset, 1477 + lseg->pls_range.length); 1478 + set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1479 + mark_lseg_invalid(lseg, tmp_list); 1480 + } 1481 + } 1482 + 1483 + void pnfs_error_mark_layout_for_return(struct inode *inode, 1484 + struct pnfs_layout_segment *lseg) 1485 + { 1486 + struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 1487 + int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); 1488 + struct pnfs_layout_range range = { 1489 + .iomode = lseg->pls_range.iomode, 1490 + .offset = 0, 1491 + .length = NFS4_MAX_UINT64, 1492 + }; 1493 + LIST_HEAD(free_me); 1494 + 1495 + spin_lock(&inode->i_lock); 1496 + /* set failure bit so that pnfs path will be retried later */ 1497 + pnfs_layout_set_fail_bit(lo, iomode); 1498 + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); 1499 + if (lo->plh_return_iomode == 0) 1500 + lo->plh_return_iomode = range.iomode; 1501 + else if (lo->plh_return_iomode != range.iomode) 1502 + lo->plh_return_iomode = IOMODE_ANY; 1503 + /* 1504 + * mark all matching lsegs so that we are sure to have no live 1505 + * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1506 + * for how it works. 
1507 + */ 1508 + pnfs_mark_matching_lsegs_return(lo, &free_me, &range); 1509 + spin_unlock(&inode->i_lock); 1510 + pnfs_free_lseg_list(&free_me); 1511 + } 1512 + EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 1513 + 1671 1514 void 1672 1515 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1673 1516 { 1674 1517 u64 rd_size = req->wb_bytes; 1675 1518 1676 - WARN_ON_ONCE(pgio->pg_lseg != NULL); 1519 + if (pgio->pg_lseg == NULL) { 1520 + if (pgio->pg_dreq == NULL) 1521 + rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 1522 + else 1523 + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); 1677 1524 1678 - if (pgio->pg_dreq == NULL) 1679 - rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 1680 - else 1681 - rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); 1682 - 1683 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1684 - req->wb_context, 1685 - req_offset(req), 1686 - rd_size, 1687 - IOMODE_READ, 1688 - GFP_KERNEL); 1525 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1526 + req->wb_context, 1527 + req_offset(req), 1528 + rd_size, 1529 + IOMODE_READ, 1530 + GFP_KERNEL); 1531 + } 1689 1532 /* If no lseg, fall back to read through mds */ 1690 1533 if (pgio->pg_lseg == NULL) 1691 1534 nfs_pageio_reset_read_mds(pgio); ··· 1752 1485 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 1753 1486 struct nfs_page *req, u64 wb_size) 1754 1487 { 1755 - WARN_ON_ONCE(pgio->pg_lseg != NULL); 1756 - 1757 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1758 - req->wb_context, 1759 - req_offset(req), 1760 - wb_size, 1761 - IOMODE_RW, 1762 - GFP_NOFS); 1488 + if (pgio->pg_lseg == NULL) 1489 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1490 + req->wb_context, 1491 + req_offset(req), 1492 + wb_size, 1493 + IOMODE_RW, 1494 + GFP_NOFS); 1763 1495 /* If no lseg, fall back to write through mds */ 1764 1496 if (pgio->pg_lseg == NULL) 1765 1497 nfs_pageio_reset_write_mds(pgio); 1766 1498 } 1767 1499 
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1768 1500 1501 + void 1502 + pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) 1503 + { 1504 + if (desc->pg_lseg) { 1505 + pnfs_put_lseg(desc->pg_lseg); 1506 + desc->pg_lseg = NULL; 1507 + } 1508 + } 1509 + EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); 1510 + 1769 1511 /* 1770 1512 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 1771 1513 * of bytes (maximum @req->wb_bytes) that can be coalesced. 1772 1514 */ 1773 1515 size_t 1774 - pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1775 - struct nfs_page *req) 1516 + pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, 1517 + struct nfs_page *prev, struct nfs_page *req) 1776 1518 { 1777 1519 unsigned int size; 1778 1520 u64 seg_end, req_start, seg_left; ··· 1805 1529 seg_end = end_offset(pgio->pg_lseg->pls_range.offset, 1806 1530 pgio->pg_lseg->pls_range.length); 1807 1531 req_start = req_offset(req); 1808 - WARN_ON_ONCE(req_start > seg_end); 1532 + WARN_ON_ONCE(req_start >= seg_end); 1809 1533 /* start of request is past the last byte of this segment */ 1810 - if (req_start >= seg_end) 1534 + if (req_start >= seg_end) { 1535 + /* reference the new lseg */ 1536 + if (pgio->pg_ops->pg_cleanup) 1537 + pgio->pg_ops->pg_cleanup(pgio); 1538 + if (pgio->pg_ops->pg_init) 1539 + pgio->pg_ops->pg_init(pgio, req); 1811 1540 return 0; 1541 + } 1812 1542 1813 1543 /* adjust 'size' iff there are fewer bytes left in the 1814 1544 * segment than what nfs_generic_pg_test returned */ ··· 1869 1587 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1870 1588 struct nfs_pgio_header *hdr) 1871 1589 { 1590 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 1591 + 1872 1592 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1873 - list_splice_tail_init(&hdr->pages, &desc->pg_list); 1593 + list_splice_tail_init(&hdr->pages, &mirror->pg_list); 1874 1594 nfs_pageio_reset_write_mds(desc); 1875 - 
desc->pg_recoalesce = 1; 1595 + mirror->pg_recoalesce = 1; 1876 1596 } 1877 1597 nfs_pgio_data_destroy(hdr); 1878 1598 } ··· 1908 1624 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1909 1625 enum pnfs_try_status trypnfs; 1910 1626 1911 - desc->pg_lseg = NULL; 1912 1627 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); 1913 1628 if (trypnfs == PNFS_NOT_ATTEMPTED) 1914 1629 pnfs_write_through_mds(desc, hdr); 1915 - pnfs_put_lseg(lseg); 1916 1630 } 1917 1631 1918 1632 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) ··· 1923 1641 int 1924 1642 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1925 1643 { 1644 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 1645 + 1926 1646 struct nfs_pgio_header *hdr; 1927 1647 int ret; 1928 1648 1929 1649 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 1930 1650 if (!hdr) { 1931 - desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1932 - pnfs_put_lseg(desc->pg_lseg); 1933 - desc->pg_lseg = NULL; 1651 + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 1934 1652 return -ENOMEM; 1935 1653 } 1936 1654 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1655 + 1937 1656 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1938 1657 ret = nfs_generic_pgio(desc, hdr); 1939 - if (ret != 0) { 1940 - pnfs_put_lseg(desc->pg_lseg); 1941 - desc->pg_lseg = NULL; 1942 - } else 1658 + if (!ret) 1943 1659 pnfs_do_write(desc, hdr, desc->pg_ioflags); 1660 + 1944 1661 return ret; 1945 1662 } 1946 1663 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); ··· 1984 1703 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1985 1704 struct nfs_pgio_header *hdr) 1986 1705 { 1706 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 1707 + 1987 1708 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1988 - list_splice_tail_init(&hdr->pages, &desc->pg_list); 1709 + list_splice_tail_init(&hdr->pages, &mirror->pg_list); 1989 1710 nfs_pageio_reset_read_mds(desc); 1990 - desc->pg_recoalesce = 1; 
1711 + mirror->pg_recoalesce = 1; 1991 1712 } 1992 1713 nfs_pgio_data_destroy(hdr); 1993 1714 } ··· 2018 1735 return trypnfs; 2019 1736 } 2020 1737 1738 + /* Resend all requests through pnfs. */ 1739 + int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) 1740 + { 1741 + struct nfs_pageio_descriptor pgio; 1742 + 1743 + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); 1744 + return nfs_pageio_resend(&pgio, hdr); 1745 + } 1746 + EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); 1747 + 2021 1748 static void 2022 1749 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 2023 1750 { 2024 1751 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 2025 1752 struct pnfs_layout_segment *lseg = desc->pg_lseg; 2026 1753 enum pnfs_try_status trypnfs; 1754 + int err = 0; 2027 1755 2028 - desc->pg_lseg = NULL; 2029 1756 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); 2030 - if (trypnfs == PNFS_NOT_ATTEMPTED) 1757 + if (trypnfs == PNFS_TRY_AGAIN) 1758 + err = pnfs_read_resend_pnfs(hdr); 1759 + if (trypnfs == PNFS_NOT_ATTEMPTED || err) 2031 1760 pnfs_read_through_mds(desc, hdr); 2032 - pnfs_put_lseg(lseg); 2033 1761 } 2034 1762 2035 1763 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) ··· 2053 1759 int 2054 1760 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 2055 1761 { 1762 + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 1763 + 2056 1764 struct nfs_pgio_header *hdr; 2057 1765 int ret; 2058 1766 2059 1767 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2060 1768 if (!hdr) { 2061 - desc->pg_completion_ops->error_cleanup(&desc->pg_list); 2062 - ret = -ENOMEM; 2063 - pnfs_put_lseg(desc->pg_lseg); 2064 - desc->pg_lseg = NULL; 2065 - return ret; 1769 + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 1770 + return -ENOMEM; 2066 1771 } 2067 1772 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 2068 1773 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2069 1774 ret = nfs_generic_pgio(desc, 
hdr); 2070 - if (ret != 0) { 2071 - pnfs_put_lseg(desc->pg_lseg); 2072 - desc->pg_lseg = NULL; 2073 - } else 1775 + if (!ret) 2074 1776 pnfs_do_read(desc, hdr); 2075 1777 return ret; 2076 1778 } ··· 2272 1982 pnfs_clear_layoutcommitting(inode); 2273 1983 goto out; 2274 1984 } 1985 + EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); 2275 1986 2276 1987 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 2277 1988 {
+105 -20
fs/nfs/pnfs.h
··· 38 38 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 39 39 NFS_LSEG_ROC, /* roc bit received from server */ 40 40 NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ 41 + NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */ 42 + }; 43 + 44 + /* Individual ip address */ 45 + struct nfs4_pnfs_ds_addr { 46 + struct sockaddr_storage da_addr; 47 + size_t da_addrlen; 48 + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 49 + char *da_remotestr; /* human readable addr+port */ 50 + }; 51 + 52 + struct nfs4_pnfs_ds { 53 + struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 54 + char *ds_remotestr; /* comma sep list of addrs */ 55 + struct list_head ds_addrs; 56 + struct nfs_client *ds_clp; 57 + atomic_t ds_count; 58 + unsigned long ds_state; 59 + #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ 41 60 }; 42 61 43 62 struct pnfs_layout_segment { ··· 72 53 enum pnfs_try_status { 73 54 PNFS_ATTEMPTED = 0, 74 55 PNFS_NOT_ATTEMPTED = 1, 56 + PNFS_TRY_AGAIN = 2, 75 57 }; 76 58 77 59 #ifdef CONFIG_NFS_V4_1 78 60 79 61 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" 62 + 63 + /* 64 + * Default data server connection timeout and retrans vaules. 65 + * Set by module parameters dataserver_timeo and dataserver_retrans. 
66 + */ 67 + #define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ 68 + #define NFS4_DEF_DS_RETRANS 5 69 + 70 + /* error codes for internal use */ 71 + #define NFS4ERR_RESET_TO_MDS 12001 72 + #define NFS4ERR_RESET_TO_PNFS 12002 80 73 81 74 enum { 82 75 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ ··· 96 65 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 97 66 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 98 67 NFS_LAYOUT_RETURN, /* Return this layout ASAP */ 68 + NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ 99 69 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ 70 + NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ 71 + NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */ 100 72 }; 101 73 102 74 enum layoutdriver_policy_flags { ··· 140 106 struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); 141 107 void (*mark_request_commit) (struct nfs_page *req, 142 108 struct pnfs_layout_segment *lseg, 143 - struct nfs_commit_info *cinfo); 109 + struct nfs_commit_info *cinfo, 110 + u32 ds_commit_idx); 144 111 void (*clear_request_commit) (struct nfs_page *req, 145 112 struct nfs_commit_info *cinfo); 146 113 int (*scan_commit_lists) (struct nfs_commit_info *cinfo, ··· 189 154 u32 plh_barrier; /* ignore lower seqids */ 190 155 unsigned long plh_retry_timestamp; 191 156 unsigned long plh_flags; 157 + enum pnfs_iomode plh_return_iomode; 192 158 loff_t plh_lwb; /* last write byte for layoutcommit */ 193 159 struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ 194 160 struct inode *plh_inode; ··· 221 185 struct pnfs_device *dev, 222 186 struct rpc_cred *cred); 223 187 extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); 224 - extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); 188 + extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); 225 189 226 190 /* pnfs.c */ 227 191 void pnfs_get_layout_hdr(struct 
pnfs_layout_hdr *lo); ··· 234 198 int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); 235 199 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 236 200 struct nfs_page *req, u64 wb_size); 201 + void pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *); 237 202 int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); 238 203 size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, 239 204 struct nfs_page *prev, struct nfs_page *req); ··· 254 217 bool update_barrier); 255 218 int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, 256 219 struct pnfs_layout_hdr *lo, 220 + struct pnfs_layout_range *range, 257 221 struct nfs4_state *open_state); 258 222 int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 259 223 struct list_head *tmp_list, ··· 271 233 int pnfs_commit_and_return_layout(struct inode *); 272 234 void pnfs_ld_write_done(struct nfs_pgio_header *); 273 235 void pnfs_ld_read_done(struct nfs_pgio_header *); 236 + int pnfs_read_resend_pnfs(struct nfs_pgio_header *); 274 237 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 275 238 struct nfs_open_context *ctx, 276 239 loff_t pos, 277 240 u64 count, 278 241 enum pnfs_iomode iomode, 279 242 gfp_t gfp_flags); 243 + void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); 280 244 281 245 void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 282 246 int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); 283 247 int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); 284 248 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); 249 + void pnfs_error_mark_layout_for_return(struct inode *inode, 250 + struct pnfs_layout_segment *lseg); 285 251 286 252 /* nfs4_deviceid_flags */ 287 253 enum { ··· 317 275 bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); 318 276 void nfs4_deviceid_purge_client(const struct nfs_client *); 319 277 278 + /* pnfs_nfs.c */ 279 + void 
pnfs_generic_clear_request_commit(struct nfs_page *req, 280 + struct nfs_commit_info *cinfo); 281 + void pnfs_generic_commit_release(void *calldata); 282 + void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); 283 + void pnfs_generic_rw_release(void *data); 284 + void pnfs_generic_recover_commit_reqs(struct list_head *dst, 285 + struct nfs_commit_info *cinfo); 286 + int pnfs_generic_commit_pagelist(struct inode *inode, 287 + struct list_head *mds_pages, 288 + int how, 289 + struct nfs_commit_info *cinfo, 290 + int (*initiate_commit)(struct nfs_commit_data *data, 291 + int how)); 292 + int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); 293 + void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); 294 + void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); 295 + struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, 296 + gfp_t gfp_flags); 297 + void nfs4_pnfs_v3_ds_connect_unload(void); 298 + void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, 299 + struct nfs4_deviceid_node *devid, unsigned int timeo, 300 + unsigned int retrans, u32 version, u32 minor_version, 301 + rpc_authflavor_t au_flavor); 302 + struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, 303 + struct xdr_stream *xdr, 304 + gfp_t gfp_flags); 305 + 320 306 static inline bool nfs_have_layout(struct inode *inode) 321 307 { 322 308 return NFS_I(inode)->layout != NULL; ··· 355 285 { 356 286 atomic_inc(&d->ref); 357 287 return d; 288 + } 289 + 290 + static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) 291 + { 292 + if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) 293 + atomic_inc(&lo->plh_refcount); 294 + } 295 + 296 + static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) 297 + { 298 + if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) { 299 + atomic_dec(&lo->plh_refcount); 300 + /* wake up waiters for LAYOUTRETURN as that is not 
needed */ 301 + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 302 + } 303 + } 304 + 305 + static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) 306 + { 307 + return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags); 358 308 } 359 309 360 310 static inline struct pnfs_layout_segment * ··· 412 322 return ld->get_ds_info(inode); 413 323 } 414 324 325 + static inline void 326 + pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) 327 + { 328 + set_bit(NFS_DEVICEID_INVALID, &node->flags); 329 + } 330 + 415 331 static inline bool 416 332 pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, 417 - struct nfs_commit_info *cinfo) 333 + struct nfs_commit_info *cinfo, u32 ds_commit_idx) 418 334 { 419 335 struct inode *inode = req->wb_context->dentry->d_inode; 420 336 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 421 337 422 338 if (lseg == NULL || ld->mark_request_commit == NULL) 423 339 return false; 424 - ld->mark_request_commit(req, lseg, cinfo); 340 + ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx); 425 341 return true; 426 342 } 427 343 ··· 451 355 return 0; 452 356 else 453 357 return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); 454 - } 455 - 456 - static inline void 457 - pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, 458 - struct nfs_commit_info *cinfo) 459 - { 460 - if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) 461 - return; 462 - NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 463 358 } 464 359 465 360 static inline struct nfs_page * ··· 610 523 611 524 static inline bool 612 525 pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, 613 - struct nfs_commit_info *cinfo) 526 + struct nfs_commit_info *cinfo, u32 ds_commit_idx) 614 527 { 615 528 return false; 616 529 } ··· 626 539 int max) 627 540 { 628 541 return 0; 629 - } 630 - 631 - static inline void 632 - 
pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, 633 - struct nfs_commit_info *cinfo) 634 - { 635 542 } 636 543 637 544 static inline struct nfs_page * ··· 657 576 static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 658 577 { 659 578 return NULL; 579 + } 580 + 581 + static inline void nfs4_pnfs_v3_ds_connect_unload(void) 582 + { 660 583 } 661 584 662 585 #endif /* CONFIG_NFS_V4_1 */
+840
fs/nfs/pnfs_nfs.c
··· 1 + /* 2 + * Common NFS I/O operations for the pnfs file based 3 + * layout drivers. 4 + * 5 + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. 6 + * 7 + * Tom Haynes <loghyr@primarydata.com> 8 + */ 9 + 10 + #include <linux/nfs_fs.h> 11 + #include <linux/nfs_page.h> 12 + #include <linux/sunrpc/addr.h> 13 + #include <linux/module.h> 14 + 15 + #include "nfs4session.h" 16 + #include "internal.h" 17 + #include "pnfs.h" 18 + 19 + #define NFSDBG_FACILITY NFSDBG_PNFS 20 + 21 + void pnfs_generic_rw_release(void *data) 22 + { 23 + struct nfs_pgio_header *hdr = data; 24 + 25 + nfs_put_client(hdr->ds_clp); 26 + hdr->mds_ops->rpc_release(data); 27 + } 28 + EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); 29 + 30 + /* Fake up some data that will cause nfs_commit_release to retry the writes. */ 31 + void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) 32 + { 33 + struct nfs_page *first = nfs_list_entry(data->pages.next); 34 + 35 + data->task.tk_status = 0; 36 + memcpy(&data->verf.verifier, &first->wb_verf, 37 + sizeof(data->verf.verifier)); 38 + data->verf.verifier.data[0]++; /* ensure verifier mismatch */ 39 + } 40 + EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); 41 + 42 + void pnfs_generic_write_commit_done(struct rpc_task *task, void *data) 43 + { 44 + struct nfs_commit_data *wdata = data; 45 + 46 + /* Note this may cause RPC to be resent */ 47 + wdata->mds_ops->rpc_call_done(task, data); 48 + } 49 + EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done); 50 + 51 + void pnfs_generic_commit_release(void *calldata) 52 + { 53 + struct nfs_commit_data *data = calldata; 54 + 55 + data->completion_ops->completion(data); 56 + pnfs_put_lseg(data->lseg); 57 + nfs_put_client(data->ds_clp); 58 + nfs_commitdata_release(data); 59 + } 60 + EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); 61 + 62 + /* The generic layer is about to remove the req from the commit list. 63 + * If this will make the bucket empty, it will need to put the lseg reference. 
64 + * Note this must be called holding the inode (/cinfo) lock 65 + */ 66 + void 67 + pnfs_generic_clear_request_commit(struct nfs_page *req, 68 + struct nfs_commit_info *cinfo) 69 + { 70 + struct pnfs_layout_segment *freeme = NULL; 71 + 72 + if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 73 + goto out; 74 + cinfo->ds->nwritten--; 75 + if (list_is_singular(&req->wb_list)) { 76 + struct pnfs_commit_bucket *bucket; 77 + 78 + bucket = list_first_entry(&req->wb_list, 79 + struct pnfs_commit_bucket, 80 + written); 81 + freeme = bucket->wlseg; 82 + bucket->wlseg = NULL; 83 + } 84 + out: 85 + nfs_request_remove_commit_list(req, cinfo); 86 + pnfs_put_lseg_locked(freeme); 87 + } 88 + EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); 89 + 90 + static int 91 + pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, 92 + struct nfs_commit_info *cinfo, int max) 93 + { 94 + struct nfs_page *req, *tmp; 95 + int ret = 0; 96 + 97 + list_for_each_entry_safe(req, tmp, src, wb_list) { 98 + if (!nfs_lock_request(req)) 99 + continue; 100 + kref_get(&req->wb_kref); 101 + if (cond_resched_lock(cinfo->lock)) 102 + list_safe_reset_next(req, tmp, wb_list); 103 + nfs_request_remove_commit_list(req, cinfo); 104 + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); 105 + nfs_list_add_request(req, dst); 106 + ret++; 107 + if ((ret == max) && !cinfo->dreq) 108 + break; 109 + } 110 + return ret; 111 + } 112 + 113 + static int 114 + pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, 115 + struct nfs_commit_info *cinfo, 116 + int max) 117 + { 118 + struct list_head *src = &bucket->written; 119 + struct list_head *dst = &bucket->committing; 120 + int ret; 121 + 122 + lockdep_assert_held(cinfo->lock); 123 + ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); 124 + if (ret) { 125 + cinfo->ds->nwritten -= ret; 126 + cinfo->ds->ncommitting += ret; 127 + bucket->clseg = bucket->wlseg; 128 + if (list_empty(src)) 129 + bucket->wlseg = NULL; 130 + 
else 131 + pnfs_get_lseg(bucket->clseg); 132 + } 133 + return ret; 134 + } 135 + 136 + /* Move reqs from written to committing lists, returning count 137 + * of number moved. 138 + */ 139 + int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, 140 + int max) 141 + { 142 + int i, rv = 0, cnt; 143 + 144 + lockdep_assert_held(cinfo->lock); 145 + for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { 146 + cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], 147 + cinfo, max); 148 + max -= cnt; 149 + rv += cnt; 150 + } 151 + return rv; 152 + } 153 + EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists); 154 + 155 + /* Pull everything off the committing lists and dump into @dst. */ 156 + void pnfs_generic_recover_commit_reqs(struct list_head *dst, 157 + struct nfs_commit_info *cinfo) 158 + { 159 + struct pnfs_commit_bucket *b; 160 + struct pnfs_layout_segment *freeme; 161 + int i; 162 + 163 + lockdep_assert_held(cinfo->lock); 164 + restart: 165 + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 166 + if (pnfs_generic_transfer_commit_list(&b->written, dst, 167 + cinfo, 0)) { 168 + freeme = b->wlseg; 169 + b->wlseg = NULL; 170 + spin_unlock(cinfo->lock); 171 + pnfs_put_lseg(freeme); 172 + spin_lock(cinfo->lock); 173 + goto restart; 174 + } 175 + } 176 + cinfo->ds->nwritten = 0; 177 + } 178 + EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); 179 + 180 + static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) 181 + { 182 + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; 183 + struct pnfs_commit_bucket *bucket; 184 + struct pnfs_layout_segment *freeme; 185 + int i; 186 + 187 + for (i = idx; i < fl_cinfo->nbuckets; i++) { 188 + bucket = &fl_cinfo->buckets[i]; 189 + if (list_empty(&bucket->committing)) 190 + continue; 191 + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo, i); 192 + spin_lock(cinfo->lock); 193 + freeme = bucket->clseg; 194 + bucket->clseg = NULL; 195 + spin_unlock(cinfo->lock); 196 
+ pnfs_put_lseg(freeme); 197 + } 198 + } 199 + 200 + static unsigned int 201 + pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, 202 + struct list_head *list) 203 + { 204 + struct pnfs_ds_commit_info *fl_cinfo; 205 + struct pnfs_commit_bucket *bucket; 206 + struct nfs_commit_data *data; 207 + int i; 208 + unsigned int nreq = 0; 209 + 210 + fl_cinfo = cinfo->ds; 211 + bucket = fl_cinfo->buckets; 212 + for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { 213 + if (list_empty(&bucket->committing)) 214 + continue; 215 + data = nfs_commitdata_alloc(); 216 + if (!data) 217 + break; 218 + data->ds_commit_index = i; 219 + spin_lock(cinfo->lock); 220 + data->lseg = bucket->clseg; 221 + bucket->clseg = NULL; 222 + spin_unlock(cinfo->lock); 223 + list_add(&data->pages, list); 224 + nreq++; 225 + } 226 + 227 + /* Clean up on error */ 228 + pnfs_generic_retry_commit(cinfo, i); 229 + return nreq; 230 + } 231 + 232 + /* This follows nfs_commit_list pretty closely */ 233 + int 234 + pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, 235 + int how, struct nfs_commit_info *cinfo, 236 + int (*initiate_commit)(struct nfs_commit_data *data, 237 + int how)) 238 + { 239 + struct nfs_commit_data *data, *tmp; 240 + LIST_HEAD(list); 241 + unsigned int nreq = 0; 242 + 243 + if (!list_empty(mds_pages)) { 244 + data = nfs_commitdata_alloc(); 245 + if (data != NULL) { 246 + data->lseg = NULL; 247 + list_add(&data->pages, &list); 248 + nreq++; 249 + } else { 250 + nfs_retry_commit(mds_pages, NULL, cinfo, 0); 251 + pnfs_generic_retry_commit(cinfo, 0); 252 + cinfo->completion_ops->error_cleanup(NFS_I(inode)); 253 + return -ENOMEM; 254 + } 255 + } 256 + 257 + nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); 258 + 259 + if (nreq == 0) { 260 + cinfo->completion_ops->error_cleanup(NFS_I(inode)); 261 + goto out; 262 + } 263 + 264 + atomic_add(nreq, &cinfo->mds->rpcs_out); 265 + 266 + list_for_each_entry_safe(data, tmp, &list, pages) { 267 + 
list_del_init(&data->pages); 268 + if (!data->lseg) { 269 + nfs_init_commit(data, mds_pages, NULL, cinfo); 270 + nfs_initiate_commit(NFS_CLIENT(inode), data, 271 + NFS_PROTO(data->inode), 272 + data->mds_ops, how, 0); 273 + } else { 274 + struct pnfs_commit_bucket *buckets; 275 + 276 + buckets = cinfo->ds->buckets; 277 + nfs_init_commit(data, 278 + &buckets[data->ds_commit_index].committing, 279 + data->lseg, 280 + cinfo); 281 + initiate_commit(data, how); 282 + } 283 + } 284 + out: 285 + cinfo->ds->ncommitting = 0; 286 + return PNFS_ATTEMPTED; 287 + } 288 + EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); 289 + 290 + /* 291 + * Data server cache 292 + * 293 + * Data servers can be mapped to different device ids. 294 + * nfs4_pnfs_ds reference counting 295 + * - set to 1 on allocation 296 + * - incremented when a device id maps a data server already in the cache. 297 + * - decremented when deviceid is removed from the cache. 298 + */ 299 + static DEFINE_SPINLOCK(nfs4_ds_cache_lock); 300 + static LIST_HEAD(nfs4_data_server_cache); 301 + 302 + /* Debug routines */ 303 + static void 304 + print_ds(struct nfs4_pnfs_ds *ds) 305 + { 306 + if (ds == NULL) { 307 + printk(KERN_WARNING "%s NULL device\n", __func__); 308 + return; 309 + } 310 + printk(KERN_WARNING " ds %s\n" 311 + " ref count %d\n" 312 + " client %p\n" 313 + " cl_exchange_flags %x\n", 314 + ds->ds_remotestr, 315 + atomic_read(&ds->ds_count), ds->ds_clp, 316 + ds->ds_clp ? 
ds->ds_clp->cl_exchange_flags : 0); 317 + } 318 + 319 + static bool 320 + same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) 321 + { 322 + struct sockaddr_in *a, *b; 323 + struct sockaddr_in6 *a6, *b6; 324 + 325 + if (addr1->sa_family != addr2->sa_family) 326 + return false; 327 + 328 + switch (addr1->sa_family) { 329 + case AF_INET: 330 + a = (struct sockaddr_in *)addr1; 331 + b = (struct sockaddr_in *)addr2; 332 + 333 + if (a->sin_addr.s_addr == b->sin_addr.s_addr && 334 + a->sin_port == b->sin_port) 335 + return true; 336 + break; 337 + 338 + case AF_INET6: 339 + a6 = (struct sockaddr_in6 *)addr1; 340 + b6 = (struct sockaddr_in6 *)addr2; 341 + 342 + /* LINKLOCAL addresses must have matching scope_id */ 343 + if (ipv6_addr_src_scope(&a6->sin6_addr) == 344 + IPV6_ADDR_SCOPE_LINKLOCAL && 345 + a6->sin6_scope_id != b6->sin6_scope_id) 346 + return false; 347 + 348 + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && 349 + a6->sin6_port == b6->sin6_port) 350 + return true; 351 + break; 352 + 353 + default: 354 + dprintk("%s: unhandled address family: %u\n", 355 + __func__, addr1->sa_family); 356 + return false; 357 + } 358 + 359 + return false; 360 + } 361 + 362 + static bool 363 + _same_data_server_addrs_locked(const struct list_head *dsaddrs1, 364 + const struct list_head *dsaddrs2) 365 + { 366 + struct nfs4_pnfs_ds_addr *da1, *da2; 367 + 368 + /* step through both lists, comparing as we go */ 369 + for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), 370 + da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); 371 + da1 != NULL && da2 != NULL; 372 + da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), 373 + da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { 374 + if (!same_sockaddr((struct sockaddr *)&da1->da_addr, 375 + (struct sockaddr *)&da2->da_addr)) 376 + return false; 377 + } 378 + if (da1 == NULL && da2 == NULL) 379 + return true; 380 + 381 + return false; 382 + } 383 + 384 + /* 385 + * Lookup DS by addresses. 
nfs4_ds_cache_lock is held 386 + */ 387 + static struct nfs4_pnfs_ds * 388 + _data_server_lookup_locked(const struct list_head *dsaddrs) 389 + { 390 + struct nfs4_pnfs_ds *ds; 391 + 392 + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) 393 + if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) 394 + return ds; 395 + return NULL; 396 + } 397 + 398 + static void destroy_ds(struct nfs4_pnfs_ds *ds) 399 + { 400 + struct nfs4_pnfs_ds_addr *da; 401 + 402 + dprintk("--> %s\n", __func__); 403 + ifdebug(FACILITY) 404 + print_ds(ds); 405 + 406 + nfs_put_client(ds->ds_clp); 407 + 408 + while (!list_empty(&ds->ds_addrs)) { 409 + da = list_first_entry(&ds->ds_addrs, 410 + struct nfs4_pnfs_ds_addr, 411 + da_node); 412 + list_del_init(&da->da_node); 413 + kfree(da->da_remotestr); 414 + kfree(da); 415 + } 416 + 417 + kfree(ds->ds_remotestr); 418 + kfree(ds); 419 + } 420 + 421 + void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) 422 + { 423 + if (atomic_dec_and_lock(&ds->ds_count, 424 + &nfs4_ds_cache_lock)) { 425 + list_del_init(&ds->ds_node); 426 + spin_unlock(&nfs4_ds_cache_lock); 427 + destroy_ds(ds); 428 + } 429 + } 430 + EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put); 431 + 432 + /* 433 + * Create a string with a human readable address and port to avoid 434 + * complicated setup around many dprintks. 
435 + */ 436 + static char * 437 + nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) 438 + { 439 + struct nfs4_pnfs_ds_addr *da; 440 + char *remotestr; 441 + size_t len; 442 + char *p; 443 + 444 + len = 3; /* '{', '}' and eol */ 445 + list_for_each_entry(da, dsaddrs, da_node) { 446 + len += strlen(da->da_remotestr) + 1; /* string plus comma */ 447 + } 448 + 449 + remotestr = kzalloc(len, gfp_flags); 450 + if (!remotestr) 451 + return NULL; 452 + 453 + p = remotestr; 454 + *(p++) = '{'; 455 + len--; 456 + list_for_each_entry(da, dsaddrs, da_node) { 457 + size_t ll = strlen(da->da_remotestr); 458 + 459 + if (ll > len) 460 + goto out_err; 461 + 462 + memcpy(p, da->da_remotestr, ll); 463 + p += ll; 464 + len -= ll; 465 + 466 + if (len < 1) 467 + goto out_err; 468 + (*p++) = ','; 469 + len--; 470 + } 471 + if (len < 2) 472 + goto out_err; 473 + *(p++) = '}'; 474 + *p = '\0'; 475 + return remotestr; 476 + out_err: 477 + kfree(remotestr); 478 + return NULL; 479 + } 480 + 481 + /* 482 + * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if 483 + * uncached and return cached struct nfs4_pnfs_ds. 
484 + */ 485 + struct nfs4_pnfs_ds * 486 + nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) 487 + { 488 + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; 489 + char *remotestr; 490 + 491 + if (list_empty(dsaddrs)) { 492 + dprintk("%s: no addresses defined\n", __func__); 493 + goto out; 494 + } 495 + 496 + ds = kzalloc(sizeof(*ds), gfp_flags); 497 + if (!ds) 498 + goto out; 499 + 500 + /* this is only used for debugging, so it's ok if its NULL */ 501 + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); 502 + 503 + spin_lock(&nfs4_ds_cache_lock); 504 + tmp_ds = _data_server_lookup_locked(dsaddrs); 505 + if (tmp_ds == NULL) { 506 + INIT_LIST_HEAD(&ds->ds_addrs); 507 + list_splice_init(dsaddrs, &ds->ds_addrs); 508 + ds->ds_remotestr = remotestr; 509 + atomic_set(&ds->ds_count, 1); 510 + INIT_LIST_HEAD(&ds->ds_node); 511 + ds->ds_clp = NULL; 512 + list_add(&ds->ds_node, &nfs4_data_server_cache); 513 + dprintk("%s add new data server %s\n", __func__, 514 + ds->ds_remotestr); 515 + } else { 516 + kfree(remotestr); 517 + kfree(ds); 518 + atomic_inc(&tmp_ds->ds_count); 519 + dprintk("%s data server %s found, inc'ed ds_count to %d\n", 520 + __func__, tmp_ds->ds_remotestr, 521 + atomic_read(&tmp_ds->ds_count)); 522 + ds = tmp_ds; 523 + } 524 + spin_unlock(&nfs4_ds_cache_lock); 525 + out: 526 + return ds; 527 + } 528 + EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); 529 + 530 + static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 531 + { 532 + might_sleep(); 533 + wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, 534 + TASK_KILLABLE); 535 + } 536 + 537 + static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 538 + { 539 + smp_mb__before_atomic(); 540 + clear_bit(NFS4DS_CONNECTING, &ds->ds_state); 541 + smp_mb__after_atomic(); 542 + wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); 543 + } 544 + 545 + static struct nfs_client *(*get_v3_ds_connect)( 546 + struct nfs_client *mds_clp, 547 + const struct sockaddr *ds_addr, 548 + int ds_addrlen, 549 + int ds_proto, 550 + unsigned int 
ds_timeo, 551 + unsigned int ds_retrans, 552 + rpc_authflavor_t au_flavor); 553 + 554 + static bool load_v3_ds_connect(void) 555 + { 556 + if (!get_v3_ds_connect) { 557 + get_v3_ds_connect = symbol_request(nfs3_set_ds_client); 558 + WARN_ON_ONCE(!get_v3_ds_connect); 559 + } 560 + 561 + return(get_v3_ds_connect != NULL); 562 + } 563 + 564 + void __exit nfs4_pnfs_v3_ds_connect_unload(void) 565 + { 566 + if (get_v3_ds_connect) { 567 + symbol_put(nfs3_set_ds_client); 568 + get_v3_ds_connect = NULL; 569 + } 570 + } 571 + EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); 572 + 573 + static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, 574 + struct nfs4_pnfs_ds *ds, 575 + unsigned int timeo, 576 + unsigned int retrans, 577 + rpc_authflavor_t au_flavor) 578 + { 579 + struct nfs_client *clp = ERR_PTR(-EIO); 580 + struct nfs4_pnfs_ds_addr *da; 581 + int status = 0; 582 + 583 + dprintk("--> %s DS %s au_flavor %d\n", __func__, 584 + ds->ds_remotestr, au_flavor); 585 + 586 + if (!load_v3_ds_connect()) 587 + goto out; 588 + 589 + list_for_each_entry(da, &ds->ds_addrs, da_node) { 590 + dprintk("%s: DS %s: trying address %s\n", 591 + __func__, ds->ds_remotestr, da->da_remotestr); 592 + 593 + clp = get_v3_ds_connect(mds_srv->nfs_client, 594 + (struct sockaddr *)&da->da_addr, 595 + da->da_addrlen, IPPROTO_TCP, 596 + timeo, retrans, au_flavor); 597 + if (!IS_ERR(clp)) 598 + break; 599 + } 600 + 601 + if (IS_ERR(clp)) { 602 + status = PTR_ERR(clp); 603 + goto out; 604 + } 605 + 606 + smp_wmb(); 607 + ds->ds_clp = clp; 608 + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 609 + out: 610 + return status; 611 + } 612 + 613 + static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, 614 + struct nfs4_pnfs_ds *ds, 615 + unsigned int timeo, 616 + unsigned int retrans, 617 + u32 minor_version, 618 + rpc_authflavor_t au_flavor) 619 + { 620 + struct nfs_client *clp = ERR_PTR(-EIO); 621 + struct nfs4_pnfs_ds_addr *da; 622 + int status = 0; 623 + 624 + dprintk("--> 
%s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, 625 + au_flavor); 626 + 627 + list_for_each_entry(da, &ds->ds_addrs, da_node) { 628 + dprintk("%s: DS %s: trying address %s\n", 629 + __func__, ds->ds_remotestr, da->da_remotestr); 630 + 631 + clp = nfs4_set_ds_client(mds_srv->nfs_client, 632 + (struct sockaddr *)&da->da_addr, 633 + da->da_addrlen, IPPROTO_TCP, 634 + timeo, retrans, minor_version, 635 + au_flavor); 636 + if (!IS_ERR(clp)) 637 + break; 638 + } 639 + 640 + if (IS_ERR(clp)) { 641 + status = PTR_ERR(clp); 642 + goto out; 643 + } 644 + 645 + status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); 646 + if (status) 647 + goto out_put; 648 + 649 + smp_wmb(); 650 + ds->ds_clp = clp; 651 + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 652 + out: 653 + return status; 654 + out_put: 655 + nfs_put_client(clp); 656 + goto out; 657 + } 658 + 659 + /* 660 + * Create an rpc connection to the nfs4_pnfs_ds data server. 661 + * Currently only supports IPv4 and IPv6 addresses. 662 + * If connection fails, make devid unavailable. 
663 + */ 664 + void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, 665 + struct nfs4_deviceid_node *devid, unsigned int timeo, 666 + unsigned int retrans, u32 version, 667 + u32 minor_version, rpc_authflavor_t au_flavor) 668 + { 669 + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { 670 + int err = 0; 671 + 672 + if (version == 3) { 673 + err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, 674 + retrans, au_flavor); 675 + } else if (version == 4) { 676 + err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, 677 + retrans, minor_version, 678 + au_flavor); 679 + } else { 680 + dprintk("%s: unsupported DS version %d\n", __func__, 681 + version); 682 + err = -EPROTONOSUPPORT; 683 + } 684 + 685 + if (err) 686 + nfs4_mark_deviceid_unavailable(devid); 687 + nfs4_clear_ds_conn_bit(ds); 688 + } else { 689 + nfs4_wait_ds_connect(ds); 690 + } 691 + } 692 + EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); 693 + 694 + /* 695 + * Currently only supports ipv4, ipv6 and one multi-path address. 
696 + */ 697 + struct nfs4_pnfs_ds_addr * 698 + nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) 699 + { 700 + struct nfs4_pnfs_ds_addr *da = NULL; 701 + char *buf, *portstr; 702 + __be16 port; 703 + int nlen, rlen; 704 + int tmp[2]; 705 + __be32 *p; 706 + char *netid, *match_netid; 707 + size_t len, match_netid_len; 708 + char *startsep = ""; 709 + char *endsep = ""; 710 + 711 + 712 + /* r_netid */ 713 + p = xdr_inline_decode(xdr, 4); 714 + if (unlikely(!p)) 715 + goto out_err; 716 + nlen = be32_to_cpup(p++); 717 + 718 + p = xdr_inline_decode(xdr, nlen); 719 + if (unlikely(!p)) 720 + goto out_err; 721 + 722 + netid = kmalloc(nlen+1, gfp_flags); 723 + if (unlikely(!netid)) 724 + goto out_err; 725 + 726 + netid[nlen] = '\0'; 727 + memcpy(netid, p, nlen); 728 + 729 + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ 730 + p = xdr_inline_decode(xdr, 4); 731 + if (unlikely(!p)) 732 + goto out_free_netid; 733 + rlen = be32_to_cpup(p); 734 + 735 + p = xdr_inline_decode(xdr, rlen); 736 + if (unlikely(!p)) 737 + goto out_free_netid; 738 + 739 + /* port is ".ABC.DEF", 8 chars max */ 740 + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { 741 + dprintk("%s: Invalid address, length %d\n", __func__, 742 + rlen); 743 + goto out_free_netid; 744 + } 745 + buf = kmalloc(rlen + 1, gfp_flags); 746 + if (!buf) { 747 + dprintk("%s: Not enough memory\n", __func__); 748 + goto out_free_netid; 749 + } 750 + buf[rlen] = '\0'; 751 + memcpy(buf, p, rlen); 752 + 753 + /* replace port '.' with '-' */ 754 + portstr = strrchr(buf, '.'); 755 + if (!portstr) { 756 + dprintk("%s: Failed finding expected dot in port\n", 757 + __func__); 758 + goto out_free_buf; 759 + } 760 + *portstr = '-'; 761 + 762 + /* find '.' 
between address and port */ 763 + portstr = strrchr(buf, '.'); 764 + if (!portstr) { 765 + dprintk("%s: Failed finding expected dot between address and " 766 + "port\n", __func__); 767 + goto out_free_buf; 768 + } 769 + *portstr = '\0'; 770 + 771 + da = kzalloc(sizeof(*da), gfp_flags); 772 + if (unlikely(!da)) 773 + goto out_free_buf; 774 + 775 + INIT_LIST_HEAD(&da->da_node); 776 + 777 + if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, 778 + sizeof(da->da_addr))) { 779 + dprintk("%s: error parsing address %s\n", __func__, buf); 780 + goto out_free_da; 781 + } 782 + 783 + portstr++; 784 + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); 785 + port = htons((tmp[0] << 8) | (tmp[1])); 786 + 787 + switch (da->da_addr.ss_family) { 788 + case AF_INET: 789 + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; 790 + da->da_addrlen = sizeof(struct sockaddr_in); 791 + match_netid = "tcp"; 792 + match_netid_len = 3; 793 + break; 794 + 795 + case AF_INET6: 796 + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; 797 + da->da_addrlen = sizeof(struct sockaddr_in6); 798 + match_netid = "tcp6"; 799 + match_netid_len = 4; 800 + startsep = "["; 801 + endsep = "]"; 802 + break; 803 + 804 + default: 805 + dprintk("%s: unsupported address family: %u\n", 806 + __func__, da->da_addr.ss_family); 807 + goto out_free_da; 808 + } 809 + 810 + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { 811 + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", 812 + __func__, netid, match_netid); 813 + goto out_free_da; 814 + } 815 + 816 + /* save human readable address */ 817 + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; 818 + da->da_remotestr = kzalloc(len, gfp_flags); 819 + 820 + /* NULL is ok, only used for dprintk */ 821 + if (da->da_remotestr) 822 + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, 823 + buf, endsep, ntohs(port)); 824 + 825 + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); 826 + kfree(buf); 827 + 
kfree(netid); 828 + return da; 829 + 830 + out_free_da: 831 + kfree(da); 832 + out_free_buf: 833 + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); 834 + kfree(buf); 835 + out_free_netid: 836 + kfree(netid); 837 + out_err: 838 + return NULL; 839 + } 840 + EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
+27 -6
fs/nfs/read.c
··· 70 70 71 71 void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) 72 72 { 73 + struct nfs_pgio_mirror *mirror; 74 + 73 75 pgio->pg_ops = &nfs_pgio_rw_ops; 74 - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; 76 + 77 + /* read path should never have more than one mirror */ 78 + WARN_ON_ONCE(pgio->pg_mirror_count != 1); 79 + 80 + mirror = &pgio->pg_mirrors[0]; 81 + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; 75 82 } 76 83 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 77 84 ··· 88 81 struct nfs_page *new; 89 82 unsigned int len; 90 83 struct nfs_pageio_descriptor pgio; 84 + struct nfs_pgio_mirror *pgm; 91 85 92 86 len = nfs_page_length(page); 93 87 if (len == 0) ··· 105 97 &nfs_async_read_completion_ops); 106 98 nfs_pageio_add_request(&pgio, new); 107 99 nfs_pageio_complete(&pgio); 108 - NFS_I(inode)->read_io += pgio.pg_bytes_written; 100 + 101 + /* It doesn't make sense to do mirrored reads! */ 102 + WARN_ON_ONCE(pgio.pg_mirror_count != 1); 103 + 104 + pgm = &pgio.pg_mirrors[0]; 105 + NFS_I(inode)->read_io += pgm->pg_bytes_written; 106 + 109 107 return 0; 110 108 } 111 109 ··· 182 168 183 169 static void nfs_initiate_read(struct nfs_pgio_header *hdr, 184 170 struct rpc_message *msg, 171 + const struct nfs_rpc_ops *rpc_ops, 185 172 struct rpc_task_setup *task_setup_data, int how) 186 173 { 187 174 struct inode *inode = hdr->inode; 188 175 int swap_flags = IS_SWAPFILE(inode) ? 
NFS_RPC_SWAPFLAGS : 0; 189 176 190 177 task_setup_data->flags |= swap_flags; 191 - NFS_PROTO(inode)->read_setup(hdr, msg); 178 + rpc_ops->read_setup(hdr, msg); 192 179 } 193 180 194 181 static void ··· 366 351 struct list_head *pages, unsigned nr_pages) 367 352 { 368 353 struct nfs_pageio_descriptor pgio; 354 + struct nfs_pgio_mirror *pgm; 369 355 struct nfs_readdesc desc = { 370 356 .pgio = &pgio, 371 357 }; ··· 402 386 &nfs_async_read_completion_ops); 403 387 404 388 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 405 - 406 389 nfs_pageio_complete(&pgio); 407 - NFS_I(inode)->read_io += pgio.pg_bytes_written; 408 - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 390 + 391 + /* It doesn't make sense to do mirrored reads! */ 392 + WARN_ON_ONCE(pgio.pg_mirror_count != 1); 393 + 394 + pgm = &pgio.pg_mirrors[0]; 395 + NFS_I(inode)->read_io += pgm->pg_bytes_written; 396 + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> 397 + PAGE_CACHE_SHIFT; 409 398 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 410 399 read_complete: 411 400 put_nfs_open_context(desc.ctx);
+33 -19
fs/nfs/write.c
··· 473 473 do { 474 474 /* 475 475 * Subrequests are always contiguous, non overlapping 476 - * and in order. If not, it's a programming error. 476 + * and in order - but may be repeated (mirrored writes). 477 477 */ 478 - WARN_ON_ONCE(subreq->wb_offset != 479 - (head->wb_offset + total_bytes)); 480 - 481 - /* keep track of how many bytes this group covers */ 482 - total_bytes += subreq->wb_bytes; 478 + if (subreq->wb_offset == (head->wb_offset + total_bytes)) { 479 + /* keep track of how many bytes this group covers */ 480 + total_bytes += subreq->wb_bytes; 481 + } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || 482 + ((subreq->wb_offset + subreq->wb_bytes) > 483 + (head->wb_offset + total_bytes)))) { 484 + nfs_page_group_unlock(head); 485 + spin_unlock(&inode->i_lock); 486 + return ERR_PTR(-EIO); 487 + } 483 488 484 489 if (!nfs_lock_request(subreq)) { 485 490 /* releases page group bit lock and ··· 847 842 */ 848 843 void 849 844 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, 850 - struct nfs_commit_info *cinfo) 845 + struct nfs_commit_info *cinfo, u32 ds_commit_idx) 851 846 { 852 - if (pnfs_mark_request_commit(req, lseg, cinfo)) 847 + if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) 853 848 return; 854 849 nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); 855 850 } ··· 905 900 } 906 901 if (nfs_write_need_commit(hdr)) { 907 902 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); 908 - nfs_mark_request_commit(req, hdr->lseg, &cinfo); 903 + nfs_mark_request_commit(req, hdr->lseg, &cinfo, 904 + hdr->pgio_mirror_idx); 909 905 goto next; 910 906 } 911 907 remove_req: ··· 1246 1240 1247 1241 static void nfs_initiate_write(struct nfs_pgio_header *hdr, 1248 1242 struct rpc_message *msg, 1243 + const struct nfs_rpc_ops *rpc_ops, 1249 1244 struct rpc_task_setup *task_setup_data, int how) 1250 1245 { 1251 - struct inode *inode = hdr->inode; 1252 1246 int priority = 
flush_task_priority(how); 1253 1247 1254 1248 task_setup_data->priority = priority; 1255 - NFS_PROTO(inode)->write_setup(hdr, msg); 1249 + rpc_ops->write_setup(hdr, msg); 1256 1250 1257 - nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, 1251 + nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, 1258 1252 &task_setup_data->rpc_client, msg, hdr); 1259 1253 } 1260 1254 ··· 1304 1298 1305 1299 void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) 1306 1300 { 1301 + struct nfs_pgio_mirror *mirror; 1302 + 1307 1303 pgio->pg_ops = &nfs_pgio_rw_ops; 1308 - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; 1304 + 1305 + nfs_pageio_stop_mirroring(pgio); 1306 + 1307 + mirror = &pgio->pg_mirrors[0]; 1308 + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; 1309 1309 } 1310 1310 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1311 1311 ··· 1477 1465 EXPORT_SYMBOL_GPL(nfs_commitdata_release); 1478 1466 1479 1467 int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, 1468 + const struct nfs_rpc_ops *nfs_ops, 1480 1469 const struct rpc_call_ops *call_ops, 1481 1470 int how, int flags) 1482 1471 { ··· 1499 1486 .priority = priority, 1500 1487 }; 1501 1488 /* Set up the initial task struct. 
*/ 1502 - NFS_PROTO(data->inode)->commit_setup(data, &msg); 1489 + nfs_ops->commit_setup(data, &msg); 1503 1490 1504 1491 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1505 1492 ··· 1567 1554 1568 1555 void nfs_retry_commit(struct list_head *page_list, 1569 1556 struct pnfs_layout_segment *lseg, 1570 - struct nfs_commit_info *cinfo) 1557 + struct nfs_commit_info *cinfo, 1558 + u32 ds_commit_idx) 1571 1559 { 1572 1560 struct nfs_page *req; 1573 1561 1574 1562 while (!list_empty(page_list)) { 1575 1563 req = nfs_list_entry(page_list->next); 1576 1564 nfs_list_remove_request(req); 1577 - nfs_mark_request_commit(req, lseg, cinfo); 1565 + nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); 1578 1566 if (!cinfo->dreq) { 1579 1567 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1580 1568 dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, ··· 1603 1589 /* Set up the argument struct */ 1604 1590 nfs_init_commit(data, head, NULL, cinfo); 1605 1591 atomic_inc(&cinfo->mds->rpcs_out); 1606 - return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, 1607 - how, 0); 1592 + return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), 1593 + data->mds_ops, how, 0); 1608 1594 out_bad: 1609 - nfs_retry_commit(head, NULL, cinfo); 1595 + nfs_retry_commit(head, NULL, cinfo, 0); 1610 1596 cinfo->completion_ops->error_cleanup(NFS_I(inode)); 1611 1597 return -ENOMEM; 1612 1598 }
+1
include/linux/nfs4.h
··· 516 516 LAYOUT_NFSV4_1_FILES = 1, 517 517 LAYOUT_OSD2_OBJECTS = 2, 518 518 LAYOUT_BLOCK_VOLUME = 3, 519 + LAYOUT_FLEX_FILES = 4, 519 520 }; 520 521 521 522 /* used for both layout return and recall */
+5 -4
include/linux/nfs_fs_sb.h
··· 77 77 /* Client owner identifier */ 78 78 const char * cl_owner_id; 79 79 80 - /* Our own IP address, as a null-terminated string. 81 - * This is used to generate the mv0 callback address. 82 - */ 83 - char cl_ipaddr[48]; 84 80 u32 cl_cb_ident; /* v4.0 callback identifier */ 85 81 const struct nfs4_minor_version_ops *cl_mvops; 86 82 unsigned long cl_mig_gen; ··· 103 107 #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ 104 108 #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ 105 109 #endif /* CONFIG_NFS_V4 */ 110 + 111 + /* Our own IP address, as a null-terminated string. 112 + * This is used to generate the mv0 callback address. 113 + */ 114 + char cl_ipaddr[48]; 106 115 107 116 #ifdef CONFIG_NFS_FSCACHE 108 117 struct fscache_cookie *fscache; /* client index cache cookie */
+2
include/linux/nfs_idmap.h
··· 73 73 int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); 74 74 int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); 75 75 76 + int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); 77 + 76 78 extern unsigned int nfs_idmap_cache_timeout; 77 79 #endif /* NFS_IDMAP_H */
+19 -3
include/linux/nfs_page.h
··· 58 58 size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, 59 59 struct nfs_page *); 60 60 int (*pg_doio)(struct nfs_pageio_descriptor *); 61 + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, 62 + struct nfs_page *); 63 + void (*pg_cleanup)(struct nfs_pageio_descriptor *); 61 64 }; 62 65 63 66 struct nfs_rw_ops { ··· 72 69 struct inode *); 73 70 void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); 74 71 void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, 72 + const struct nfs_rpc_ops *, 75 73 struct rpc_task_setup *, int); 76 74 }; 77 75 78 - struct nfs_pageio_descriptor { 76 + struct nfs_pgio_mirror { 79 77 struct list_head pg_list; 80 78 unsigned long pg_bytes_written; 81 79 size_t pg_count; 82 80 size_t pg_bsize; 83 81 unsigned int pg_base; 84 - unsigned char pg_moreio : 1, 85 - pg_recoalesce : 1; 82 + unsigned char pg_recoalesce : 1; 83 + }; 86 84 85 + struct nfs_pageio_descriptor { 86 + unsigned char pg_moreio : 1; 87 87 struct inode *pg_inode; 88 88 const struct nfs_pageio_ops *pg_ops; 89 89 const struct nfs_rw_ops *pg_rw_ops; ··· 97 91 struct pnfs_layout_segment *pg_lseg; 98 92 struct nfs_direct_req *pg_dreq; 99 93 void *pg_layout_private; 94 + unsigned int pg_bsize; /* default bsize for mirrors */ 95 + 96 + u32 pg_mirror_count; 97 + struct nfs_pgio_mirror *pg_mirrors; 98 + struct nfs_pgio_mirror pg_mirrors_static[1]; 99 + struct nfs_pgio_mirror *pg_mirrors_dynamic; 100 + u32 pg_mirror_idx; /* current mirror */ 100 101 }; 102 + 103 + /* arbitrarily selected limit to number of mirrors */ 104 + #define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 101 105 102 106 #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) 103 107
+5 -1
include/linux/nfs_xdr.h
··· 293 293 struct nfs4_sequence_args seq_args; 294 294 struct pnfs_layout_hdr *layout; 295 295 struct inode *inode; 296 + struct pnfs_layout_range range; 296 297 nfs4_stateid stateid; 297 298 __u32 layout_type; 298 299 }; ··· 515 514 struct nfs4_sequence_res seq_res; 516 515 struct nfs_fattr * fattr; 517 516 __u32 count; 517 + __u32 op_status; 518 518 int eof; /* used by read */ 519 519 struct nfs_writeverf * verf; /* used by write */ 520 520 const struct nfs_server *server; /* used by write */ ··· 535 533 536 534 struct nfs_commitres { 537 535 struct nfs4_sequence_res seq_res; 536 + __u32 op_status; 538 537 struct nfs_fattr *fattr; 539 538 struct nfs_writeverf *verf; 540 539 const struct nfs_server *server; ··· 1329 1326 __u64 mds_offset; /* Filelayout dense stripe */ 1330 1327 struct nfs_page_array page_array; 1331 1328 struct nfs_client *ds_clp; /* pNFS data server */ 1332 - int ds_idx; /* ds index if ds_clp is set */ 1329 + int ds_commit_idx; /* ds index if ds_clp is set */ 1330 + int pgio_mirror_idx;/* mirror index in pgio layer */ 1333 1331 }; 1334 1332 1335 1333 struct nfs_mds_commit_info {
+4
include/linux/sunrpc/metrics.h
··· 79 79 struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); 80 80 void rpc_count_iostats(const struct rpc_task *, 81 81 struct rpc_iostats *); 82 + void rpc_count_iostats_metrics(const struct rpc_task *, 83 + struct rpc_iostats *); 82 84 void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); 83 85 void rpc_free_iostats(struct rpc_iostats *); 84 86 ··· 89 87 static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } 90 88 static inline void rpc_count_iostats(const struct rpc_task *task, 91 89 struct rpc_iostats *stats) {} 90 + static inline void rpc_count_iostats_metrics(const struct rpc_task *, 91 + struct rpc_iostats *) {} 92 92 static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} 93 93 static inline void rpc_free_iostats(struct rpc_iostats *stats) {} 94 94
+19 -7
net/sunrpc/stats.c
··· 140 140 EXPORT_SYMBOL_GPL(rpc_free_iostats); 141 141 142 142 /** 143 - * rpc_count_iostats - tally up per-task stats 143 + * rpc_count_iostats_metrics - tally up per-task stats 144 144 * @task: completed rpc_task 145 - * @stats: array of stat structures 145 + * @op_metrics: stat structure for OP that will accumulate stats from @task 146 146 */ 147 - void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) 147 + void rpc_count_iostats_metrics(const struct rpc_task *task, 148 + struct rpc_iostats *op_metrics) 148 149 { 149 150 struct rpc_rqst *req = task->tk_rqstp; 150 - struct rpc_iostats *op_metrics; 151 151 ktime_t delta, now; 152 152 153 - if (!stats || !req) 153 + if (!op_metrics || !req) 154 154 return; 155 155 156 156 now = ktime_get(); 157 - op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; 158 - 159 157 spin_lock(&op_metrics->om_lock); 160 158 161 159 op_metrics->om_ops++; ··· 172 174 op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); 173 175 174 176 spin_unlock(&op_metrics->om_lock); 177 + } 178 + EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); 179 + 180 + /** 181 + * rpc_count_iostats - tally up per-task stats 182 + * @task: completed rpc_task 183 + * @stats: array of stat structures 184 + * 185 + * Uses the statidx from @task 186 + */ 187 + void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) 188 + { 189 + rpc_count_iostats_metrics(task, 190 + &stats[task->tk_msg.rpc_proc->p_statidx]); 175 191 } 176 192 EXPORT_SYMBOL_GPL(rpc_count_iostats); 177 193