Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfs-for-4.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
"Highlights include:

Stable fixes:

- Fix a 1-byte stack overflow in nfs_idmap_read_and_verify_message

- Fix a hang due to incorrect error returns in rpcrdma_convert_iovs()

- Revert an incorrect change to the NFSv4.1 callback channel

- Fix a bug in the NFSv4.1 sequence error handling

Features and optimisations:

- Support for piggybacking a LAYOUTGET operation to the OPEN compound

- RDMA performance enhancements to deal with transport congestion

- Add proper SPDX tags for NetApp-contributed RDMA source

- Do not request file attributes that are delegated to the client
(size+change) from the server

- Optimise away a GETATTR in the lookup revalidate code when doing
an NFSv4 OPEN

- Optimise away unnecessary lookups for rename targets

- Misc performance improvements when freeing NFSv4 delegations

Bugfixes and cleanups:

- Try to fail quickly if proto=rdma

- Clean up RDMA receive trace points

- Fix sillyrename to return the delegation when appropriate

- Misc attribute revalidation fixes

- Immediately clear the pNFS layout on a file when the server returns
ESTALE

- Return NFS4ERR_DELAY when delegation/layout recalls fail because
igrab() fails

- Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY"

* tag 'nfs-for-4.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (80 commits)
skip LAYOUTRETURN if layout is invalid
NFSv4.1: Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY
NFSv4: Fix a typo in nfs41_sequence_process
NFSv4: Revert commit 5f83d86cf531d ("NFSv4.x: Fix wraparound issues..")
NFSv4: Return NFS4ERR_DELAY when a layout recall fails due to igrab()
NFSv4: Return NFS4ERR_DELAY when a delegation recall fails due to igrab()
NFSv4.0: Remove transport protocol name from non-UCS client ID
NFSv4.0: Remove cl_ipaddr from non-UCS client ID
NFSv4: Fix a compiler warning when CONFIG_NFS_V4_1 is undefined
NFS: Filter cache invalidation when holding a delegation
NFS: Ignore NFS_INO_REVAL_FORCED in nfs_check_inode_attributes()
NFS: Improve caching while holding a delegation
NFS: Fix attribute revalidation
NFS: fix up nfs_setattr_update_inode
NFSv4: Ensure the inode is clean when we set a delegation
NFSv4: Ignore NFS_INO_REVAL_FORCED in nfs4_proc_access
NFSv4: Don't ask for delegated attributes when adding a hard link
NFSv4: Don't ask for delegated attributes when revalidating the inode
NFS: Pass the inode down to the getattr() callback
NFSv4: Don't request size+change attribute if they are delegated to us
...

+1244 -737
+24 -19
fs/nfs/callback_proc.c
··· 40 40 rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); 41 41 42 42 inode = nfs_delegation_find_inode(cps->clp, &args->fh); 43 - if (inode == NULL) { 43 + if (IS_ERR(inode)) { 44 + if (inode == ERR_PTR(-EAGAIN)) 45 + res->status = htonl(NFS4ERR_DELAY); 44 46 trace_nfs4_cb_getattr(cps->clp, &args->fh, NULL, 45 47 -ntohl(res->status)); 46 48 goto out; ··· 88 86 89 87 res = htonl(NFS4ERR_BADHANDLE); 90 88 inode = nfs_delegation_find_inode(cps->clp, &args->fh); 91 - if (inode == NULL) { 89 + if (IS_ERR(inode)) { 90 + if (inode == ERR_PTR(-EAGAIN)) 91 + res = htonl(NFS4ERR_DELAY); 92 92 trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, 93 93 &args->stateid, -ntohl(res)); 94 94 goto out; ··· 128 124 struct inode *inode; 129 125 struct pnfs_layout_hdr *lo; 130 126 131 - restart: 132 127 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 133 128 list_for_each_entry(lo, &server->layouts, plh_layouts) { 134 129 if (stateid != NULL && ··· 135 132 continue; 136 133 inode = igrab(lo->plh_inode); 137 134 if (!inode) 138 - continue; 135 + return ERR_PTR(-EAGAIN); 139 136 if (!nfs_sb_active(inode->i_sb)) { 140 137 rcu_read_unlock(); 141 138 spin_unlock(&clp->cl_lock); 142 139 iput(inode); 143 140 spin_lock(&clp->cl_lock); 144 141 rcu_read_lock(); 145 - goto restart; 142 + return ERR_PTR(-EAGAIN); 146 143 } 147 144 return inode; 148 145 } 149 146 } 150 147 151 - return NULL; 148 + return ERR_PTR(-ENOENT); 152 149 } 153 150 154 151 /* ··· 165 162 struct inode *inode; 166 163 struct pnfs_layout_hdr *lo; 167 164 168 - restart: 169 165 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 170 166 list_for_each_entry(lo, &server->layouts, plh_layouts) { 171 167 nfsi = NFS_I(lo->plh_inode); ··· 174 172 continue; 175 173 inode = igrab(lo->plh_inode); 176 174 if (!inode) 177 - continue; 175 + return ERR_PTR(-EAGAIN); 178 176 if (!nfs_sb_active(inode->i_sb)) { 179 177 rcu_read_unlock(); 180 178 spin_unlock(&clp->cl_lock); 181 179 iput(inode); 
182 180 spin_lock(&clp->cl_lock); 183 181 rcu_read_lock(); 184 - goto restart; 182 + return ERR_PTR(-EAGAIN); 185 183 } 186 184 return inode; 187 185 } 188 186 } 189 187 190 - return NULL; 188 + return ERR_PTR(-ENOENT); 191 189 } 192 190 193 191 static struct inode *nfs_layout_find_inode(struct nfs_client *clp, ··· 199 197 spin_lock(&clp->cl_lock); 200 198 rcu_read_lock(); 201 199 inode = nfs_layout_find_inode_by_stateid(clp, stateid); 202 - if (!inode) 200 + if (inode == ERR_PTR(-ENOENT)) 203 201 inode = nfs_layout_find_inode_by_fh(clp, fh); 204 202 rcu_read_unlock(); 205 203 spin_unlock(&clp->cl_lock); ··· 254 252 LIST_HEAD(free_me_list); 255 253 256 254 ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid); 257 - if (!ino) 258 - goto out; 255 + if (IS_ERR(ino)) { 256 + if (ino == ERR_PTR(-EAGAIN)) 257 + rv = NFS4ERR_DELAY; 258 + goto out_noput; 259 + } 259 260 260 261 pnfs_layoutcommit_inode(ino, false); 261 262 ··· 304 299 nfs_commit_inode(ino, 0); 305 300 pnfs_put_layout_hdr(lo); 306 301 out: 302 + nfs_iput_and_deactive(ino); 303 + out_noput: 307 304 trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, 308 305 &args->cbl_stateid, -rv); 309 - nfs_iput_and_deactive(ino); 310 306 return rv; 311 307 } 312 308 ··· 328 322 static u32 do_callback_layoutrecall(struct nfs_client *clp, 329 323 struct cb_layoutrecallargs *args) 330 324 { 325 + write_seqcount_begin(&clp->cl_callback_count); 326 + write_seqcount_end(&clp->cl_callback_count); 331 327 if (args->cbl_recall_type == RETURN_FILE) 332 328 return initiate_file_draining(clp, args); 333 329 return initiate_bulk_draining(clp, args); ··· 428 420 return htonl(NFS4ERR_SEQ_FALSE_RETRY); 429 421 } 430 422 431 - /* Wraparound */ 432 - if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) { 433 - if (args->csa_sequenceid == 1) 434 - return htonl(NFS4_OK); 435 - } else if (likely(args->csa_sequenceid == slot->seq_nr + 1)) 423 + /* Note: wraparound relies on seq_nr being of type u32 */ 424 + if 
(likely(args->csa_sequenceid == slot->seq_nr + 1)) 436 425 return htonl(NFS4_OK); 437 426 438 427 /* Misordered request */
+2 -1
fs/nfs/client.c
··· 969 969 } 970 970 971 971 if (!(fattr->valid & NFS_ATTR_FATTR)) { 972 - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); 972 + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, 973 + fattr, NULL, NULL); 973 974 if (error < 0) { 974 975 dprintk("nfs_create_server: getattr error = %d\n", -error); 975 976 goto error;
+72 -14
fs/nfs/delegation.c
··· 404 404 405 405 trace_nfs4_set_delegation(inode, type); 406 406 407 + spin_lock(&inode->i_lock); 408 + if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) 409 + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED; 410 + spin_unlock(&inode->i_lock); 407 411 out: 408 412 spin_unlock(&clp->cl_lock); 409 413 if (delegation != NULL) ··· 487 483 int nfs_client_return_marked_delegations(struct nfs_client *clp) 488 484 { 489 485 struct nfs_delegation *delegation; 486 + struct nfs_delegation *prev; 490 487 struct nfs_server *server; 491 488 struct inode *inode; 489 + struct inode *place_holder = NULL; 490 + struct nfs_delegation *place_holder_deleg = NULL; 492 491 int err = 0; 493 492 494 493 restart: 494 + /* 495 + * To avoid quadratic looping we hold a reference 496 + * to an inode place_holder. Each time we restart, we 497 + * list nfs_servers from the server of that inode, and 498 + * delegation in the server from the delegations of that 499 + * inode. 500 + * prev is an RCU-protected pointer to a delegation which 501 + * wasn't marked for return and might be a good choice for 502 + * the next place_holder. 
503 + */ 495 504 rcu_read_lock(); 496 - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 497 - list_for_each_entry_rcu(delegation, &server->delegations, 498 - super_list) { 499 - if (!nfs_delegation_need_return(delegation)) 505 + prev = NULL; 506 + if (place_holder) 507 + server = NFS_SERVER(place_holder); 508 + else 509 + server = list_entry_rcu(clp->cl_superblocks.next, 510 + struct nfs_server, client_link); 511 + list_for_each_entry_from_rcu(server, &clp->cl_superblocks, client_link) { 512 + delegation = NULL; 513 + if (place_holder && server == NFS_SERVER(place_holder)) 514 + delegation = rcu_dereference(NFS_I(place_holder)->delegation); 515 + if (!delegation || delegation != place_holder_deleg) 516 + delegation = list_entry_rcu(server->delegations.next, 517 + struct nfs_delegation, super_list); 518 + list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) { 519 + struct inode *to_put = NULL; 520 + 521 + if (!nfs_delegation_need_return(delegation)) { 522 + prev = delegation; 500 523 continue; 524 + } 501 525 if (!nfs_sb_active(server->super)) 502 - continue; 526 + break; /* continue in outer loop */ 527 + 528 + if (prev) { 529 + struct inode *tmp; 530 + 531 + tmp = nfs_delegation_grab_inode(prev); 532 + if (tmp) { 533 + to_put = place_holder; 534 + place_holder = tmp; 535 + place_holder_deleg = prev; 536 + } 537 + } 538 + 503 539 inode = nfs_delegation_grab_inode(delegation); 504 540 if (inode == NULL) { 505 541 rcu_read_unlock(); 542 + if (to_put) 543 + iput(to_put); 506 544 nfs_sb_deactive(server->super); 507 545 goto restart; 508 546 } 509 547 delegation = nfs_start_delegation_return_locked(NFS_I(inode)); 510 548 rcu_read_unlock(); 511 549 550 + if (to_put) 551 + iput(to_put); 552 + 512 553 err = nfs_end_delegation_return(inode, delegation, 0); 513 554 iput(inode); 514 555 nfs_sb_deactive(server->super); 556 + cond_resched(); 515 557 if (!err) 516 558 goto restart; 517 559 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 
560 + if (place_holder) 561 + iput(place_holder); 518 562 return err; 519 563 } 520 564 } 521 565 rcu_read_unlock(); 566 + if (place_holder) 567 + iput(place_holder); 522 568 return 0; 523 569 } 524 570 ··· 856 802 if (delegation->inode != NULL && 857 803 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 858 804 res = igrab(delegation->inode); 805 + spin_unlock(&delegation->lock); 806 + if (res != NULL) 807 + return res; 808 + return ERR_PTR(-EAGAIN); 859 809 } 860 810 spin_unlock(&delegation->lock); 861 - if (res != NULL) 862 - break; 863 811 } 864 - return res; 812 + return ERR_PTR(-ENOENT); 865 813 } 866 814 867 815 /** ··· 878 822 const struct nfs_fh *fhandle) 879 823 { 880 824 struct nfs_server *server; 881 - struct inode *res = NULL; 825 + struct inode *res; 882 826 883 827 rcu_read_lock(); 884 828 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 885 829 res = nfs_delegation_find_inode_server(server, fhandle); 886 - if (res != NULL) 887 - break; 830 + if (res != ERR_PTR(-ENOENT)) 831 + return res; 888 832 } 889 833 rcu_read_unlock(); 890 - return res; 834 + return ERR_PTR(-ENOENT); 891 835 } 892 836 893 837 static void nfs_delegation_mark_reclaim_server(struct nfs_server *server) ··· 943 887 &delegation->flags) == 0) 944 888 continue; 945 889 if (!nfs_sb_active(server->super)) 946 - continue; 890 + break; /* continue in outer loop */ 947 891 inode = nfs_delegation_grab_inode(delegation); 948 892 if (inode == NULL) { 949 893 rcu_read_unlock(); ··· 960 904 } 961 905 iput(inode); 962 906 nfs_sb_deactive(server->super); 907 + cond_resched(); 963 908 goto restart; 964 909 } 965 910 } ··· 1052 995 &delegation->flags) == 0) 1053 996 continue; 1054 997 if (!nfs_sb_active(server->super)) 1055 - continue; 998 + break; /* continue in outer loop */ 1056 999 inode = nfs_delegation_grab_inode(delegation); 1057 1000 if (inode == NULL) { 1058 1001 rcu_read_unlock(); ··· 1077 1020 } 1078 1021 iput(inode); 1079 1022 
nfs_sb_deactive(server->super); 1023 + cond_resched(); 1080 1024 goto restart; 1081 1025 } 1082 1026 }
+38 -13
fs/nfs/dir.c
··· 1012 1012 1013 1013 if (IS_AUTOMOUNT(inode)) 1014 1014 return 0; 1015 + 1016 + if (flags & LOOKUP_OPEN) { 1017 + switch (inode->i_mode & S_IFMT) { 1018 + case S_IFREG: 1019 + /* A NFSv4 OPEN will revalidate later */ 1020 + if (server->caps & NFS_CAP_ATOMIC_OPEN) 1021 + goto out; 1022 + /* Fallthrough */ 1023 + case S_IFDIR: 1024 + if (server->flags & NFS_MOUNT_NOCTO) 1025 + break; 1026 + /* NFS close-to-open cache consistency validation */ 1027 + goto out_force; 1028 + } 1029 + } 1030 + 1015 1031 /* VFS wants an on-the-wire revalidation */ 1016 1032 if (flags & LOOKUP_REVAL) 1017 - goto out_force; 1018 - /* This is an open(2) */ 1019 - if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && 1020 - (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) 1021 1033 goto out_force; 1022 1034 out: 1023 1035 return (inode->i_nlink == 0) ? -ENOENT : 0; ··· 1051 1039 * 1052 1040 * If LOOKUP_RCU prevents us from performing a full check, return 1 1053 1041 * suggesting a reval is needed. 1042 + * 1043 + * Note that when creating a new file, or looking up a rename target, 1044 + * then it shouldn't be necessary to revalidate a negative dentry. 
1054 1045 */ 1055 1046 static inline 1056 1047 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, 1057 1048 unsigned int flags) 1058 1049 { 1059 - /* Don't revalidate a negative dentry if we're creating a new file */ 1060 - if (flags & LOOKUP_CREATE) 1050 + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 1061 1051 return 0; 1062 1052 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) 1063 1053 return 1; ··· 1120 1106 goto out_set_verifier; 1121 1107 1122 1108 /* Force a full look up iff the parent directory has changed */ 1123 - if (!nfs_is_exclusive_create(dir, flags) && 1109 + if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && 1124 1110 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { 1125 1111 error = nfs_lookup_verify_inode(inode, flags); 1126 1112 if (error) { ··· 1284 1270 { 1285 1271 spin_lock(&inode->i_lock); 1286 1272 /* drop the inode if we're reasonably sure this is the last link */ 1287 - if (inode->i_nlink == 1) 1288 - clear_nlink(inode); 1273 + if (inode->i_nlink > 0) 1274 + drop_nlink(inode); 1275 + NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); 1289 1276 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE 1290 1277 | NFS_INO_INVALID_CTIME 1291 - | NFS_INO_INVALID_OTHER; 1278 + | NFS_INO_INVALID_OTHER 1279 + | NFS_INO_REVAL_FORCED; 1292 1280 spin_unlock(&inode->i_lock); 1293 1281 } 1294 1282 ··· 1351 1335 * If we're doing an exclusive create, optimize away the lookup 1352 1336 * but don't hash the dentry. 
1353 1337 */ 1354 - if (nfs_is_exclusive_create(dir, flags)) 1338 + if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET) 1355 1339 return NULL; 1356 1340 1357 1341 res = ERR_PTR(-ENOMEM); ··· 1656 1640 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1657 1641 if (!(fattr->valid & NFS_ATTR_FATTR)) { 1658 1642 struct nfs_server *server = NFS_SB(dentry->d_sb); 1659 - error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); 1643 + error = server->nfs_client->rpc_ops->getattr(server, fhandle, 1644 + fattr, NULL, NULL); 1660 1645 if (error < 0) 1661 1646 goto out_error; 1662 1647 } ··· 2053 2036 } else 2054 2037 error = task->tk_status; 2055 2038 rpc_put_task(task); 2056 - nfs_mark_for_revalidate(old_inode); 2039 + /* Ensure the inode attributes are revalidated */ 2040 + if (error == 0) { 2041 + spin_lock(&old_inode->i_lock); 2042 + NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter(); 2043 + NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE 2044 + | NFS_INO_INVALID_CTIME 2045 + | NFS_INO_REVAL_FORCED; 2046 + spin_unlock(&old_inode->i_lock); 2047 + } 2057 2048 out: 2058 2049 if (rehash) 2059 2050 d_rehash(rehash);
+1 -1
fs/nfs/export.c
··· 102 102 } 103 103 104 104 rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; 105 - ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label); 105 + ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); 106 106 if (ret) { 107 107 dprintk("%s: getattr failed %d\n", __func__, ret); 108 108 dentry = ERR_PTR(ret);
+1
fs/nfs/flexfilelayout/flexfilelayout.c
··· 2347 2347 .id = LAYOUT_FLEX_FILES, 2348 2348 .name = "LAYOUT_FLEX_FILES", 2349 2349 .owner = THIS_MODULE, 2350 + .flags = PNFS_LAYOUTGET_ON_OPEN, 2350 2351 .set_layoutdriver = ff_layout_set_layoutdriver, 2351 2352 .alloc_layout_hdr = ff_layout_alloc_layout_hdr, 2352 2353 .free_layout_hdr = ff_layout_free_layout_hdr,
+87 -39
fs/nfs/inode.c
··· 195 195 static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) 196 196 { 197 197 struct nfs_inode *nfsi = NFS_I(inode); 198 - bool have_delegation = nfs_have_delegated_attributes(inode); 198 + bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); 199 199 200 - if (have_delegation) 201 - flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE); 200 + if (have_delegation) { 201 + if (!(flags & NFS_INO_REVAL_FORCED)) 202 + flags &= ~NFS_INO_INVALID_OTHER; 203 + flags &= ~(NFS_INO_INVALID_CHANGE 204 + | NFS_INO_INVALID_SIZE 205 + | NFS_INO_REVAL_PAGECACHE); 206 + } 207 + 202 208 if (inode->i_mapping->nrpages == 0) 203 209 flags &= ~NFS_INO_INVALID_DATA; 204 210 nfsi->cache_validity |= flags; ··· 454 448 /* We can't support update_atime(), since the server will reset it */ 455 449 inode->i_flags |= S_NOATIME|S_NOCMTIME; 456 450 inode->i_mode = fattr->mode; 451 + nfsi->cache_validity = 0; 457 452 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 458 453 && nfs_server_capable(inode, NFS_CAP_MODE)) 459 454 nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); ··· 540 533 */ 541 534 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 542 535 } 536 + 537 + if (nfsi->cache_validity != 0) 538 + nfsi->cache_validity |= NFS_INO_REVAL_FORCED; 543 539 544 540 nfs_setsecurity(inode, fattr, label); 545 541 ··· 677 667 678 668 spin_lock(&inode->i_lock); 679 669 NFS_I(inode)->attr_gencount = fattr->gencount; 680 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 681 - | NFS_INO_INVALID_CTIME); 670 + if ((attr->ia_valid & ATTR_SIZE) != 0) { 671 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 672 + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); 673 + nfs_vmtruncate(inode, attr->ia_size); 674 + } 682 675 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { 676 + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME; 683 677 if ((attr->ia_valid & ATTR_MODE) != 0) { 684 678 int mode = attr->ia_mode & S_IALLUGO; 685 679 
mode |= inode->i_mode & ~S_IALLUGO; ··· 693 679 inode->i_uid = attr->ia_uid; 694 680 if ((attr->ia_valid & ATTR_GID) != 0) 695 681 inode->i_gid = attr->ia_gid; 682 + if (fattr->valid & NFS_ATTR_FATTR_CTIME) 683 + inode->i_ctime = fattr->ctime; 684 + else 685 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 686 + | NFS_INO_INVALID_CTIME); 696 687 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS 697 688 | NFS_INO_INVALID_ACL); 698 689 } 699 - if ((attr->ia_valid & ATTR_SIZE) != 0) { 700 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 701 - nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); 702 - nfs_vmtruncate(inode, attr->ia_size); 690 + if (attr->ia_valid & (ATTR_ATIME_SET|ATTR_ATIME)) { 691 + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME 692 + | NFS_INO_INVALID_CTIME); 693 + if (fattr->valid & NFS_ATTR_FATTR_ATIME) 694 + inode->i_atime = fattr->atime; 695 + else if (attr->ia_valid & ATTR_ATIME_SET) 696 + inode->i_atime = attr->ia_atime; 697 + else 698 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); 699 + 700 + if (fattr->valid & NFS_ATTR_FATTR_CTIME) 701 + inode->i_ctime = fattr->ctime; 702 + else 703 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 704 + | NFS_INO_INVALID_CTIME); 705 + } 706 + if (attr->ia_valid & (ATTR_MTIME_SET|ATTR_MTIME)) { 707 + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME 708 + | NFS_INO_INVALID_CTIME); 709 + if (fattr->valid & NFS_ATTR_FATTR_MTIME) 710 + inode->i_mtime = fattr->mtime; 711 + else if (attr->ia_valid & ATTR_MTIME_SET) 712 + inode->i_mtime = attr->ia_mtime; 713 + else 714 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 715 + 716 + if (fattr->valid & NFS_ATTR_FATTR_CTIME) 717 + inode->i_ctime = fattr->ctime; 718 + else 719 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 720 + | NFS_INO_INVALID_CTIME); 703 721 } 704 722 if (fattr->valid) 705 723 nfs_update_inode(inode, fattr); ··· 1143 1097 goto out; 1144 1098 } 1145 1099 1146 - status = NFS_PROTO(inode)->getattr(server, 
NFS_FH(inode), fattr, label); 1100 + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, 1101 + label, inode); 1147 1102 if (status != 0) { 1148 1103 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", 1149 1104 inode->i_sb->s_id, ··· 1396 1349 unsigned long invalid = 0; 1397 1350 1398 1351 1399 - if (nfs_have_delegated_attributes(inode)) 1352 + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) 1400 1353 return 0; 1354 + 1401 1355 /* Has the inode gone and changed behind our back? */ 1402 1356 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1403 1357 return -ESTALE; ··· 1448 1400 invalid |= NFS_INO_INVALID_ATIME; 1449 1401 1450 1402 if (invalid != 0) 1451 - nfs_set_cache_invalid(inode, invalid | NFS_INO_REVAL_FORCED); 1403 + nfs_set_cache_invalid(inode, invalid); 1452 1404 1453 1405 nfsi->read_cache_jiffies = fattr->time_start; 1454 1406 return 0; ··· 1677 1629 nfs_fattr_set_barrier(fattr); 1678 1630 status = nfs_post_op_update_inode_locked(inode, fattr, 1679 1631 NFS_INO_INVALID_CHANGE 1680 - | NFS_INO_INVALID_CTIME); 1632 + | NFS_INO_INVALID_CTIME 1633 + | NFS_INO_REVAL_FORCED); 1681 1634 spin_unlock(&inode->i_lock); 1682 1635 1683 1636 return status; ··· 1795 1746 unsigned long save_cache_validity; 1796 1747 bool have_writers = nfs_file_has_buffered_writers(nfsi); 1797 1748 bool cache_revalidated = true; 1749 + bool attr_changed = false; 1750 + bool have_delegation; 1798 1751 1799 1752 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", 1800 1753 __func__, inode->i_sb->s_id, inode->i_ino, ··· 1831 1780 !IS_AUTOMOUNT(inode)) 1832 1781 server->fsid = fattr->fsid; 1833 1782 1783 + /* Save the delegation state before clearing cache_validity */ 1784 + have_delegation = nfs_have_delegated_attributes(inode); 1785 + 1834 1786 /* 1835 1787 * Update the read time so we don't revalidate too often. 
1836 1788 */ ··· 1856 1802 /* More cache consistency checks */ 1857 1803 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { 1858 1804 if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { 1859 - dprintk("NFS: change_attr change on server for file %s/%ld\n", 1860 - inode->i_sb->s_id, inode->i_ino); 1861 1805 /* Could it be a race with writeback? */ 1862 - if (!have_writers) { 1863 - invalid |= NFS_INO_INVALID_CHANGE 1864 - | NFS_INO_INVALID_DATA 1806 + if (!(have_writers || have_delegation)) { 1807 + invalid |= NFS_INO_INVALID_DATA 1865 1808 | NFS_INO_INVALID_ACCESS 1866 1809 | NFS_INO_INVALID_ACL; 1867 1810 /* Force revalidate of all attributes */ ··· 1868 1817 | NFS_INO_INVALID_OTHER; 1869 1818 if (S_ISDIR(inode->i_mode)) 1870 1819 nfs_force_lookup_revalidate(inode); 1820 + dprintk("NFS: change_attr change on server for file %s/%ld\n", 1821 + inode->i_sb->s_id, 1822 + inode->i_ino); 1871 1823 } 1872 1824 inode_set_iversion_raw(inode, fattr->change_attr); 1825 + attr_changed = true; 1873 1826 } 1874 1827 } else { 1875 1828 nfsi->cache_validity |= save_cache_validity & ··· 1905 1850 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 1906 1851 new_isize = nfs_size_to_loff_t(fattr->size); 1907 1852 cur_isize = i_size_read(inode); 1908 - if (new_isize != cur_isize) { 1853 + if (new_isize != cur_isize && !have_delegation) { 1909 1854 /* Do we perhaps have any outstanding writes, or has 1910 1855 * the file grown beyond our last write? 
*/ 1911 1856 if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { 1912 1857 i_size_write(inode, new_isize); 1913 1858 if (!have_writers) 1914 1859 invalid |= NFS_INO_INVALID_DATA; 1860 + attr_changed = true; 1915 1861 } 1916 1862 dprintk("NFS: isize change on server for file %s/%ld " 1917 1863 "(%Ld to %Ld)\n", ··· 1945 1889 newmode |= fattr->mode & S_IALLUGO; 1946 1890 inode->i_mode = newmode; 1947 1891 invalid |= NFS_INO_INVALID_ACCESS 1948 - | NFS_INO_INVALID_ACL 1949 - | NFS_INO_INVALID_OTHER; 1892 + | NFS_INO_INVALID_ACL; 1893 + attr_changed = true; 1950 1894 } 1951 1895 } else if (server->caps & NFS_CAP_MODE) { 1952 1896 nfsi->cache_validity |= save_cache_validity & 1953 - (NFS_INO_INVALID_ACCESS 1954 - | NFS_INO_INVALID_ACL 1955 - | NFS_INO_INVALID_OTHER 1897 + (NFS_INO_INVALID_OTHER 1956 1898 | NFS_INO_REVAL_FORCED); 1957 1899 cache_revalidated = false; 1958 1900 } ··· 1958 1904 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1959 1905 if (!uid_eq(inode->i_uid, fattr->uid)) { 1960 1906 invalid |= NFS_INO_INVALID_ACCESS 1961 - | NFS_INO_INVALID_ACL 1962 - | NFS_INO_INVALID_OTHER; 1907 + | NFS_INO_INVALID_ACL; 1963 1908 inode->i_uid = fattr->uid; 1909 + attr_changed = true; 1964 1910 } 1965 1911 } else if (server->caps & NFS_CAP_OWNER) { 1966 1912 nfsi->cache_validity |= save_cache_validity & 1967 - (NFS_INO_INVALID_ACCESS 1968 - | NFS_INO_INVALID_ACL 1969 - | NFS_INO_INVALID_OTHER 1913 + (NFS_INO_INVALID_OTHER 1970 1914 | NFS_INO_REVAL_FORCED); 1971 1915 cache_revalidated = false; 1972 1916 } ··· 1972 1920 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1973 1921 if (!gid_eq(inode->i_gid, fattr->gid)) { 1974 1922 invalid |= NFS_INO_INVALID_ACCESS 1975 - | NFS_INO_INVALID_ACL 1976 - | NFS_INO_INVALID_OTHER; 1923 + | NFS_INO_INVALID_ACL; 1977 1924 inode->i_gid = fattr->gid; 1925 + attr_changed = true; 1978 1926 } 1979 1927 } else if (server->caps & NFS_CAP_OWNER_GROUP) { 1980 1928 nfsi->cache_validity |= save_cache_validity & 1981 - (NFS_INO_INVALID_ACCESS 1982 - 
| NFS_INO_INVALID_ACL 1983 - | NFS_INO_INVALID_OTHER 1929 + (NFS_INO_INVALID_OTHER 1984 1930 | NFS_INO_REVAL_FORCED); 1985 1931 cache_revalidated = false; 1986 1932 } 1987 1933 1988 1934 if (fattr->valid & NFS_ATTR_FATTR_NLINK) { 1989 1935 if (inode->i_nlink != fattr->nlink) { 1990 - invalid |= NFS_INO_INVALID_OTHER; 1991 1936 if (S_ISDIR(inode->i_mode)) 1992 1937 invalid |= NFS_INO_INVALID_DATA; 1993 1938 set_nlink(inode, fattr->nlink); 1939 + attr_changed = true; 1994 1940 } 1995 1941 } else if (server->caps & NFS_CAP_NLINK) { 1996 1942 nfsi->cache_validity |= save_cache_validity & ··· 2008 1958 cache_revalidated = false; 2009 1959 2010 1960 /* Update attrtimeo value if we're out of the unstable period */ 2011 - if (invalid & NFS_INO_INVALID_ATTR) { 1961 + if (attr_changed) { 2012 1962 invalid &= ~NFS_INO_INVALID_ATTR; 2013 1963 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); 2014 1964 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); ··· 2034 1984 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 2035 1985 || S_ISLNK(inode->i_mode))) 2036 1986 invalid &= ~NFS_INO_INVALID_DATA; 2037 - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || 2038 - (save_cache_validity & NFS_INO_REVAL_FORCED)) 2039 - nfs_set_cache_invalid(inode, invalid); 1987 + nfs_set_cache_invalid(inode, invalid); 2040 1988 2041 1989 return 0; 2042 1990 out_err:
+9 -4
fs/nfs/nfs3proc.c
··· 101 101 */ 102 102 static int 103 103 nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 104 - struct nfs_fattr *fattr, struct nfs4_label *label) 104 + struct nfs_fattr *fattr, struct nfs4_label *label, 105 + struct inode *inode) 105 106 { 106 107 struct rpc_message msg = { 107 108 .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], ··· 415 414 } 416 415 417 416 static void 418 - nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) 417 + nfs3_proc_unlink_setup(struct rpc_message *msg, 418 + struct dentry *dentry, 419 + struct inode *inode) 419 420 { 420 421 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; 421 422 } ··· 826 823 } 827 824 828 825 static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, 829 - struct rpc_message *msg) 826 + struct rpc_message *msg, 827 + struct rpc_clnt **clnt) 830 828 { 831 829 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 832 830 } ··· 848 844 return 0; 849 845 } 850 846 851 - static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) 847 + static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, 848 + struct rpc_clnt **clnt) 852 849 { 853 850 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; 854 851 }
+5 -1
fs/nfs/nfs42proc.c
··· 370 370 switch (task->tk_status) { 371 371 case 0: 372 372 break; 373 + case -NFS4ERR_BADHANDLE: 374 + case -ESTALE: 375 + pnfs_destroy_layout(NFS_I(inode)); 376 + break; 373 377 case -NFS4ERR_EXPIRED: 374 378 case -NFS4ERR_ADMIN_REVOKED: 375 379 case -NFS4ERR_DELEG_REVOKED: ··· 466 462 nfs42_layoutstat_release(data); 467 463 return -EAGAIN; 468 464 } 469 - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 465 + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); 470 466 task = rpc_run_task(&task_setup); 471 467 if (IS_ERR(task)) 472 468 return PTR_ERR(task);
+26 -1
fs/nfs/nfs4_fs.h
··· 212 212 struct rpc_cred *); 213 213 }; 214 214 215 + struct nfs4_opendata { 216 + struct kref kref; 217 + struct nfs_openargs o_arg; 218 + struct nfs_openres o_res; 219 + struct nfs_open_confirmargs c_arg; 220 + struct nfs_open_confirmres c_res; 221 + struct nfs4_string owner_name; 222 + struct nfs4_string group_name; 223 + struct nfs4_label *a_label; 224 + struct nfs_fattr f_attr; 225 + struct nfs4_label *f_label; 226 + struct dentry *dir; 227 + struct dentry *dentry; 228 + struct nfs4_state_owner *owner; 229 + struct nfs4_state *state; 230 + struct iattr attrs; 231 + struct nfs4_layoutget *lgp; 232 + unsigned long timestamp; 233 + bool rpc_done; 234 + bool file_created; 235 + bool is_recover; 236 + bool cancelled; 237 + int rpc_status; 238 + }; 239 + 215 240 struct nfs4_add_xprt_data { 216 241 struct nfs_client *clp; 217 242 struct rpc_cred *cred; ··· 276 251 extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, 277 252 struct rpc_message *, struct nfs4_sequence_args *, 278 253 struct nfs4_sequence_res *, int); 279 - extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); 254 + extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int); 280 255 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); 281 256 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); 282 257 extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
+3 -2
fs/nfs/nfs4idmap.c
··· 343 343 int id_len; 344 344 ssize_t ret; 345 345 346 - id_len = snprintf(id_str, sizeof(id_str), "%u", id); 346 + id_len = nfs_map_numeric_to_string(id, id_str, sizeof(id_str)); 347 347 ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); 348 348 if (ret < 0) 349 349 return -EINVAL; ··· 627 627 if (strcmp(upcall->im_name, im->im_name) != 0) 628 628 break; 629 629 /* Note: here we store the NUL terminator too */ 630 - len = sprintf(id_str, "%d", im->im_id) + 1; 630 + len = 1 + nfs_map_numeric_to_string(im->im_id, id_str, 631 + sizeof(id_str)); 631 632 ret = nfs_idmap_instantiate(key, authkey, id_str, len); 632 633 break; 633 634 case IDMAP_CONV_IDTONAME:
+180 -211
fs/nfs/nfs4proc.c
··· 71 71 72 72 #define NFSDBG_FACILITY NFSDBG_PROC 73 73 74 + #define NFS4_BITMASK_SZ 3 75 + 74 76 #define NFS4_POLL_RETRY_MIN (HZ/10) 75 77 #define NFS4_POLL_RETRY_MAX (15*HZ) 76 78 ··· 88 86 | ATTR_MTIME_SET) 89 87 90 88 struct nfs4_opendata; 91 - static int _nfs4_proc_open(struct nfs4_opendata *data); 92 89 static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 93 90 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 94 91 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); 95 - static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); 96 - static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); 92 + static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode); 93 + static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); 97 94 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 98 95 struct nfs_fattr *fattr, struct iattr *sattr, 99 96 struct nfs_open_context *ctx, struct nfs4_label *ilabel, ··· 275 274 | FATTR4_WORD1_MOUNTED_ON_FILEID, 276 275 }; 277 276 277 + static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, 278 + struct inode *inode) 279 + { 280 + unsigned long cache_validity; 281 + 282 + memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); 283 + if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) 284 + return; 285 + 286 + cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); 287 + if (!(cache_validity & NFS_INO_REVAL_FORCED)) 288 + cache_validity &= ~(NFS_INO_INVALID_CHANGE 289 + | NFS_INO_INVALID_SIZE); 290 + 291 + if (!(cache_validity & NFS_INO_INVALID_SIZE)) 292 + dst[0] &= ~FATTR4_WORD0_SIZE; 293 + 294 + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) 295 + 
dst[0] &= ~FATTR4_WORD0_CHANGE; 296 + } 297 + 298 + static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst, 299 + const __u32 *src, struct inode *inode) 300 + { 301 + nfs4_bitmap_copy_adjust(dst, src, inode); 302 + } 303 + 278 304 static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, 279 305 struct nfs4_readdir_arg *readdir) 280 306 { ··· 435 407 switch(errorcode) { 436 408 case 0: 437 409 return 0; 410 + case -NFS4ERR_BADHANDLE: 411 + case -ESTALE: 412 + if (inode != NULL && S_ISREG(inode->i_mode)) 413 + pnfs_destroy_layout(NFS_I(inode)); 414 + break; 438 415 case -NFS4ERR_DELEG_REVOKED: 439 416 case -NFS4ERR_ADMIN_REVOKED: 440 417 case -NFS4ERR_EXPIRED: ··· 641 608 }; 642 609 643 610 void nfs4_init_sequence(struct nfs4_sequence_args *args, 644 - struct nfs4_sequence_res *res, int cache_reply) 611 + struct nfs4_sequence_res *res, int cache_reply, 612 + int privileged) 645 613 { 646 614 args->sa_slot = NULL; 647 615 args->sa_cache_this = cache_reply; 648 - args->sa_privileged = 0; 616 + args->sa_privileged = privileged; 649 617 650 618 res->sr_slot = NULL; 651 - } 652 - 653 - static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) 654 - { 655 - args->sa_privileged = 1; 656 619 } 657 620 658 621 static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) ··· 775 746 slot->slot_nr, 776 747 slot->seq_nr); 777 748 goto out_retry; 749 + case -NFS4ERR_RETRY_UNCACHED_REP: 750 + case -NFS4ERR_SEQ_FALSE_RETRY: 751 + /* 752 + * The server thinks we tried to replay a request. 753 + * Retry the call after bumping the sequence ID. 754 + */ 755 + goto retry_new_seq; 778 756 case -NFS4ERR_BADSLOT: 779 757 /* 780 758 * The slot id we used was probably retired. Try again 781 759 * using a different slot id. 
782 760 */ 783 - if (slot->seq_nr < slot->table->target_highest_slotid) 761 + if (slot->slot_nr < slot->table->target_highest_slotid) 784 762 goto session_recover; 785 763 goto retry_nowait; 786 764 case -NFS4ERR_SEQ_MISORDERED: ··· 805 769 slot->seq_nr = 1; 806 770 goto retry_nowait; 807 771 } 808 - goto session_recover; 809 - case -NFS4ERR_SEQ_FALSE_RETRY: 810 - if (interrupted) 811 - goto retry_new_seq; 812 772 goto session_recover; 813 773 default: 814 774 /* Just update the slot sequence no. */ ··· 1067 1035 struct nfs4_sequence_res *res, 1068 1036 int cache_reply) 1069 1037 { 1070 - nfs4_init_sequence(args, res, cache_reply); 1038 + nfs4_init_sequence(args, res, cache_reply, 0); 1071 1039 return nfs4_call_sync_sequence(clnt, server, msg, args, res); 1072 1040 } 1073 1041 ··· 1095 1063 nfs_fscache_invalidate(dir); 1096 1064 spin_unlock(&dir->i_lock); 1097 1065 } 1098 - 1099 - struct nfs4_opendata { 1100 - struct kref kref; 1101 - struct nfs_openargs o_arg; 1102 - struct nfs_openres o_res; 1103 - struct nfs_open_confirmargs c_arg; 1104 - struct nfs_open_confirmres c_res; 1105 - struct nfs4_string owner_name; 1106 - struct nfs4_string group_name; 1107 - struct nfs4_label *a_label; 1108 - struct nfs_fattr f_attr; 1109 - struct nfs4_label *f_label; 1110 - struct dentry *dir; 1111 - struct dentry *dentry; 1112 - struct nfs4_state_owner *owner; 1113 - struct nfs4_state *state; 1114 - struct iattr attrs; 1115 - unsigned long timestamp; 1116 - bool rpc_done; 1117 - bool file_created; 1118 - bool is_recover; 1119 - bool cancelled; 1120 - int rpc_status; 1121 - }; 1122 1066 1123 1067 struct nfs4_open_createattrs { 1124 1068 struct nfs4_label *label; ··· 1276 1268 struct nfs4_opendata, kref); 1277 1269 struct super_block *sb = p->dentry->d_sb; 1278 1270 1271 + nfs4_lgopen_release(p->lgp); 1279 1272 nfs_free_seqid(p->o_arg.seqid); 1280 1273 nfs4_sequence_free_slot(&p->o_res.seq_res); 1281 1274 if (p->state != NULL) ··· 2196 2187 }; 2197 2188 int status; 2198 2189 2199 - 
nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); 2190 + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1, 2191 + data->is_recover); 2200 2192 kref_get(&data->kref); 2201 2193 data->rpc_done = false; 2202 2194 data->rpc_status = 0; 2203 2195 data->timestamp = jiffies; 2204 - if (data->is_recover) 2205 - nfs4_set_sequence_privileged(&data->c_arg.seq_args); 2206 2196 task = rpc_run_task(&task_setup_data); 2207 2197 if (IS_ERR(task)) 2208 2198 return PTR_ERR(task); ··· 2335 2327 .rpc_release = nfs4_open_release, 2336 2328 }; 2337 2329 2338 - static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) 2330 + static int nfs4_run_open_task(struct nfs4_opendata *data, 2331 + struct nfs_open_context *ctx) 2339 2332 { 2340 2333 struct inode *dir = d_inode(data->dir); 2341 2334 struct nfs_server *server = NFS_SERVER(dir); ··· 2359 2350 }; 2360 2351 int status; 2361 2352 2362 - nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); 2363 2353 kref_get(&data->kref); 2364 2354 data->rpc_done = false; 2365 2355 data->rpc_status = 0; 2366 2356 data->cancelled = false; 2367 2357 data->is_recover = false; 2368 - if (isrecover) { 2369 - nfs4_set_sequence_privileged(&o_arg->seq_args); 2358 + if (!ctx) { 2359 + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); 2370 2360 data->is_recover = true; 2361 + } else { 2362 + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); 2363 + pnfs_lgopen_prepare(data, ctx); 2371 2364 } 2372 2365 task = rpc_run_task(&task_setup_data); 2373 2366 if (IS_ERR(task)) ··· 2391 2380 struct nfs_openres *o_res = &data->o_res; 2392 2381 int status; 2393 2382 2394 - status = nfs4_run_open_task(data, 1); 2383 + status = nfs4_run_open_task(data, NULL); 2395 2384 if (status != 0 || !data->rpc_done) 2396 2385 return status; 2397 2386 ··· 2452 2441 /* 2453 2442 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata 2454 2443 */ 2455 - static int _nfs4_proc_open(struct nfs4_opendata 
*data) 2444 + static int _nfs4_proc_open(struct nfs4_opendata *data, 2445 + struct nfs_open_context *ctx) 2456 2446 { 2457 2447 struct inode *dir = d_inode(data->dir); 2458 2448 struct nfs_server *server = NFS_SERVER(dir); ··· 2461 2449 struct nfs_openres *o_res = &data->o_res; 2462 2450 int status; 2463 2451 2464 - status = nfs4_run_open_task(data, 0); 2452 + status = nfs4_run_open_task(data, ctx); 2465 2453 if (!data->rpc_done) 2466 2454 return status; 2467 2455 if (status != 0) { ··· 2492 2480 } 2493 2481 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { 2494 2482 nfs4_sequence_free_slot(&o_res->seq_res); 2495 - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); 2483 + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, 2484 + o_res->f_label, NULL); 2496 2485 } 2497 2486 return 0; 2498 2487 } ··· 2813 2800 2814 2801 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 2815 2802 2816 - ret = _nfs4_proc_open(opendata); 2803 + ret = _nfs4_proc_open(opendata, ctx); 2817 2804 if (ret != 0) 2818 2805 goto out; 2819 2806 2820 - state = nfs4_opendata_to_nfs4_state(opendata); 2807 + state = _nfs4_opendata_to_nfs4_state(opendata); 2821 2808 ret = PTR_ERR(state); 2822 2809 if (IS_ERR(state)) 2823 2810 goto out; ··· 2851 2838 nfs_inode_attach_open_context(ctx); 2852 2839 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) 2853 2840 nfs4_schedule_stateid_recovery(server, state); 2841 + else 2842 + pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); 2854 2843 } 2844 + 2855 2845 out: 2846 + nfs4_sequence_free_slot(&opendata->o_res.seq_res); 2856 2847 return ret; 2857 2848 } 2858 2849 ··· 3056 3039 }; 3057 3040 struct rpc_cred *delegation_cred = NULL; 3058 3041 unsigned long timestamp = jiffies; 3059 - fmode_t fmode; 3060 3042 bool truncate; 3061 3043 int status; 3062 3044 ··· 3063 3047 3064 3048 /* Servers should only apply open mode checks for file size changes */ 3065 3049 truncate = (arg->iap->ia_valid & ATTR_SIZE) ? 
true : false; 3066 - fmode = truncate ? FMODE_WRITE : FMODE_READ; 3050 + if (!truncate) 3051 + goto zero_stateid; 3067 3052 3068 - if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { 3053 + if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { 3069 3054 /* Use that stateid */ 3070 - } else if (truncate && ctx != NULL) { 3055 + } else if (ctx != NULL) { 3071 3056 struct nfs_lock_context *l_ctx; 3072 3057 if (!nfs4_valid_open_stateid(ctx->state)) 3073 3058 return -EBADF; ··· 3080 3063 nfs_put_lock_context(l_ctx); 3081 3064 if (status == -EIO) 3082 3065 return -EBADF; 3083 - } else 3066 + } else { 3067 + zero_stateid: 3084 3068 nfs4_stateid_copy(&arg->stateid, &zero_stateid); 3069 + } 3085 3070 if (delegation_cred) 3086 3071 msg.rpc_cred = delegation_cred; 3087 3072 ··· 3102 3083 struct nfs4_label *olabel) 3103 3084 { 3104 3085 struct nfs_server *server = NFS_SERVER(inode); 3086 + __u32 bitmask[NFS4_BITMASK_SZ]; 3105 3087 struct nfs4_state *state = ctx ? 
ctx->state : NULL; 3106 3088 struct nfs_setattrargs arg = { 3107 3089 .fh = NFS_FH(inode), 3108 3090 .iap = sattr, 3109 3091 .server = server, 3110 - .bitmask = server->attr_bitmask, 3092 + .bitmask = bitmask, 3111 3093 .label = ilabel, 3112 3094 }; 3113 3095 struct nfs_setattrres res = { ··· 3123 3103 }; 3124 3104 int err; 3125 3105 3126 - arg.bitmask = nfs4_bitmask(server, ilabel); 3127 - if (ilabel) 3128 - arg.bitmask = nfs4_bitmask(server, olabel); 3129 - 3130 3106 do { 3107 + nfs4_bitmap_copy_adjust_setattr(bitmask, 3108 + nfs4_bitmask(server, olabel), 3109 + inode); 3110 + 3131 3111 err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); 3132 3112 switch (err) { 3133 3113 case -NFS4ERR_OPENMODE: ··· 3413 3393 calldata = kzalloc(sizeof(*calldata), gfp_mask); 3414 3394 if (calldata == NULL) 3415 3395 goto out; 3416 - nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); 3396 + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1, 0); 3417 3397 calldata->inode = state->inode; 3418 3398 calldata->state = state; 3419 3399 calldata->arg.fh = NFS_FH(state->inode); ··· 3762 3742 if (IS_ERR(label)) 3763 3743 return PTR_ERR(label); 3764 3744 3765 - error = nfs4_proc_getattr(server, mntfh, fattr, label); 3745 + error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL); 3766 3746 if (error < 0) { 3767 3747 dprintk("nfs4_get_root: getattr error = %d\n", -error); 3768 3748 goto err_free_label; ··· 3827 3807 } 3828 3808 3829 3809 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 3830 - struct nfs_fattr *fattr, struct nfs4_label *label) 3810 + struct nfs_fattr *fattr, struct nfs4_label *label, 3811 + struct inode *inode) 3831 3812 { 3813 + __u32 bitmask[NFS4_BITMASK_SZ]; 3832 3814 struct nfs4_getattr_arg args = { 3833 3815 .fh = fhandle, 3834 - .bitmask = server->attr_bitmask, 3816 + .bitmask = bitmask, 3835 3817 }; 3836 3818 struct nfs4_getattr_res res = { 3837 3819 .fattr = fattr, ··· 3846 3824 .rpc_resp = 
&res, 3847 3825 }; 3848 3826 3849 - args.bitmask = nfs4_bitmask(server, label); 3827 + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); 3850 3828 3851 3829 nfs_fattr_init(fattr); 3852 3830 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 3853 3831 } 3854 3832 3855 3833 static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 3856 - struct nfs_fattr *fattr, struct nfs4_label *label) 3834 + struct nfs_fattr *fattr, struct nfs4_label *label, 3835 + struct inode *inode) 3857 3836 { 3858 3837 struct nfs4_exception exception = { }; 3859 3838 int err; 3860 3839 do { 3861 - err = _nfs4_proc_getattr(server, fhandle, fattr, label); 3840 + err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode); 3862 3841 trace_nfs4_getattr(server, fhandle, fattr, err); 3863 3842 err = nfs4_handle_exception(server, err, 3864 3843 &exception); ··· 4112 4089 }; 4113 4090 int status = 0; 4114 4091 4115 - if (!nfs_have_delegated_attributes(inode)) { 4092 + if (!nfs4_have_delegation(inode, FMODE_READ)) { 4116 4093 res.fattr = nfs_alloc_fattr(); 4117 4094 if (res.fattr == NULL) 4118 4095 return -ENOMEM; ··· 4288 4265 return err; 4289 4266 } 4290 4267 4291 - static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) 4268 + static void nfs4_proc_unlink_setup(struct rpc_message *msg, 4269 + struct dentry *dentry, 4270 + struct inode *inode) 4292 4271 { 4293 4272 struct nfs_removeargs *args = msg->rpc_argp; 4294 4273 struct nfs_removeres *res = msg->rpc_resp; 4295 - struct inode *inode = d_inode(dentry); 4296 4274 4297 4275 res->server = NFS_SB(dentry->d_sb); 4298 4276 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 4299 - nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); 4277 + nfs4_init_sequence(&args->seq_args, &res->seq_res, 1, 0); 4300 4278 4301 4279 nfs_fattr_init(res->dir_attr); 4302 4280 ··· 4343 4319 nfs4_inode_return_delegation(new_inode); 4344 4320 msg->rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_RENAME]; 4345 4321 res->server = NFS_SB(old_dentry->d_sb); 4346 - nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); 4322 + nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1, 0); 4347 4323 } 4348 4324 4349 4325 static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) ··· 4376 4352 static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) 4377 4353 { 4378 4354 struct nfs_server *server = NFS_SERVER(inode); 4355 + __u32 bitmask[NFS4_BITMASK_SZ]; 4379 4356 struct nfs4_link_arg arg = { 4380 4357 .fh = NFS_FH(inode), 4381 4358 .dir_fh = NFS_FH(dir), 4382 4359 .name = name, 4383 - .bitmask = server->attr_bitmask, 4360 + .bitmask = bitmask, 4384 4361 }; 4385 4362 struct nfs4_link_res res = { 4386 4363 .server = server, ··· 4403 4378 status = PTR_ERR(res.label); 4404 4379 goto out; 4405 4380 } 4406 - arg.bitmask = nfs4_bitmask(server, res.label); 4407 4381 4408 4382 nfs4_inode_make_writeable(inode); 4383 + nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode); 4409 4384 4410 4385 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 4411 4386 if (!status) { ··· 4920 4895 if (!hdr->pgio_done_cb) 4921 4896 hdr->pgio_done_cb = nfs4_read_done_cb; 4922 4897 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 4923 - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); 4898 + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); 4924 4899 } 4925 4900 4926 4901 static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, ··· 5004 4979 } 5005 4980 5006 4981 static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, 5007 - struct rpc_message *msg) 4982 + struct rpc_message *msg, 4983 + struct rpc_clnt **clnt) 5008 4984 { 5009 4985 struct nfs_server *server = NFS_SERVER(hdr->inode); 5010 4986 ··· 5021 4995 hdr->timestamp = jiffies; 5022 4996 5023 4997 msg->rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_WRITE]; 5024 - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); 4998 + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0); 4999 + nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); 5025 5000 } 5026 5001 5027 5002 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) ··· 5053 5026 return data->commit_done_cb(task, data); 5054 5027 } 5055 5028 5056 - static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) 5029 + static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, 5030 + struct rpc_clnt **clnt) 5057 5031 { 5058 5032 struct nfs_server *server = NFS_SERVER(data->inode); 5059 5033 ··· 5062 5034 data->commit_done_cb = nfs4_commit_done_cb; 5063 5035 data->res.server = server; 5064 5036 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 5065 - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 5037 + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); 5038 + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); 5066 5039 } 5067 5040 5068 5041 struct nfs4_renewdata { ··· 5420 5391 */ 5421 5392 spin_lock(&inode->i_lock); 5422 5393 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE 5423 - | NFS_INO_INVALID_CTIME; 5394 + | NFS_INO_INVALID_CTIME 5395 + | NFS_INO_REVAL_FORCED; 5424 5396 spin_unlock(&inode->i_lock); 5425 5397 nfs_access_zap_cache(inode); 5426 5398 nfs_zap_acl_cache(inode); ··· 5621 5591 return 0; 5622 5592 5623 5593 rcu_read_lock(); 5624 - len = 14 + strlen(clp->cl_ipaddr) + 1 + 5625 - strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + 5594 + len = 14 + 5595 + strlen(clp->cl_rpcclient->cl_nodename) + 5626 5596 1 + 5627 - strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + 5597 + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + 5628 5598 1; 5629 5599 rcu_read_unlock(); 5630 - 5600 + if 
(nfs4_client_id_uniquifier[0] != '\0') 5601 + len += strlen(nfs4_client_id_uniquifier) + 1; 5631 5602 if (len > NFS4_OPAQUE_LIMIT + 1) 5632 5603 return -EINVAL; 5633 5604 ··· 5642 5611 return -ENOMEM; 5643 5612 5644 5613 rcu_read_lock(); 5645 - scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", 5646 - clp->cl_ipaddr, 5647 - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), 5648 - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); 5614 + if (nfs4_client_id_uniquifier[0] != '\0') 5615 + scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s", 5616 + clp->cl_rpcclient->cl_nodename, 5617 + nfs4_client_id_uniquifier, 5618 + rpc_peeraddr2str(clp->cl_rpcclient, 5619 + RPC_DISPLAY_ADDR)); 5620 + else 5621 + scnprintf(str, len, "Linux NFSv4.0 %s/%s", 5622 + clp->cl_rpcclient->cl_nodename, 5623 + rpc_peeraddr2str(clp->cl_rpcclient, 5624 + RPC_DISPLAY_ADDR)); 5649 5625 rcu_read_unlock(); 5650 5626 5651 5627 clp->cl_owner_id = str; ··· 6010 5972 data = kzalloc(sizeof(*data), GFP_NOFS); 6011 5973 if (data == NULL) 6012 5974 return -ENOMEM; 6013 - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 5975 + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); 6014 5976 6015 5977 nfs4_state_protect(server->nfs_client, 6016 5978 NFS_SP4_MACH_CRED_CLEANUP, ··· 6285 6247 return ERR_PTR(-ENOMEM); 6286 6248 } 6287 6249 6288 - nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); 6250 + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, 0); 6289 6251 msg.rpc_argp = &data->arg; 6290 6252 msg.rpc_resp = &data->res; 6291 6253 task_setup_data.callback_data = data; ··· 6449 6411 case 0: 6450 6412 renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), 6451 6413 data->timestamp); 6452 - if (data->arg.new_lock) { 6414 + if (data->arg.new_lock && !data->cancelled) { 6453 6415 data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); 6454 - if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) { 6455 - rpc_restart_call_prepare(task); 6416 + if 
(locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) 6456 6417 break; 6457 - } 6458 6418 } 6419 + 6459 6420 if (data->arg.new_lock_owner != 0) { 6460 6421 nfs_confirm_seqid(&lsp->ls_seqid, 0); 6461 6422 nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); 6462 6423 set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); 6463 - } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) 6464 - rpc_restart_call_prepare(task); 6424 + goto out_done; 6425 + } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) 6426 + goto out_done; 6427 + 6465 6428 break; 6466 6429 case -NFS4ERR_BAD_STATEID: 6467 6430 case -NFS4ERR_OLD_STATEID: 6468 6431 case -NFS4ERR_STALE_STATEID: 6469 6432 case -NFS4ERR_EXPIRED: 6470 6433 if (data->arg.new_lock_owner != 0) { 6471 - if (!nfs4_stateid_match(&data->arg.open_stateid, 6434 + if (nfs4_stateid_match(&data->arg.open_stateid, 6472 6435 &lsp->ls_state->open_stateid)) 6473 - rpc_restart_call_prepare(task); 6474 - } else if (!nfs4_stateid_match(&data->arg.lock_stateid, 6436 + goto out_done; 6437 + } else if (nfs4_stateid_match(&data->arg.lock_stateid, 6475 6438 &lsp->ls_stateid)) 6476 - rpc_restart_call_prepare(task); 6439 + goto out_done; 6477 6440 } 6441 + if (!data->cancelled) 6442 + rpc_restart_call_prepare(task); 6443 + out_done: 6478 6444 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); 6479 6445 } 6480 6446 ··· 6551 6509 return -ENOMEM; 6552 6510 if (IS_SETLKW(cmd)) 6553 6511 data->arg.block = 1; 6554 - nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); 6512 + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, 6513 + recovery_type > NFS_LOCK_NEW); 6555 6514 msg.rpc_argp = &data->arg; 6556 6515 msg.rpc_resp = &data->res; 6557 6516 task_setup_data.callback_data = data; 6558 6517 if (recovery_type > NFS_LOCK_NEW) { 6559 6518 if (recovery_type == NFS_LOCK_RECLAIM) 6560 6519 data->arg.reclaim = NFS_LOCK_RECLAIM; 6561 - nfs4_set_sequence_privileged(&data->arg.seq_args); 6562 6520 } else 6563 
6521 data->arg.new_lock = 1; 6564 6522 task = rpc_run_task(&task_setup_data); ··· 6953 6911 6954 6912 msg.rpc_argp = &data->args; 6955 6913 msg.rpc_resp = &data->res; 6956 - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 6914 + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); 6957 6915 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); 6958 6916 } 6959 6917 ··· 7149 7107 locations->server = server; 7150 7108 locations->nlocations = 0; 7151 7109 7152 - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); 7153 - nfs4_set_sequence_privileged(&args.seq_args); 7110 + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); 7154 7111 status = nfs4_call_sync_sequence(clnt, server, &msg, 7155 7112 &args.seq_args, &res.seq_res); 7156 7113 if (status) ··· 7202 7161 locations->server = server; 7203 7162 locations->nlocations = 0; 7204 7163 7205 - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); 7206 - nfs4_set_sequence_privileged(&args.seq_args); 7164 + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); 7207 7165 status = nfs4_call_sync_sequence(clnt, server, &msg, 7208 7166 &args.seq_args, &res.seq_res); 7209 7167 if (status == NFS4_OK && ··· 7289 7249 if (res.fh == NULL) 7290 7250 return -ENOMEM; 7291 7251 7292 - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); 7293 - nfs4_set_sequence_privileged(&args.seq_args); 7252 + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); 7294 7253 status = nfs4_call_sync_sequence(clnt, server, &msg, 7295 7254 &args.seq_args, &res.seq_res); 7296 7255 nfs_free_fhandle(res.fh); ··· 7330 7291 if (res.fh == NULL) 7331 7292 return -ENOMEM; 7332 7293 7333 - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); 7334 - nfs4_set_sequence_privileged(&args.seq_args); 7294 + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); 7335 7295 status = nfs4_call_sync_sequence(clnt, server, &msg, 7336 7296 &args.seq_args, &res.seq_res); 7337 7297 nfs_free_fhandle(res.fh); ··· 8108 
8070 }; 8109 8071 int status; 8110 8072 8111 - nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); 8112 - nfs4_set_sequence_privileged(&args.la_seq_args); 8073 + nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1); 8113 8074 task = rpc_run_task(&task_setup); 8114 8075 8115 8076 if (IS_ERR(task)) ··· 8445 8408 calldata = kzalloc(sizeof(*calldata), GFP_NOFS); 8446 8409 if (calldata == NULL) 8447 8410 goto out_put_clp; 8448 - nfs4_init_sequence(&calldata->args, &calldata->res, 0); 8411 + nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged); 8449 8412 nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); 8450 - if (is_privileged) 8451 - nfs4_set_sequence_privileged(&calldata->args); 8452 8413 msg.rpc_argp = &calldata->args; 8453 8414 msg.rpc_resp = &calldata->res; 8454 8415 calldata->clp = clp; ··· 8598 8563 calldata->clp = clp; 8599 8564 calldata->arg.one_fs = 0; 8600 8565 8601 - nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); 8602 - nfs4_set_sequence_privileged(&calldata->arg.seq_args); 8566 + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0, 1); 8603 8567 msg.rpc_argp = &calldata->arg; 8604 8568 msg.rpc_resp = &calldata->res; 8605 8569 task_setup_data.callback_data = calldata; ··· 8727 8693 return status; 8728 8694 } 8729 8695 8730 - static size_t max_response_pages(struct nfs_server *server) 8696 + size_t max_response_pages(struct nfs_server *server) 8731 8697 { 8732 8698 u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 8733 8699 return nfs_page_array_len(0, max_resp_sz); 8734 8700 } 8735 8701 8736 - static void nfs4_free_pages(struct page **pages, size_t size) 8737 - { 8738 - int i; 8739 - 8740 - if (!pages) 8741 - return; 8742 - 8743 - for (i = 0; i < size; i++) { 8744 - if (!pages[i]) 8745 - break; 8746 - __free_page(pages[i]); 8747 - } 8748 - kfree(pages); 8749 - } 8750 - 8751 - static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) 8752 
- { 8753 - struct page **pages; 8754 - int i; 8755 - 8756 - pages = kcalloc(size, sizeof(struct page *), gfp_flags); 8757 - if (!pages) { 8758 - dprintk("%s: can't alloc array of %zu pages\n", __func__, size); 8759 - return NULL; 8760 - } 8761 - 8762 - for (i = 0; i < size; i++) { 8763 - pages[i] = alloc_page(gfp_flags); 8764 - if (!pages[i]) { 8765 - dprintk("%s: failed to allocate page\n", __func__); 8766 - nfs4_free_pages(pages, size); 8767 - return NULL; 8768 - } 8769 - } 8770 - 8771 - return pages; 8772 - } 8773 - 8774 8702 static void nfs4_layoutget_release(void *calldata) 8775 8703 { 8776 8704 struct nfs4_layoutget *lgp = calldata; 8777 - struct inode *inode = lgp->args.inode; 8778 - struct nfs_server *server = NFS_SERVER(inode); 8779 - size_t max_pages = max_response_pages(server); 8780 8705 8781 8706 dprintk("--> %s\n", __func__); 8782 8707 nfs4_sequence_free_slot(&lgp->res.seq_res); 8783 - nfs4_free_pages(lgp->args.layout.pages, max_pages); 8784 - pnfs_put_layout_hdr(NFS_I(inode)->layout); 8785 - put_nfs_open_context(lgp->args.ctx); 8786 - kfree(calldata); 8708 + pnfs_layoutget_free(lgp); 8787 8709 dprintk("<-- %s\n", __func__); 8788 8710 } 8789 8711 ··· 8750 8760 }; 8751 8761 8752 8762 struct pnfs_layout_segment * 8753 - nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) 8763 + nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) 8754 8764 { 8755 8765 struct inode *inode = lgp->args.inode; 8756 8766 struct nfs_server *server = NFS_SERVER(inode); 8757 - size_t max_pages = max_response_pages(server); 8758 8767 struct rpc_task *task; 8759 8768 struct rpc_message msg = { 8760 8769 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], ··· 8780 8791 /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ 8781 8792 pnfs_get_layout_hdr(NFS_I(inode)->layout); 8782 8793 8783 - lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); 8784 - if (!lgp->args.layout.pages) { 8785 - nfs4_layoutget_release(lgp); 8786 - 
return ERR_PTR(-ENOMEM); 8787 - } 8788 - lgp->args.layout.pglen = max_pages * PAGE_SIZE; 8789 - 8790 - lgp->res.layoutp = &lgp->args.layout; 8791 - lgp->res.seq_res.sr_slot = NULL; 8792 - nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); 8794 + nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); 8793 8795 8794 8796 task = rpc_run_task(&task_setup_data); 8795 8797 if (IS_ERR(task)) ··· 8907 8927 } 8908 8928 task_setup_data.flags |= RPC_TASK_ASYNC; 8909 8929 } 8910 - nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); 8930 + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 0); 8911 8931 task = rpc_run_task(&task_setup_data); 8912 8932 if (IS_ERR(task)) 8913 8933 return PTR_ERR(task); ··· 9054 9074 } 9055 9075 task_setup_data.flags = RPC_TASK_ASYNC; 9056 9076 } 9057 - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 9077 + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); 9058 9078 task = rpc_run_task(&task_setup_data); 9059 9079 if (IS_ERR(task)) 9060 9080 return PTR_ERR(task); ··· 9234 9254 &rpc_client, &msg); 9235 9255 9236 9256 dprintk("NFS call test_stateid %p\n", stateid); 9237 - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); 9238 - nfs4_set_sequence_privileged(&args.seq_args); 9257 + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); 9239 9258 status = nfs4_call_sync_sequence(rpc_client, server, &msg, 9240 9259 &args.seq_args, &res.seq_res); 9241 9260 if (status != NFS_OK) { ··· 9326 9347 .rpc_release = nfs41_free_stateid_release, 9327 9348 }; 9328 9349 9329 - static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, 9350 + /** 9351 + * nfs41_free_stateid - perform a FREE_STATEID operation 9352 + * 9353 + * @server: server / transport on which to perform the operation 9354 + * @stateid: state ID to release 9355 + * @cred: credential 9356 + * @is_recovery: set to true if this call needs to be privileged 9357 + * 9358 + * Note: this function is always 
asynchronous. 9359 + */ 9360 + static int nfs41_free_stateid(struct nfs_server *server, 9330 9361 const nfs4_stateid *stateid, 9331 9362 struct rpc_cred *cred, 9332 9363 bool privileged) ··· 9352 9363 .flags = RPC_TASK_ASYNC, 9353 9364 }; 9354 9365 struct nfs_free_stateid_data *data; 9366 + struct rpc_task *task; 9355 9367 9356 9368 nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, 9357 9369 &task_setup.rpc_client, &msg); ··· 9360 9370 dprintk("NFS call free_stateid %p\n", stateid); 9361 9371 data = kmalloc(sizeof(*data), GFP_NOFS); 9362 9372 if (!data) 9363 - return ERR_PTR(-ENOMEM); 9373 + return -ENOMEM; 9364 9374 data->server = server; 9365 9375 nfs4_stateid_copy(&data->args.stateid, stateid); 9366 9376 ··· 9368 9378 9369 9379 msg.rpc_argp = &data->args; 9370 9380 msg.rpc_resp = &data->res; 9371 - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 9372 - if (privileged) 9373 - nfs4_set_sequence_privileged(&data->args.seq_args); 9374 - 9375 - return rpc_run_task(&task_setup); 9376 - } 9377 - 9378 - /** 9379 - * nfs41_free_stateid - perform a FREE_STATEID operation 9380 - * 9381 - * @server: server / transport on which to perform the operation 9382 - * @stateid: state ID to release 9383 - * @cred: credential 9384 - * @is_recovery: set to true if this call needs to be privileged 9385 - * 9386 - * Note: this function is always asynchronous. 
9387 - */ 9388 - static int nfs41_free_stateid(struct nfs_server *server, 9389 - const nfs4_stateid *stateid, 9390 - struct rpc_cred *cred, 9391 - bool is_recovery) 9392 - { 9393 - struct rpc_task *task; 9394 - 9395 - task = _nfs41_free_stateid(server, stateid, cred, is_recovery); 9381 + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, privileged); 9382 + task = rpc_run_task(&task_setup); 9396 9383 if (IS_ERR(task)) 9397 9384 return PTR_ERR(task); 9398 9385 rpc_put_task(task); ··· 9506 9539 | NFS_CAP_ATOMIC_OPEN 9507 9540 | NFS_CAP_POSIX_LOCK 9508 9541 | NFS_CAP_STATEID_NFSV41 9509 - | NFS_CAP_ATOMIC_OPEN_V1, 9542 + | NFS_CAP_ATOMIC_OPEN_V1 9543 + | NFS_CAP_LGOPEN, 9510 9544 .init_client = nfs41_init_client, 9511 9545 .shutdown_client = nfs41_shutdown_client, 9512 9546 .match_stateid = nfs41_match_stateid, ··· 9532 9564 | NFS_CAP_POSIX_LOCK 9533 9565 | NFS_CAP_STATEID_NFSV41 9534 9566 | NFS_CAP_ATOMIC_OPEN_V1 9567 + | NFS_CAP_LGOPEN 9535 9568 | NFS_CAP_ALLOCATE 9536 9569 | NFS_CAP_COPY 9537 9570 | NFS_CAP_DEALLOCATE
+8
fs/nfs/nfs4state.c
··· 77 77 .type = NFS4_INVALID_STATEID_TYPE, 78 78 }; 79 79 80 + const nfs4_stateid current_stateid = { 81 + { 82 + /* Funky initialiser keeps older gcc versions happy */ 83 + .data = { 0x0, 0x0, 0x0, 0x1, 0 }, 84 + }, 85 + .type = NFS4_SPECIAL_STATEID_TYPE, 86 + }; 87 + 80 88 static DEFINE_MUTEX(nfs_clid_init_mutex); 81 89 82 90 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
+56 -9
fs/nfs/nfs4xdr.c
··· 65 65 /* Mapping from NFS error code to "errno" error code. */ 66 66 #define errno_NFSERR_IO EIO 67 67 68 + struct compound_hdr; 68 69 static int nfs4_stat_to_errno(int); 70 + static void encode_layoutget(struct xdr_stream *xdr, 71 + const struct nfs4_layoutget_args *args, 72 + struct compound_hdr *hdr); 73 + static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, 74 + struct nfs4_layoutget_res *res); 69 75 70 76 /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ 71 77 #ifdef DEBUG ··· 430 424 #define decode_sequence_maxsz 0 431 425 #define encode_layoutreturn_maxsz 0 432 426 #define decode_layoutreturn_maxsz 0 427 + #define encode_layoutget_maxsz 0 428 + #define decode_layoutget_maxsz 0 433 429 #endif /* CONFIG_NFS_V4_1 */ 434 430 435 431 #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ ··· 484 476 encode_open_maxsz + \ 485 477 encode_access_maxsz + \ 486 478 encode_getfh_maxsz + \ 487 - encode_getattr_maxsz) 479 + encode_getattr_maxsz + \ 480 + encode_layoutget_maxsz) 488 481 #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 489 482 decode_sequence_maxsz + \ 490 483 decode_putfh_maxsz + \ 491 484 decode_open_maxsz + \ 492 485 decode_access_maxsz + \ 493 486 decode_getfh_maxsz + \ 494 - decode_getattr_maxsz) 487 + decode_getattr_maxsz + \ 488 + decode_layoutget_maxsz) 495 489 #define NFS4_enc_open_confirm_sz \ 496 490 (compound_encode_hdr_maxsz + \ 497 491 encode_putfh_maxsz + \ ··· 507 497 encode_putfh_maxsz + \ 508 498 encode_open_maxsz + \ 509 499 encode_access_maxsz + \ 510 - encode_getattr_maxsz) 500 + encode_getattr_maxsz + \ 501 + encode_layoutget_maxsz) 511 502 #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ 512 503 decode_sequence_maxsz + \ 513 504 decode_putfh_maxsz + \ 514 505 decode_open_maxsz + \ 515 506 decode_access_maxsz + \ 516 - decode_getattr_maxsz) 507 + decode_getattr_maxsz + \ 508 + decode_layoutget_maxsz) 517 509 #define NFS4_enc_open_downgrade_sz \ 518 510 
(compound_encode_hdr_maxsz + \ 519 511 encode_sequence_maxsz + \ ··· 2082 2070 struct compound_hdr *hdr) 2083 2071 { 2084 2072 } 2073 + 2074 + static void 2075 + encode_layoutget(struct xdr_stream *xdr, 2076 + const struct nfs4_layoutget_args *args, 2077 + struct compound_hdr *hdr) 2078 + { 2079 + } 2085 2080 #endif /* CONFIG_NFS_V4_1 */ 2086 2081 2087 2082 /* ··· 2335 2316 if (args->access) 2336 2317 encode_access(xdr, args->access, &hdr); 2337 2318 encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); 2319 + if (args->lg_args) { 2320 + encode_layoutget(xdr, args->lg_args, &hdr); 2321 + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, 2322 + args->lg_args->layout.pages, 2323 + 0, args->lg_args->layout.pglen); 2324 + } 2338 2325 encode_nops(&hdr); 2339 2326 } 2340 2327 ··· 2381 2356 if (args->access) 2382 2357 encode_access(xdr, args->access, &hdr); 2383 2358 encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); 2359 + if (args->lg_args) { 2360 + encode_layoutget(xdr, args->lg_args, &hdr); 2361 + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, 2362 + args->lg_args->layout.pages, 2363 + 0, args->lg_args->layout.pglen); 2364 + } 2384 2365 encode_nops(&hdr); 2385 2366 } 2386 2367 ··· 6055 6024 6056 6025 status = decode_op_hdr(xdr, OP_LAYOUTGET); 6057 6026 if (status) 6058 - return status; 6027 + goto out; 6059 6028 p = xdr_inline_decode(xdr, 4); 6060 6029 if (unlikely(!p)) 6061 6030 goto out_overflow; ··· 6068 6037 if (!layout_count) { 6069 6038 dprintk("%s: server responded with empty layout array\n", 6070 6039 __func__); 6071 - return -EINVAL; 6040 + status = -EINVAL; 6041 + goto out; 6072 6042 } 6073 6043 6074 6044 p = xdr_inline_decode(xdr, 28); ··· 6094 6062 dprintk("NFS: server cheating in layoutget reply: " 6095 6063 "layout len %u > recvd %u\n", 6096 6064 res->layoutp->len, recvd); 6097 - return -EINVAL; 6065 + status = -EINVAL; 6066 + goto out; 6098 6067 } 6099 6068 6100 6069 if (layout_count > 1) { ··· 6108 6075 __func__, 
layout_count); 6109 6076 } 6110 6077 6111 - return 0; 6078 + out: 6079 + res->status = status; 6080 + return status; 6112 6081 out_overflow: 6113 6082 print_overflow_msg(__func__, xdr); 6114 - return -EIO; 6083 + status = -EIO; 6084 + goto out; 6115 6085 } 6116 6086 6117 6087 static int decode_layoutreturn(struct xdr_stream *xdr, ··· 6213 6177 { 6214 6178 return 0; 6215 6179 } 6180 + 6181 + static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, 6182 + struct nfs4_layoutget_res *res) 6183 + { 6184 + return 0; 6185 + } 6186 + 6216 6187 #endif /* CONFIG_NFS_V4_1 */ 6217 6188 6218 6189 /* ··· 6666 6623 if (res->access_request) 6667 6624 decode_access(xdr, &res->access_supported, &res->access_result); 6668 6625 decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); 6626 + if (res->lg_res) 6627 + decode_layoutget(xdr, rqstp, res->lg_res); 6669 6628 out: 6670 6629 return status; 6671 6630 } ··· 6720 6675 if (res->access_request) 6721 6676 decode_access(xdr, &res->access_supported, &res->access_result); 6722 6677 decode_getfattr(xdr, res->f_attr, res->server); 6678 + if (res->lg_res) 6679 + decode_layoutget(xdr, rqstp, res->lg_res); 6723 6680 out: 6724 6681 return status; 6725 6682 }
+292 -43
fs/nfs/pnfs.c
··· 37 37 #include "nfs4trace.h" 38 38 #include "delegation.h" 39 39 #include "nfs42.h" 40 + #include "nfs4_fs.h" 40 41 41 42 #define NFSDBG_FACILITY NFSDBG_PNFS 42 43 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) ··· 916 915 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 917 916 } 918 917 919 - /* 920 - * Get layout from server. 921 - * for now, assume that whole file layouts are requested. 922 - * arg->offset: 0 923 - * arg->length: all ones 924 - */ 925 - static struct pnfs_layout_segment * 926 - send_layoutget(struct pnfs_layout_hdr *lo, 927 - struct nfs_open_context *ctx, 928 - nfs4_stateid *stateid, 929 - const struct pnfs_layout_range *range, 930 - long *timeout, gfp_t gfp_flags) 918 + static struct nfs_server * 919 + pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) 931 920 { 932 - struct inode *ino = lo->plh_inode; 933 - struct nfs_server *server = NFS_SERVER(ino); 921 + struct nfs_server *server; 922 + 923 + if (inode) { 924 + server = NFS_SERVER(inode); 925 + } else { 926 + struct dentry *parent_dir = dget_parent(ctx->dentry); 927 + server = NFS_SERVER(parent_dir->d_inode); 928 + dput(parent_dir); 929 + } 930 + return server; 931 + } 932 + 933 + static void nfs4_free_pages(struct page **pages, size_t size) 934 + { 935 + int i; 936 + 937 + if (!pages) 938 + return; 939 + 940 + for (i = 0; i < size; i++) { 941 + if (!pages[i]) 942 + break; 943 + __free_page(pages[i]); 944 + } 945 + kfree(pages); 946 + } 947 + 948 + static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) 949 + { 950 + struct page **pages; 951 + int i; 952 + 953 + pages = kcalloc(size, sizeof(struct page *), gfp_flags); 954 + if (!pages) { 955 + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); 956 + return NULL; 957 + } 958 + 959 + for (i = 0; i < size; i++) { 960 + pages[i] = alloc_page(gfp_flags); 961 + if (!pages[i]) { 962 + dprintk("%s: failed to allocate page\n", __func__); 963 + nfs4_free_pages(pages, size); 964 + return NULL; 965 + } 
966 + } 967 + 968 + return pages; 969 + } 970 + 971 + static struct nfs4_layoutget * 972 + pnfs_alloc_init_layoutget_args(struct inode *ino, 973 + struct nfs_open_context *ctx, 974 + const nfs4_stateid *stateid, 975 + const struct pnfs_layout_range *range, 976 + gfp_t gfp_flags) 977 + { 978 + struct nfs_server *server = pnfs_find_server(ino, ctx); 979 + size_t max_pages = max_response_pages(server); 934 980 struct nfs4_layoutget *lgp; 935 - loff_t i_size; 936 981 937 982 dprintk("--> %s\n", __func__); 938 983 939 - /* 940 - * Synchronously retrieve layout information from server and 941 - * store in lseg. If we race with a concurrent seqid morphing 942 - * op, then re-send the LAYOUTGET. 943 - */ 944 984 lgp = kzalloc(sizeof(*lgp), gfp_flags); 945 985 if (lgp == NULL) 946 - return ERR_PTR(-ENOMEM); 986 + return NULL; 947 987 948 - i_size = i_size_read(ino); 988 + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); 989 + if (!lgp->args.layout.pages) { 990 + kfree(lgp); 991 + return NULL; 992 + } 993 + lgp->args.layout.pglen = max_pages * PAGE_SIZE; 994 + lgp->res.layoutp = &lgp->args.layout; 995 + 996 + /* Don't confuse uninitialised result and success */ 997 + lgp->res.status = -NFS4ERR_DELAY; 949 998 950 999 lgp->args.minlength = PAGE_SIZE; 951 1000 if (lgp->args.minlength > range->length) 952 1001 lgp->args.minlength = range->length; 953 - if (range->iomode == IOMODE_READ) { 954 - if (range->offset >= i_size) 955 - lgp->args.minlength = 0; 956 - else if (i_size - range->offset < lgp->args.minlength) 957 - lgp->args.minlength = i_size - range->offset; 1002 + if (ino) { 1003 + loff_t i_size = i_size_read(ino); 1004 + 1005 + if (range->iomode == IOMODE_READ) { 1006 + if (range->offset >= i_size) 1007 + lgp->args.minlength = 0; 1008 + else if (i_size - range->offset < lgp->args.minlength) 1009 + lgp->args.minlength = i_size - range->offset; 1010 + } 958 1011 } 959 1012 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 960 1013 pnfs_copy_range(&lgp->args.range, 
range); ··· 1017 962 lgp->args.ctx = get_nfs_open_context(ctx); 1018 963 nfs4_stateid_copy(&lgp->args.stateid, stateid); 1019 964 lgp->gfp_flags = gfp_flags; 1020 - lgp->cred = lo->plh_lc_cred; 965 + lgp->cred = get_rpccred(ctx->cred); 966 + lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count); 967 + return lgp; 968 + } 1021 969 1022 - return nfs4_proc_layoutget(lgp, timeout, gfp_flags); 970 + void pnfs_layoutget_free(struct nfs4_layoutget *lgp) 971 + { 972 + size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; 973 + 974 + nfs4_free_pages(lgp->args.layout.pages, max_pages); 975 + if (lgp->args.inode) 976 + pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout); 977 + put_rpccred(lgp->cred); 978 + put_nfs_open_context(lgp->args.ctx); 979 + kfree(lgp); 1023 980 } 1024 981 1025 982 static void pnfs_clear_layoutcommit(struct inode *inode, ··· 1211 1144 LIST_HEAD(tmp_list); 1212 1145 nfs4_stateid stateid; 1213 1146 int status = 0; 1214 - bool send; 1147 + bool send, valid_layout; 1215 1148 1216 1149 dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 1217 1150 ··· 1232 1165 goto out_put_layout_hdr; 1233 1166 spin_lock(&ino->i_lock); 1234 1167 } 1168 + valid_layout = pnfs_layout_is_valid(lo); 1235 1169 pnfs_clear_layoutcommit(ino, &tmp_list); 1236 1170 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); 1237 1171 ··· 1246 1178 } 1247 1179 1248 1180 /* Don't send a LAYOUTRETURN if list was initially empty */ 1249 - if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { 1181 + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) || 1182 + !valid_layout) { 1250 1183 spin_unlock(&ino->i_lock); 1251 1184 dprintk("NFS: %s no layout segments to return\n", __func__); 1252 1185 goto out_put_layout_hdr; ··· 1740 1671 wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); 1741 1672 } 1742 1673 1674 + static void _add_to_server_list(struct pnfs_layout_hdr *lo, 1675 + struct nfs_server *server) 1676 + { 1677 + if 
(list_empty(&lo->plh_layouts)) { 1678 + struct nfs_client *clp = server->nfs_client; 1679 + 1680 + /* The lo must be on the clp list if there is any 1681 + * chance of a CB_LAYOUTRECALL(FILE) coming in. 1682 + */ 1683 + spin_lock(&clp->cl_lock); 1684 + if (list_empty(&lo->plh_layouts)) 1685 + list_add_tail(&lo->plh_layouts, &server->layouts); 1686 + spin_unlock(&clp->cl_lock); 1687 + } 1688 + } 1689 + 1743 1690 /* 1744 1691 * Layout segment is retreived from the server if not cached. 1745 1692 * The appropriate layout segment is referenced and returned to the caller. ··· 1779 1694 struct nfs_client *clp = server->nfs_client; 1780 1695 struct pnfs_layout_hdr *lo = NULL; 1781 1696 struct pnfs_layout_segment *lseg = NULL; 1697 + struct nfs4_layoutget *lgp; 1782 1698 nfs4_stateid stateid; 1783 1699 long timeout = 0; 1784 1700 unsigned long giveup = jiffies + (clp->cl_lease_time << 1); ··· 1906 1820 atomic_inc(&lo->plh_outstanding); 1907 1821 spin_unlock(&ino->i_lock); 1908 1822 1909 - if (list_empty(&lo->plh_layouts)) { 1910 - /* The lo must be on the clp list if there is any 1911 - * chance of a CB_LAYOUTRECALL(FILE) coming in. 
1912 - */ 1913 - spin_lock(&clp->cl_lock); 1914 - if (list_empty(&lo->plh_layouts)) 1915 - list_add_tail(&lo->plh_layouts, &server->layouts); 1916 - spin_unlock(&clp->cl_lock); 1917 - } 1823 + _add_to_server_list(lo, server); 1918 1824 1919 1825 pg_offset = arg.offset & ~PAGE_MASK; 1920 1826 if (pg_offset) { ··· 1916 1838 if (arg.length != NFS4_MAX_UINT64) 1917 1839 arg.length = PAGE_ALIGN(arg.length); 1918 1840 1919 - lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); 1841 + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); 1842 + if (!lgp) { 1843 + trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, 1844 + PNFS_UPDATE_LAYOUT_NOMEM); 1845 + atomic_dec(&lo->plh_outstanding); 1846 + goto out_put_layout_hdr; 1847 + } 1848 + 1849 + lseg = nfs4_proc_layoutget(lgp, &timeout); 1920 1850 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 1921 1851 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); 1922 1852 atomic_dec(&lo->plh_outstanding); ··· 2005 1919 return true; 2006 1920 } 2007 1921 1922 + static struct pnfs_layout_hdr * 1923 + _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx) 1924 + { 1925 + struct pnfs_layout_hdr *lo; 1926 + 1927 + spin_lock(&ino->i_lock); 1928 + lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); 1929 + if (!lo) 1930 + goto out_unlock; 1931 + if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) 1932 + goto out_unlock; 1933 + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 1934 + goto out_unlock; 1935 + if (pnfs_layoutgets_blocked(lo)) 1936 + goto out_unlock; 1937 + if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags)) 1938 + goto out_unlock; 1939 + atomic_inc(&lo->plh_outstanding); 1940 + spin_unlock(&ino->i_lock); 1941 + _add_to_server_list(lo, NFS_SERVER(ino)); 1942 + return lo; 1943 + 1944 + out_unlock: 1945 + spin_unlock(&ino->i_lock); 1946 + pnfs_put_layout_hdr(lo); 1947 + return NULL; 1948 + } 1949 + 1950 + extern const nfs4_stateid current_stateid; 1951 + 
1952 + static void _lgopen_prepare_attached(struct nfs4_opendata *data, 1953 + struct nfs_open_context *ctx) 1954 + { 1955 + struct inode *ino = data->dentry->d_inode; 1956 + struct pnfs_layout_range rng = { 1957 + .iomode = (data->o_arg.fmode & FMODE_WRITE) ? 1958 + IOMODE_RW: IOMODE_READ, 1959 + .offset = 0, 1960 + .length = NFS4_MAX_UINT64, 1961 + }; 1962 + struct nfs4_layoutget *lgp; 1963 + struct pnfs_layout_hdr *lo; 1964 + 1965 + /* Heuristic: don't send layoutget if we have cached data */ 1966 + if (rng.iomode == IOMODE_READ && 1967 + (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0)) 1968 + return; 1969 + 1970 + lo = _pnfs_grab_empty_layout(ino, ctx); 1971 + if (!lo) 1972 + return; 1973 + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, 1974 + &rng, GFP_KERNEL); 1975 + if (!lgp) { 1976 + pnfs_clear_first_layoutget(lo); 1977 + pnfs_put_layout_hdr(lo); 1978 + return; 1979 + } 1980 + data->lgp = lgp; 1981 + data->o_arg.lg_args = &lgp->args; 1982 + data->o_res.lg_res = &lgp->res; 1983 + } 1984 + 1985 + static void _lgopen_prepare_floating(struct nfs4_opendata *data, 1986 + struct nfs_open_context *ctx) 1987 + { 1988 + struct pnfs_layout_range rng = { 1989 + .iomode = (data->o_arg.fmode & FMODE_WRITE) ? 
1990 + IOMODE_RW: IOMODE_READ, 1991 + .offset = 0, 1992 + .length = NFS4_MAX_UINT64, 1993 + }; 1994 + struct nfs4_layoutget *lgp; 1995 + 1996 + lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid, 1997 + &rng, GFP_KERNEL); 1998 + if (!lgp) 1999 + return; 2000 + data->lgp = lgp; 2001 + data->o_arg.lg_args = &lgp->args; 2002 + data->o_res.lg_res = &lgp->res; 2003 + } 2004 + 2005 + void pnfs_lgopen_prepare(struct nfs4_opendata *data, 2006 + struct nfs_open_context *ctx) 2007 + { 2008 + struct nfs_server *server = NFS_SERVER(data->dir->d_inode); 2009 + 2010 + if (!(pnfs_enabled_sb(server) && 2011 + server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN)) 2012 + return; 2013 + /* Could check on max_ops, but currently hardcoded high enough */ 2014 + if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN)) 2015 + return; 2016 + if (data->state) 2017 + _lgopen_prepare_attached(data, ctx); 2018 + else 2019 + _lgopen_prepare_floating(data, ctx); 2020 + } 2021 + 2022 + void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, 2023 + struct nfs_open_context *ctx) 2024 + { 2025 + struct pnfs_layout_hdr *lo; 2026 + struct pnfs_layout_segment *lseg; 2027 + struct nfs_server *srv = NFS_SERVER(ino); 2028 + u32 iomode; 2029 + 2030 + if (!lgp) 2031 + return; 2032 + dprintk("%s: entered with status %i\n", __func__, lgp->res.status); 2033 + if (lgp->res.status) { 2034 + switch (lgp->res.status) { 2035 + default: 2036 + break; 2037 + /* 2038 + * Halt lgopen attempts if the server doesn't recognise 2039 + * the "current stateid" value, the layout type, or the 2040 + * layoutget operation as being valid. 2041 + * Also if it complains about too many ops in the compound 2042 + * or of the request/reply being too big. 
2043 + */ 2044 + case -NFS4ERR_BAD_STATEID: 2045 + case -NFS4ERR_NOTSUPP: 2046 + case -NFS4ERR_REP_TOO_BIG: 2047 + case -NFS4ERR_REP_TOO_BIG_TO_CACHE: 2048 + case -NFS4ERR_REQ_TOO_BIG: 2049 + case -NFS4ERR_TOO_MANY_OPS: 2050 + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: 2051 + srv->caps &= ~NFS_CAP_LGOPEN; 2052 + } 2053 + return; 2054 + } 2055 + if (!lgp->args.inode) { 2056 + lo = _pnfs_grab_empty_layout(ino, ctx); 2057 + if (!lo) 2058 + return; 2059 + lgp->args.inode = ino; 2060 + } else 2061 + lo = NFS_I(lgp->args.inode)->layout; 2062 + 2063 + if (read_seqcount_retry(&srv->nfs_client->cl_callback_count, 2064 + lgp->callback_count)) 2065 + return; 2066 + lseg = pnfs_layout_process(lgp); 2067 + if (!IS_ERR(lseg)) { 2068 + iomode = lgp->args.range.iomode; 2069 + pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 2070 + pnfs_put_lseg(lseg); 2071 + } 2072 + } 2073 + 2074 + void nfs4_lgopen_release(struct nfs4_layoutget *lgp) 2075 + { 2076 + if (lgp != NULL) { 2077 + struct inode *inode = lgp->args.inode; 2078 + if (inode) { 2079 + struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 2080 + atomic_dec(&lo->plh_outstanding); 2081 + pnfs_clear_first_layoutget(lo); 2082 + } 2083 + pnfs_layoutget_free(lgp); 2084 + } 2085 + } 2086 + 2008 2087 struct pnfs_layout_segment * 2009 2088 pnfs_layout_process(struct nfs4_layoutget *lgp) 2010 2089 { ··· 2235 1984 spin_unlock(&ino->i_lock); 2236 1985 lseg->pls_layout = lo; 2237 1986 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 2238 - if (!pnfs_layout_is_valid(lo)) 2239 - nfs_commit_inode(ino, 0); 2240 1987 return ERR_PTR(-EAGAIN); 2241 1988 } 2242 1989
+27 -1
fs/nfs/pnfs.h
··· 35 35 #include <linux/nfs_page.h> 36 36 #include <linux/workqueue.h> 37 37 38 + struct nfs4_opendata; 39 + 38 40 enum { 39 41 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 40 42 NFS_LSEG_ROC, /* roc bit received from server */ ··· 112 110 PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, 113 111 PNFS_LAYOUTRET_ON_ERROR = 1 << 1, 114 112 PNFS_READ_WHOLE_PAGE = 1 << 2, 113 + PNFS_LAYOUTGET_ON_OPEN = 1 << 3, 115 114 }; 116 115 117 116 struct nfs4_deviceid_node; ··· 226 223 extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 227 224 228 225 /* nfs4proc.c */ 226 + extern size_t max_response_pages(struct nfs_server *server); 229 227 extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, 230 228 struct pnfs_device *dev, 231 229 struct rpc_cred *cred); 232 - extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags); 230 + extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout); 233 231 extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); 234 232 235 233 /* pnfs.c */ ··· 250 246 struct nfs_page *prev, struct nfs_page *req); 251 247 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); 252 248 struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); 249 + void pnfs_layoutget_free(struct nfs4_layoutget *lgp); 253 250 void pnfs_free_lseg_list(struct list_head *tmp_list); 254 251 void pnfs_destroy_layout(struct nfs_inode *); 255 252 void pnfs_destroy_all_layouts(struct nfs_client *); ··· 380 375 struct pnfs_layout_segment *lseg, 381 376 struct nfs_commit_info *cinfo, 382 377 u32 ds_commit_idx); 378 + void pnfs_lgopen_prepare(struct nfs4_opendata *data, 379 + struct nfs_open_context *ctx); 380 + void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, 381 + struct nfs_open_context *ctx); 382 + void nfs4_lgopen_release(struct nfs4_layoutget *lgp); 383 383 384 384 static inline bool 
nfs_have_layout(struct inode *inode) 385 385 { ··· 785 775 { 786 776 return false; 787 777 } 778 + 779 + static inline void pnfs_lgopen_prepare(struct nfs4_opendata *data, 780 + struct nfs_open_context *ctx) 781 + { 782 + } 783 + 784 + static inline void pnfs_parse_lgopen(struct inode *ino, 785 + struct nfs4_layoutget *lgp, 786 + struct nfs_open_context *ctx) 787 + { 788 + } 789 + 790 + static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp) 791 + { 792 + } 793 + 788 794 #endif /* CONFIG_NFS_V4_1 */ 789 795 790 796 #if IS_ENABLED(CONFIG_NFS_V4_2)
+9 -4
fs/nfs/proc.c
··· 99 99 */ 100 100 static int 101 101 nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 102 - struct nfs_fattr *fattr, struct nfs4_label *label) 102 + struct nfs_fattr *fattr, struct nfs4_label *label, 103 + struct inode *inode) 103 104 { 104 105 struct rpc_message msg = { 105 106 .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], ··· 322 321 } 323 322 324 323 static void 325 - nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) 324 + nfs_proc_unlink_setup(struct rpc_message *msg, 325 + struct dentry *dentry, 326 + struct inode *inode) 326 327 { 327 328 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; 328 329 } ··· 621 618 } 622 619 623 620 static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, 624 - struct rpc_message *msg) 621 + struct rpc_message *msg, 622 + struct rpc_clnt **clnt) 625 623 { 626 624 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 627 625 hdr->args.stable = NFS_FILE_SYNC; ··· 635 631 } 636 632 637 633 static void 638 - nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) 634 + nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, 635 + struct rpc_clnt **clnt) 639 636 { 640 637 BUG(); 641 638 }
+15 -5
fs/nfs/unlink.c
··· 85 85 .rpc_call_prepare = nfs_unlink_prepare, 86 86 }; 87 87 88 - static void nfs_do_call_unlink(struct nfs_unlinkdata *data) 88 + static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data) 89 89 { 90 90 struct rpc_message msg = { 91 91 .rpc_argp = &data->args, ··· 105 105 data->args.fh = NFS_FH(dir); 106 106 nfs_fattr_init(data->res.dir_attr); 107 107 108 - NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); 108 + NFS_PROTO(dir)->unlink_setup(&msg, data->dentry, inode); 109 109 110 110 task_setup_data.rpc_client = NFS_CLIENT(dir); 111 111 task = rpc_run_task(&task_setup_data); ··· 113 113 rpc_put_task_async(task); 114 114 } 115 115 116 - static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) 116 + static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nfs_unlinkdata *data) 117 117 { 118 118 struct inode *dir = d_inode(dentry->d_parent); 119 119 struct dentry *alias; ··· 153 153 return ret; 154 154 } 155 155 data->dentry = alias; 156 - nfs_do_call_unlink(data); 156 + nfs_do_call_unlink(inode, data); 157 157 return 1; 158 158 } 159 159 ··· 231 231 dentry->d_fsdata = NULL; 232 232 spin_unlock(&dentry->d_lock); 233 233 234 - if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) 234 + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) 235 235 nfs_free_unlinkdata(data); 236 236 } 237 237 ··· 448 448 unsigned char silly[SILLYNAME_LEN + 1]; 449 449 unsigned long long fileid; 450 450 struct dentry *sdentry; 451 + struct inode *inode = d_inode(dentry); 451 452 struct rpc_task *task; 452 453 int error = -EBUSY; 453 454 ··· 486 485 goto out; 487 486 } while (d_inode(sdentry) != NULL); /* need negative lookup */ 488 487 488 + ihold(inode); 489 + 489 490 /* queue unlink first. 
Can't do this from rpc_release as it 490 491 * has to allocate memory 491 492 */ ··· 512 509 case 0: 513 510 /* The rename succeeded */ 514 511 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 512 + spin_lock(&inode->i_lock); 513 + NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); 514 + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE 515 + | NFS_INO_INVALID_CTIME 516 + | NFS_INO_REVAL_FORCED; 517 + spin_unlock(&inode->i_lock); 515 518 d_move(dentry, sdentry); 516 519 break; 517 520 case -ERESTARTSYS: ··· 528 519 } 529 520 rpc_put_task(task); 530 521 out_dput: 522 + iput(inode); 531 523 dput(sdentry); 532 524 out: 533 525 return error;
+2 -8
fs/nfs/write.c
··· 1375 1375 int priority = flush_task_priority(how); 1376 1376 1377 1377 task_setup_data->priority = priority; 1378 - rpc_ops->write_setup(hdr, msg); 1378 + rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); 1379 1379 trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, 1380 1380 hdr->args.stable); 1381 - 1382 - nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, 1383 - &task_setup_data->rpc_client, msg, hdr); 1384 1381 } 1385 1382 1386 1383 /* If a nfs_flush_* function fails, it should remove reqs from @head and ··· 1666 1669 .priority = priority, 1667 1670 }; 1668 1671 /* Set up the initial task struct. */ 1669 - nfs_ops->commit_setup(data, &msg); 1672 + nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); 1670 1673 trace_nfs_initiate_commit(data); 1671 1674 1672 1675 dprintk("NFS: initiated commit call\n"); 1673 - 1674 - nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, 1675 - NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); 1676 1676 1677 1677 task = rpc_run_task(&task_setup_data); 1678 1678 if (IS_ERR(task))
+2
include/linux/nfs_fs_sb.h
··· 28 28 struct nfs_client { 29 29 refcount_t cl_count; 30 30 atomic_t cl_mds_count; 31 + seqcount_t cl_callback_count; 31 32 int cl_cons_state; /* current construction state (-ve: init error) */ 32 33 #define NFS_CS_READY 0 /* ready to be used */ 33 34 #define NFS_CS_INITING 1 /* busy initialising */ ··· 236 235 #define NFS_CAP_ACLS (1U << 3) 237 236 #define NFS_CAP_ATOMIC_OPEN (1U << 4) 238 237 /* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ 238 + #define NFS_CAP_LGOPEN (1U << 5) 239 239 #define NFS_CAP_FILEID (1U << 6) 240 240 #define NFS_CAP_MODE (1U << 7) 241 241 #define NFS_CAP_NLINK (1U << 8)
+11 -4
include/linux/nfs_xdr.h
··· 259 259 260 260 struct nfs4_layoutget_res { 261 261 struct nfs4_sequence_res seq_res; 262 + int status; 262 263 __u32 return_on_close; 263 264 struct pnfs_layout_range range; 264 265 __u32 type; ··· 271 270 struct nfs4_layoutget_args args; 272 271 struct nfs4_layoutget_res res; 273 272 struct rpc_cred *cred; 273 + unsigned callback_count; 274 274 gfp_t gfp_flags; 275 275 }; 276 276 ··· 437 435 enum createmode4 createmode; 438 436 const struct nfs4_label *label; 439 437 umode_t umask; 438 + struct nfs4_layoutget_args *lg_args; 440 439 }; 441 440 442 441 struct nfs_openres { ··· 460 457 __u32 access_request; 461 458 __u32 access_supported; 462 459 __u32 access_result; 460 + struct nfs4_layoutget_res *lg_res; 463 461 }; 464 462 465 463 /* ··· 1581 1577 struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *, 1582 1578 struct nfs_subversion *); 1583 1579 int (*getattr) (struct nfs_server *, struct nfs_fh *, 1584 - struct nfs_fattr *, struct nfs4_label *); 1580 + struct nfs_fattr *, struct nfs4_label *, 1581 + struct inode *); 1585 1582 int (*setattr) (struct dentry *, struct nfs_fattr *, 1586 1583 struct iattr *); 1587 1584 int (*lookup) (struct inode *, const struct qstr *, ··· 1596 1591 int (*create) (struct inode *, struct dentry *, 1597 1592 struct iattr *, int); 1598 1593 int (*remove) (struct inode *, struct dentry *); 1599 - void (*unlink_setup) (struct rpc_message *, struct dentry *); 1594 + void (*unlink_setup) (struct rpc_message *, struct dentry *, struct inode *); 1600 1595 void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); 1601 1596 int (*unlink_done) (struct rpc_task *, struct inode *); 1602 1597 void (*rename_setup) (struct rpc_message *msg, ··· 1625 1620 struct nfs_pgio_header *); 1626 1621 void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *); 1627 1622 int (*read_done)(struct rpc_task *, struct nfs_pgio_header *); 1628 - void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *); 1623 + 
void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *, 1624 + struct rpc_clnt **); 1629 1625 int (*write_done)(struct rpc_task *, struct nfs_pgio_header *); 1630 - void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); 1626 + void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *, 1627 + struct rpc_clnt **); 1631 1628 void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); 1632 1629 int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); 1633 1630 int (*lock)(struct file *, int, struct file_lock *);
+13
include/linux/rculist.h
··· 404 404 pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) 405 405 406 406 /** 407 + * list_for_each_entry_from_rcu - iterate over a list from current point 408 + * @pos: the type * to use as a loop cursor. 409 + * @head: the head for your list. 410 + * @member: the name of the list_node within the struct. 411 + * 412 + * Iterate over the tail of a list starting from a given position, 413 + * which must have been in the list when the RCU read lock was taken. 414 + */ 415 + #define list_for_each_entry_from_rcu(pos, head, member) \ 416 + for (; &(pos)->member != (head); \ 417 + pos = list_entry_rcu(pos->member.next, typeof(*(pos)), member)) 418 + 419 + /** 407 420 * hlist_del_rcu - deletes entry from hash list without re-initialization 408 421 * @n: the element to delete from the hash list. 409 422 *
+1
include/linux/sunrpc/rpc_rdma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 1 2 /* 2 3 * Copyright (c) 2015-2017 Oracle. All rights reserved. 3 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+5 -1
include/linux/sunrpc/xprt.h
··· 84 84 void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ 85 85 struct list_head rq_list; 86 86 87 - void *rq_xprtdata; /* Per-xprt private data */ 88 87 void *rq_buffer; /* Call XDR encode buffer */ 89 88 size_t rq_callsize; 90 89 void *rq_rbuffer; /* Reply XDR decode buffer */ ··· 126 127 int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 127 128 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 128 129 void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); 130 + void (*free_slot)(struct rpc_xprt *xprt, 131 + struct rpc_rqst *req); 129 132 void (*rpcbind)(struct rpc_task *task); 130 133 void (*set_port)(struct rpc_xprt *xprt, unsigned short port); 131 134 void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); ··· 325 324 struct rpc_xprt *xprt_create_transport(struct xprt_create *args); 326 325 void xprt_connect(struct rpc_task *task); 327 326 void xprt_reserve(struct rpc_task *task); 327 + void xprt_request_init(struct rpc_task *task); 328 328 void xprt_retry_reserve(struct rpc_task *task); 329 329 int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); 330 330 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); 331 331 void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); 332 + void xprt_free_slot(struct rpc_xprt *xprt, 333 + struct rpc_rqst *req); 332 334 void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); 333 335 bool xprt_prepare_transmit(struct rpc_task *task); 334 336 void xprt_transmit(struct rpc_task *task);
+1
include/linux/sunrpc/xprtrdma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 1 2 /* 2 3 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 3 4 *
+56 -26
include/trace/events/rpcrdma.h
··· 530 530 531 531 TRACE_EVENT(xprtrdma_post_recv, 532 532 TP_PROTO( 533 - const struct rpcrdma_rep *rep, 534 - int status 533 + const struct ib_cqe *cqe 535 534 ), 536 535 537 - TP_ARGS(rep, status), 536 + TP_ARGS(cqe), 538 537 539 538 TP_STRUCT__entry( 540 - __field(const void *, rep) 541 - __field(int, status) 539 + __field(const void *, cqe) 542 540 ), 543 541 544 542 TP_fast_assign( 545 - __entry->rep = rep; 546 - __entry->status = status; 543 + __entry->cqe = cqe; 547 544 ), 548 545 549 - TP_printk("rep=%p status=%d", 550 - __entry->rep, __entry->status 546 + TP_printk("cqe=%p", 547 + __entry->cqe 548 + ) 549 + ); 550 + 551 + TRACE_EVENT(xprtrdma_post_recvs, 552 + TP_PROTO( 553 + const struct rpcrdma_xprt *r_xprt, 554 + unsigned int count, 555 + int status 556 + ), 557 + 558 + TP_ARGS(r_xprt, count, status), 559 + 560 + TP_STRUCT__entry( 561 + __field(const void *, r_xprt) 562 + __field(unsigned int, count) 563 + __field(int, status) 564 + __field(int, posted) 565 + __string(addr, rpcrdma_addrstr(r_xprt)) 566 + __string(port, rpcrdma_portstr(r_xprt)) 567 + ), 568 + 569 + TP_fast_assign( 570 + __entry->r_xprt = r_xprt; 571 + __entry->count = count; 572 + __entry->status = status; 573 + __entry->posted = r_xprt->rx_buf.rb_posted_receives; 574 + __assign_str(addr, rpcrdma_addrstr(r_xprt)); 575 + __assign_str(port, rpcrdma_portstr(r_xprt)); 576 + ), 577 + 578 + TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", 579 + __get_str(addr), __get_str(port), __entry->r_xprt, 580 + __entry->count, __entry->posted, __entry->status 551 581 ) 552 582 ); 553 583 ··· 616 586 617 587 TRACE_EVENT(xprtrdma_wc_receive, 618 588 TP_PROTO( 619 - const struct rpcrdma_rep *rep, 620 589 const struct ib_wc *wc 621 590 ), 622 591 623 - TP_ARGS(rep, wc), 592 + TP_ARGS(wc), 624 593 625 594 TP_STRUCT__entry( 626 - __field(const void *, rep) 627 - __field(unsigned int, byte_len) 595 + __field(const void *, cqe) 596 + __field(u32, byte_len) 628 597 __field(unsigned int, 
status) 629 - __field(unsigned int, vendor_err) 598 + __field(u32, vendor_err) 630 599 ), 631 600 632 601 TP_fast_assign( 633 - __entry->rep = rep; 634 - __entry->byte_len = wc->byte_len; 602 + __entry->cqe = wc->wr_cqe; 635 603 __entry->status = wc->status; 636 - __entry->vendor_err = __entry->status ? wc->vendor_err : 0; 604 + if (wc->status) { 605 + __entry->byte_len = 0; 606 + __entry->vendor_err = wc->vendor_err; 607 + } else { 608 + __entry->byte_len = wc->byte_len; 609 + __entry->vendor_err = 0; 610 + } 637 611 ), 638 612 639 - TP_printk("rep=%p, %u bytes: %s (%u/0x%x)", 640 - __entry->rep, __entry->byte_len, 613 + TP_printk("cqe=%p %u bytes: %s (%u/0x%x)", 614 + __entry->cqe, __entry->byte_len, 641 615 rdma_show_wc_status(__entry->status), 642 616 __entry->status, __entry->vendor_err 643 617 ) ··· 652 618 DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); 653 619 654 620 DEFINE_MR_EVENT(xprtrdma_localinv); 621 + DEFINE_MR_EVENT(xprtrdma_dma_map); 655 622 DEFINE_MR_EVENT(xprtrdma_dma_unmap); 656 623 DEFINE_MR_EVENT(xprtrdma_remoteinv); 657 624 DEFINE_MR_EVENT(xprtrdma_recover_mr); ··· 836 801 __field(unsigned int, task_id) 837 802 __field(unsigned int, client_id) 838 803 __field(const void *, req) 839 - __field(const void *, rep) 840 804 __field(size_t, callsize) 841 805 __field(size_t, rcvsize) 842 806 ), ··· 844 810 __entry->task_id = task->tk_pid; 845 811 __entry->client_id = task->tk_client->cl_clid; 846 812 __entry->req = req; 847 - __entry->rep = req ? 
req->rl_reply : NULL; 848 813 __entry->callsize = task->tk_rqstp->rq_callsize; 849 814 __entry->rcvsize = task->tk_rqstp->rq_rcvsize; 850 815 ), 851 816 852 - TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)", 817 + TP_printk("task:%u@%u req=%p (%zu, %zu)", 853 818 __entry->task_id, __entry->client_id, 854 - __entry->req, __entry->rep, 855 - __entry->callsize, __entry->rcvsize 819 + __entry->req, __entry->callsize, __entry->rcvsize 856 820 ) 857 821 ); 858 822 ··· 881 849 __entry->req, __entry->rep 882 850 ) 883 851 ); 884 - 885 - DEFINE_RXPRT_EVENT(xprtrdma_noreps); 886 852 887 853 /** 888 854 ** Callback events
+1
net/sunrpc/clnt.c
··· 1546 1546 task->tk_status = 0; 1547 1547 if (status >= 0) { 1548 1548 if (task->tk_rqstp) { 1549 + xprt_request_init(task); 1549 1550 task->tk_action = call_refresh; 1550 1551 return; 1551 1552 }
+10 -7
net/sunrpc/xprt.c
··· 66 66 * Local functions 67 67 */ 68 68 static void xprt_init(struct rpc_xprt *xprt, struct net *net); 69 - static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 69 + static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); 70 70 static void xprt_connect_status(struct rpc_task *task); 71 71 static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 72 72 static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); ··· 987 987 task->tk_status = -EAGAIN; 988 988 goto out_unlock; 989 989 } 990 + if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent) 991 + req->rq_xid = xprt_alloc_xid(xprt); 990 992 ret = true; 991 993 out_unlock: 992 994 spin_unlock_bh(&xprt->transport_lock); ··· 1165 1163 out_init_req: 1166 1164 xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots, 1167 1165 xprt->num_reqs); 1166 + spin_unlock(&xprt->reserve_lock); 1167 + 1168 1168 task->tk_status = 0; 1169 1169 task->tk_rqstp = req; 1170 - xprt_request_init(task, xprt); 1171 - spin_unlock(&xprt->reserve_lock); 1172 1170 } 1173 1171 EXPORT_SYMBOL_GPL(xprt_alloc_slot); 1174 1172 ··· 1186 1184 } 1187 1185 EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot); 1188 1186 1189 - static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 1187 + void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 1190 1188 { 1191 1189 spin_lock(&xprt->reserve_lock); 1192 1190 if (!xprt_dynamic_free_slot(xprt, req)) { ··· 1196 1194 xprt_wake_up_backlog(xprt); 1197 1195 spin_unlock(&xprt->reserve_lock); 1198 1196 } 1197 + EXPORT_SYMBOL_GPL(xprt_free_slot); 1199 1198 1200 1199 static void xprt_free_all_slots(struct rpc_xprt *xprt) 1201 1200 { ··· 1306 1303 xprt->xid = prandom_u32(); 1307 1304 } 1308 1305 1309 - static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) 1306 + void xprt_request_init(struct rpc_task *task) 1310 1307 { 1308 + struct rpc_xprt *xprt = task->tk_xprt; 1311 1309 struct rpc_rqst *req = task->tk_rqstp; 1312 1310 1313 1311 
INIT_LIST_HEAD(&req->rq_list); ··· 1316 1312 req->rq_task = task; 1317 1313 req->rq_xprt = xprt; 1318 1314 req->rq_buffer = NULL; 1319 - req->rq_xid = xprt_alloc_xid(xprt); 1320 1315 req->rq_connect_cookie = xprt->connect_cookie - 1; 1321 1316 req->rq_bytes_sent = 0; 1322 1317 req->rq_snd_buf.len = 0; ··· 1376 1373 1377 1374 dprintk("RPC: %5u release request %p\n", task->tk_pid, req); 1378 1375 if (likely(!bc_prealloc(req))) 1379 - xprt_free_slot(xprt, req); 1376 + xprt->ops->free_slot(xprt, req); 1380 1377 else 1381 1378 xprt_free_bc_request(req); 1382 1379 }
+38 -65
net/sunrpc/xprtrdma/backchannel.c
··· 31 31 spin_unlock(&buf->rb_reqslock); 32 32 33 33 rpcrdma_destroy_req(req); 34 - 35 - kfree(rqst); 36 34 } 37 35 38 - static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, 39 - struct rpc_rqst *rqst) 36 + static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, 37 + unsigned int count) 40 38 { 41 - struct rpcrdma_regbuf *rb; 42 - struct rpcrdma_req *req; 43 - size_t size; 39 + struct rpc_xprt *xprt = &r_xprt->rx_xprt; 40 + struct rpc_rqst *rqst; 41 + unsigned int i; 44 42 45 - req = rpcrdma_create_req(r_xprt); 46 - if (IS_ERR(req)) 47 - return PTR_ERR(req); 43 + for (i = 0; i < (count << 1); i++) { 44 + struct rpcrdma_regbuf *rb; 45 + struct rpcrdma_req *req; 46 + size_t size; 48 47 49 - size = r_xprt->rx_data.inline_rsize; 50 - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); 51 - if (IS_ERR(rb)) 52 - goto out_fail; 53 - req->rl_sendbuf = rb; 54 - xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, 55 - min_t(size_t, size, PAGE_SIZE)); 56 - rpcrdma_set_xprtdata(rqst, req); 48 + req = rpcrdma_create_req(r_xprt); 49 + if (IS_ERR(req)) 50 + return PTR_ERR(req); 51 + rqst = &req->rl_slot; 52 + 53 + rqst->rq_xprt = xprt; 54 + INIT_LIST_HEAD(&rqst->rq_list); 55 + INIT_LIST_HEAD(&rqst->rq_bc_list); 56 + __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 57 + spin_lock_bh(&xprt->bc_pa_lock); 58 + list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 59 + spin_unlock_bh(&xprt->bc_pa_lock); 60 + 61 + size = r_xprt->rx_data.inline_rsize; 62 + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); 63 + if (IS_ERR(rb)) 64 + goto out_fail; 65 + req->rl_sendbuf = rb; 66 + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, 67 + min_t(size_t, size, PAGE_SIZE)); 68 + } 57 69 return 0; 58 70 59 71 out_fail: 60 72 rpcrdma_bc_free_rqst(r_xprt, rqst); 61 73 return -ENOMEM; 62 - } 63 - 64 - /* Allocate and add receive buffers to the rpcrdma_buffer's 65 - * existing list of rep's. These are released when the 66 - * transport is destroyed. 
67 - */ 68 - static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, 69 - unsigned int count) 70 - { 71 - int rc = 0; 72 - 73 - while (count--) { 74 - rc = rpcrdma_create_rep(r_xprt); 75 - if (rc) 76 - break; 77 - } 78 - return rc; 79 74 } 80 75 81 76 /** ··· 83 88 int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) 84 89 { 85 90 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 86 - struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 87 - struct rpc_rqst *rqst; 88 - unsigned int i; 89 91 int rc; 90 92 91 93 /* The backchannel reply path returns each rpc_rqst to the ··· 97 105 if (reqs > RPCRDMA_BACKWARD_WRS >> 1) 98 106 goto out_err; 99 107 100 - for (i = 0; i < (reqs << 1); i++) { 101 - rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); 102 - if (!rqst) 103 - goto out_free; 104 - 105 - dprintk("RPC: %s: new rqst %p\n", __func__, rqst); 106 - 107 - rqst->rq_xprt = &r_xprt->rx_xprt; 108 - INIT_LIST_HEAD(&rqst->rq_list); 109 - INIT_LIST_HEAD(&rqst->rq_bc_list); 110 - __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 111 - 112 - if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) 113 - goto out_free; 114 - 115 - spin_lock_bh(&xprt->bc_pa_lock); 116 - list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 117 - spin_unlock_bh(&xprt->bc_pa_lock); 118 - } 119 - 120 - rc = rpcrdma_bc_setup_reps(r_xprt, reqs); 108 + rc = rpcrdma_bc_setup_reqs(r_xprt, reqs); 121 109 if (rc) 122 110 goto out_free; 123 111 124 - rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); 125 - if (rc) 126 - goto out_free; 127 - 128 - buffer->rb_bc_srv_max_requests = reqs; 112 + r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; 129 113 request_module("svcrdma"); 130 114 trace_xprtrdma_cb_setup(r_xprt, reqs); 131 115 return 0; ··· 205 237 if (rc < 0) 206 238 goto failed_marshal; 207 239 240 + rpcrdma_post_recvs(r_xprt, true); 208 241 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 209 242 goto drop_connection; 210 243 return 0; ··· 246 277 */ 247 278 void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) 248 
279 { 280 + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 249 281 struct rpc_xprt *xprt = rqst->rq_xprt; 250 282 251 283 dprintk("RPC: %s: freeing rqst %p (req %p)\n", 252 - __func__, rqst, rpcr_to_rdmar(rqst)); 284 + __func__, rqst, req); 285 + 286 + rpcrdma_recv_buffer_put(req->rl_reply); 287 + req->rl_reply = NULL; 253 288 254 289 spin_lock_bh(&xprt->bc_pa_lock); 255 290 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+23
net/sunrpc/xprtrdma/fmr_ops.c
··· 159 159 fmr_op_release_mr(mr); 160 160 } 161 161 162 + /* On success, sets: 163 + * ep->rep_attr.cap.max_send_wr 164 + * ep->rep_attr.cap.max_recv_wr 165 + * cdata->max_requests 166 + * ia->ri_max_segs 167 + */ 162 168 static int 163 169 fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 164 170 struct rpcrdma_create_data_internal *cdata) 165 171 { 172 + int max_qp_wr; 173 + 174 + max_qp_wr = ia->ri_device->attrs.max_qp_wr; 175 + max_qp_wr -= RPCRDMA_BACKWARD_WRS; 176 + max_qp_wr -= 1; 177 + if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) 178 + return -ENOMEM; 179 + if (cdata->max_requests > max_qp_wr) 180 + cdata->max_requests = max_qp_wr; 181 + ep->rep_attr.cap.max_send_wr = cdata->max_requests; 182 + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; 183 + ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ 184 + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 185 + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 186 + ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ 187 + 166 188 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 167 189 RPCRDMA_MAX_FMR_SGES); 168 190 return 0; ··· 244 222 mr->mr_sg, i, mr->mr_dir); 245 223 if (!mr->mr_nents) 246 224 goto out_dmamap_err; 225 + trace_xprtrdma_dma_map(mr); 247 226 248 227 for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) 249 228 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
+27 -4
net/sunrpc/xprtrdma/frwr_ops.c
··· 204 204 frwr_op_release_mr(mr); 205 205 } 206 206 207 + /* On success, sets: 208 + * ep->rep_attr.cap.max_send_wr 209 + * ep->rep_attr.cap.max_recv_wr 210 + * cdata->max_requests 211 + * ia->ri_max_segs 212 + * 213 + * And these FRWR-related fields: 214 + * ia->ri_max_frwr_depth 215 + * ia->ri_mrtype 216 + */ 207 217 static int 208 218 frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 209 219 struct rpcrdma_create_data_internal *cdata) 210 220 { 211 221 struct ib_device_attr *attrs = &ia->ri_device->attrs; 212 - int depth, delta; 222 + int max_qp_wr, depth, delta; 213 223 214 224 ia->ri_mrtype = IB_MR_TYPE_MEM_REG; 215 225 if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) ··· 253 243 } while (delta > 0); 254 244 } 255 245 256 - ep->rep_attr.cap.max_send_wr *= depth; 257 - if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { 258 - cdata->max_requests = attrs->max_qp_wr / depth; 246 + max_qp_wr = ia->ri_device->attrs.max_qp_wr; 247 + max_qp_wr -= RPCRDMA_BACKWARD_WRS; 248 + max_qp_wr -= 1; 249 + if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) 250 + return -ENOMEM; 251 + if (cdata->max_requests > max_qp_wr) 252 + cdata->max_requests = max_qp_wr; 253 + ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth; 254 + if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { 255 + cdata->max_requests = max_qp_wr / depth; 259 256 if (!cdata->max_requests) 260 257 return -EINVAL; 261 258 ep->rep_attr.cap.max_send_wr = cdata->max_requests * 262 259 depth; 263 260 } 261 + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; 262 + ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ 263 + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 264 + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 265 + ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ 264 266 265 267 ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / 266 268 ia->ri_max_frwr_depth); ··· 417 395 mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); 418 396 if 
(!mr->mr_nents) 419 397 goto out_dmamap_err; 398 + trace_xprtrdma_dma_map(mr); 420 399 421 400 ibmr = frwr->fr_mr; 422 401 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+1
net/sunrpc/xprtrdma/module.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 2 3 * Copyright (c) 2015, 2017 Oracle. All rights reserved. 3 4 */
+22 -44
net/sunrpc/xprtrdma/rpc_rdma.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 2 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. ··· 57 56 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 58 57 # define RPCDBG_FACILITY RPCDBG_TRANS 59 58 #endif 60 - 61 - static const char transfertypes[][12] = { 62 - "inline", /* no chunks */ 63 - "read list", /* some argument via rdma read */ 64 - "*read list", /* entire request via rdma read */ 65 - "write list", /* some result via rdma write */ 66 - "reply chunk" /* entire reply via rdma write */ 67 - }; 68 59 69 60 /* Returns size of largest RPC-over-RDMA header in a Call message 70 61 * ··· 226 233 */ 227 234 *ppages = alloc_page(GFP_ATOMIC); 228 235 if (!*ppages) 229 - return -EAGAIN; 236 + return -ENOBUFS; 230 237 } 231 238 seg->mr_page = *ppages; 232 239 seg->mr_offset = (char *)page_base; ··· 361 368 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 362 369 false, &mr); 363 370 if (IS_ERR(seg)) 364 - goto out_maperr; 371 + return PTR_ERR(seg); 365 372 rpcrdma_mr_push(mr, &req->rl_registered); 366 373 367 374 if (encode_read_segment(xdr, mr, pos) < 0) ··· 373 380 } while (nsegs); 374 381 375 382 return 0; 376 - 377 - out_maperr: 378 - if (PTR_ERR(seg) == -EAGAIN) 379 - xprt_wait_for_buffer_space(rqst->rq_task, NULL); 380 - return PTR_ERR(seg); 381 383 } 382 384 383 385 /* Register and XDR encode the Write list. 
Supports encoding a list ··· 419 431 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 420 432 true, &mr); 421 433 if (IS_ERR(seg)) 422 - goto out_maperr; 434 + return PTR_ERR(seg); 423 435 rpcrdma_mr_push(mr, &req->rl_registered); 424 436 425 437 if (encode_rdma_segment(xdr, mr) < 0) ··· 436 448 *segcount = cpu_to_be32(nchunks); 437 449 438 450 return 0; 439 - 440 - out_maperr: 441 - if (PTR_ERR(seg) == -EAGAIN) 442 - xprt_wait_for_buffer_space(rqst->rq_task, NULL); 443 - return PTR_ERR(seg); 444 451 } 445 452 446 453 /* Register and XDR encode the Reply chunk. Supports encoding an array ··· 477 494 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 478 495 true, &mr); 479 496 if (IS_ERR(seg)) 480 - goto out_maperr; 497 + return PTR_ERR(seg); 481 498 rpcrdma_mr_push(mr, &req->rl_registered); 482 499 483 500 if (encode_rdma_segment(xdr, mr) < 0) ··· 494 511 *segcount = cpu_to_be32(nchunks); 495 512 496 513 return 0; 497 - 498 - out_maperr: 499 - if (PTR_ERR(seg) == -EAGAIN) 500 - xprt_wait_for_buffer_space(rqst->rq_task, NULL); 501 - return PTR_ERR(seg); 502 514 } 503 515 504 516 /** ··· 690 712 { 691 713 req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); 692 714 if (!req->rl_sendctx) 693 - return -ENOBUFS; 715 + return -EAGAIN; 694 716 req->rl_sendctx->sc_wr.num_sge = 0; 695 717 req->rl_sendctx->sc_unmap_count = 0; 696 718 req->rl_sendctx->sc_req = req; ··· 864 886 return 0; 865 887 866 888 out_err: 867 - r_xprt->rx_stats.failed_marshal_count++; 889 + switch (ret) { 890 + case -EAGAIN: 891 + xprt_wait_for_buffer_space(rqst->rq_task, NULL); 892 + break; 893 + case -ENOBUFS: 894 + break; 895 + default: 896 + r_xprt->rx_stats.failed_marshal_count++; 897 + } 868 898 return ret; 869 899 } 870 900 ··· 1015 1029 1016 1030 out_short: 1017 1031 pr_warn("RPC/RDMA short backward direction call\n"); 1018 - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) 1019 - xprt_disconnect_done(&r_xprt->rx_xprt); 1020 1032 return true; 1021 1033 } 1022 1034 #else /* 
CONFIG_SUNRPC_BACKCHANNEL */ ··· 1320 1336 u32 credits; 1321 1337 __be32 *p; 1322 1338 1339 + --buf->rb_posted_receives; 1340 + 1323 1341 if (rep->rr_hdrbuf.head[0].iov_len == 0) 1324 1342 goto out_badstatus; 1325 1343 1344 + /* Fixed transport header fields */ 1326 1345 xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, 1327 1346 rep->rr_hdrbuf.head[0].iov_base); 1328 - 1329 - /* Fixed transport header fields */ 1330 1347 p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); 1331 1348 if (unlikely(!p)) 1332 1349 goto out_shortreply; ··· 1366 1381 1367 1382 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); 1368 1383 1384 + rpcrdma_post_recvs(r_xprt, false); 1369 1385 queue_work(rpcrdma_receive_wq, &rep->rr_work); 1370 - return; 1371 - 1372 - out_badstatus: 1373 - rpcrdma_recv_buffer_put(rep); 1374 - if (r_xprt->rx_ep.rep_connected == 1) { 1375 - r_xprt->rx_ep.rep_connected = -EIO; 1376 - rpcrdma_conn_func(&r_xprt->rx_ep); 1377 - } 1378 1386 return; 1379 1387 1380 1388 out_badversion: ··· 1389 1411 * receive buffer before returning. 1390 1412 */ 1391 1413 repost: 1392 - r_xprt->rx_stats.bad_reply_count++; 1393 - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) 1394 - rpcrdma_recv_buffer_put(rep); 1414 + rpcrdma_post_recvs(r_xprt, false); 1415 + out_badstatus: 1416 + rpcrdma_recv_buffer_put(rep); 1395 1417 }
+1
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 263 263 .reserve_xprt = xprt_reserve_xprt_cong, 264 264 .release_xprt = xprt_release_xprt_cong, 265 265 .alloc_slot = xprt_alloc_slot, 266 + .free_slot = xprt_free_slot, 266 267 .release_request = xprt_release_rqst_cong, 267 268 .buf_alloc = xprt_rdma_bc_allocate, 268 269 .buf_free = xprt_rdma_bc_free,
+47 -17
net/sunrpc/xprtrdma/transport.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 2 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. ··· 335 334 return ERR_PTR(-EBADF); 336 335 } 337 336 338 - xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 339 - xprt_rdma_slot_table_entries, 340 - xprt_rdma_slot_table_entries); 337 + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); 341 338 if (xprt == NULL) { 342 339 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 343 340 __func__); ··· 367 368 xprt_set_bound(xprt); 368 369 xprt_rdma_format_addresses(xprt, sap); 369 370 370 - cdata.max_requests = xprt->max_reqs; 371 + cdata.max_requests = xprt_rdma_slot_table_entries; 371 372 372 373 cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ 373 374 cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ ··· 540 541 } 541 542 } 542 543 544 + /** 545 + * xprt_rdma_alloc_slot - allocate an rpc_rqst 546 + * @xprt: controlling RPC transport 547 + * @task: RPC task requesting a fresh rpc_rqst 548 + * 549 + * tk_status values: 550 + * %0 if task->tk_rqstp points to a fresh rpc_rqst 551 + * %-EAGAIN if no rpc_rqst is available; queued on backlog 552 + */ 553 + static void 554 + xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) 555 + { 556 + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 557 + struct rpcrdma_req *req; 558 + 559 + req = rpcrdma_buffer_get(&r_xprt->rx_buf); 560 + if (!req) 561 + goto out_sleep; 562 + task->tk_rqstp = &req->rl_slot; 563 + task->tk_status = 0; 564 + return; 565 + 566 + out_sleep: 567 + rpc_sleep_on(&xprt->backlog, task, NULL); 568 + task->tk_status = -EAGAIN; 569 + } 570 + 571 + /** 572 + * xprt_rdma_free_slot - release an rpc_rqst 573 + * @xprt: controlling RPC transport 574 + * @rqst: rpc_rqst to release 575 + * 576 + */ 577 + static void 578 + xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) 579 + { 580 + 
memset(rqst, 0, sizeof(*rqst)); 581 + rpcrdma_buffer_put(rpcr_to_rdmar(rqst)); 582 + rpc_wake_up_next(&xprt->backlog); 583 + } 584 + 543 585 static bool 544 586 rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 545 587 size_t size, gfp_t flags) ··· 651 611 { 652 612 struct rpc_rqst *rqst = task->tk_rqstp; 653 613 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 654 - struct rpcrdma_req *req; 614 + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 655 615 gfp_t flags; 656 - 657 - req = rpcrdma_buffer_get(&r_xprt->rx_buf); 658 - if (req == NULL) 659 - goto out_get; 660 616 661 617 flags = RPCRDMA_DEF_GFP; 662 618 if (RPC_IS_SWAPPER(task)) ··· 663 627 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 664 628 goto out_fail; 665 629 666 - rpcrdma_set_xprtdata(rqst, req); 667 630 rqst->rq_buffer = req->rl_sendbuf->rg_base; 668 631 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; 669 632 trace_xprtrdma_allocate(task, req); 670 633 return 0; 671 634 672 635 out_fail: 673 - rpcrdma_buffer_put(req); 674 - out_get: 675 636 trace_xprtrdma_allocate(task, NULL); 676 637 return -ENOMEM; 677 638 } ··· 689 656 if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 690 657 rpcrdma_release_rqst(r_xprt, req); 691 658 trace_xprtrdma_rpc_done(task, req); 692 - rpcrdma_buffer_put(req); 693 659 } 694 660 695 661 /** ··· 725 693 rc = rpcrdma_marshal_req(r_xprt, rqst); 726 694 if (rc < 0) 727 695 goto failed_marshal; 728 - 729 - if (req->rl_reply == NULL) /* e.g. 
reconnection */ 730 - rpcrdma_recv_buffer_get(req); 731 696 732 697 /* Must suppress retransmit to maintain credits */ 733 698 if (rqst->rq_connect_cookie == xprt->connect_cookie) ··· 812 783 static const struct rpc_xprt_ops xprt_rdma_procs = { 813 784 .reserve_xprt = xprt_reserve_xprt_cong, 814 785 .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ 815 - .alloc_slot = xprt_alloc_slot, 786 + .alloc_slot = xprt_rdma_alloc_slot, 787 + .free_slot = xprt_rdma_free_slot, 816 788 .release_request = xprt_release_rqst_cong, /* ditto */ 817 789 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ 818 790 .timer = xprt_rdma_timer,
+112 -179
net/sunrpc/xprtrdma/verbs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1 2 /* 2 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. ··· 73 72 /* 74 73 * internal functions 75 74 */ 75 + static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 76 76 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 77 77 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); 78 + static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); 78 79 static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); 79 80 80 81 struct workqueue_struct *rpcrdma_receive_wq __read_mostly; ··· 163 160 rr_cqe); 164 161 165 162 /* WARNING: Only wr_id and status are reliable at this point */ 166 - trace_xprtrdma_wc_receive(rep, wc); 163 + trace_xprtrdma_wc_receive(wc); 167 164 if (wc->status != IB_WC_SUCCESS) 168 165 goto out_fail; 169 166 ··· 235 232 complete(&ia->ri_done); 236 233 break; 237 234 case RDMA_CM_EVENT_ADDR_ERROR: 238 - ia->ri_async_rc = -EHOSTUNREACH; 235 + ia->ri_async_rc = -EPROTO; 239 236 complete(&ia->ri_done); 240 237 break; 241 238 case RDMA_CM_EVENT_ROUTE_ERROR: ··· 266 263 connstate = -ENOTCONN; 267 264 goto connected; 268 265 case RDMA_CM_EVENT_UNREACHABLE: 269 - connstate = -ENETDOWN; 266 + connstate = -ENETUNREACH; 270 267 goto connected; 271 268 case RDMA_CM_EVENT_REJECTED: 272 269 dprintk("rpcrdma: connection to %s:%s rejected: %s\n", ··· 309 306 init_completion(&ia->ri_done); 310 307 init_completion(&ia->ri_remove_done); 311 308 312 - id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, 313 - IB_QPT_RC); 309 + id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall, 310 + xprt, RDMA_PS_TCP, IB_QPT_RC); 314 311 if (IS_ERR(id)) { 315 312 rc = PTR_ERR(id); 316 313 dprintk("RPC: %s: rdma_create_id() failed %i\n", ··· 504 501 struct rpcrdma_create_data_internal *cdata) 505 502 { 506 503 struct rpcrdma_connect_private *pmsg = 
&ep->rep_cm_private; 507 - unsigned int max_qp_wr, max_sge; 508 504 struct ib_cq *sendcq, *recvcq; 505 + unsigned int max_sge; 509 506 int rc; 510 507 511 508 max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, ··· 516 513 } 517 514 ia->ri_max_send_sges = max_sge; 518 515 519 - if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { 520 - dprintk("RPC: %s: insufficient wqe's available\n", 521 - __func__); 522 - return -ENOMEM; 523 - } 524 - max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1; 525 - 526 - /* check provider's send/recv wr limits */ 527 - if (cdata->max_requests > max_qp_wr) 528 - cdata->max_requests = max_qp_wr; 516 + rc = ia->ri_ops->ro_open(ia, ep, cdata); 517 + if (rc) 518 + return rc; 529 519 530 520 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; 531 521 ep->rep_attr.qp_context = ep; 532 522 ep->rep_attr.srq = NULL; 533 - ep->rep_attr.cap.max_send_wr = cdata->max_requests; 534 - ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; 535 - ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */ 536 - rc = ia->ri_ops->ro_open(ia, ep, cdata); 537 - if (rc) 538 - return rc; 539 - ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 540 - ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; 541 - ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ 542 523 ep->rep_attr.cap.max_send_sge = max_sge; 543 524 ep->rep_attr.cap.max_recv_sge = 1; 544 525 ep->rep_attr.cap.max_inline_data = 0; ··· 729 742 { 730 743 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, 731 744 rx_ia); 732 - unsigned int extras; 733 745 int rc; 734 746 735 747 retry: ··· 772 786 } 773 787 774 788 dprintk("RPC: %s: connected\n", __func__); 775 - extras = r_xprt->rx_buf.rb_bc_srv_max_requests; 776 - if (extras) 777 - rpcrdma_ep_post_extra_recv(r_xprt, extras); 789 + 790 + rpcrdma_post_recvs(r_xprt, true); 778 791 779 792 out: 780 793 if (rc) ··· 879 894 sc->sc_xprt = r_xprt; 880 895 buf->rb_sc_ctxs[i] = sc; 881 896 } 897 + 
buf->rb_flags = 0; 882 898 883 899 return 0; 884 900 ··· 937 951 * completions recently. This is a sign the Send Queue is 938 952 * backing up. Cause the caller to pause and try again. 939 953 */ 940 - dprintk("RPC: %s: empty sendctx queue\n", __func__); 954 + set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags); 941 955 r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); 942 956 r_xprt->rx_stats.empty_sendctx_q++; 943 957 return NULL; ··· 952 966 * 953 967 * The caller serializes calls to this function (per rpcrdma_buffer). 954 968 */ 955 - void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) 969 + static void 970 + rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) 956 971 { 957 972 struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; 958 973 unsigned long next_tail; ··· 972 985 973 986 /* Paired with READ_ONCE */ 974 987 smp_store_release(&buf->rb_sc_tail, next_tail); 988 + 989 + if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) { 990 + smp_mb__after_atomic(); 991 + xprt_write_space(&sc->sc_xprt->rx_xprt); 992 + } 975 993 } 976 994 977 995 static void ··· 1090 1098 return req; 1091 1099 } 1092 1100 1093 - /** 1094 - * rpcrdma_create_rep - Allocate an rpcrdma_rep object 1095 - * @r_xprt: controlling transport 1096 - * 1097 - * Returns 0 on success or a negative errno on failure. 
1098 - */ 1099 - int 1100 - rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) 1101 + static int 1102 + rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) 1101 1103 { 1102 1104 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 1103 1105 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; ··· 1119 1133 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 1120 1134 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1121 1135 rep->rr_recv_wr.num_sge = 1; 1136 + rep->rr_temp = temp; 1122 1137 1123 1138 spin_lock(&buf->rb_lock); 1124 1139 list_add(&rep->rr_list, &buf->rb_recv_bufs); ··· 1171 1184 list_add(&req->rl_list, &buf->rb_send_bufs); 1172 1185 } 1173 1186 1187 + buf->rb_posted_receives = 0; 1174 1188 INIT_LIST_HEAD(&buf->rb_recv_bufs); 1175 - for (i = 0; i <= buf->rb_max_requests; i++) { 1176 - rc = rpcrdma_create_rep(r_xprt); 1177 - if (rc) 1178 - goto out; 1179 - } 1180 1189 1181 1190 rc = rpcrdma_sendctxs_create(r_xprt); 1182 1191 if (rc) ··· 1182 1199 out: 1183 1200 rpcrdma_buffer_destroy(buf); 1184 1201 return rc; 1185 - } 1186 - 1187 - static struct rpcrdma_req * 1188 - rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf) 1189 - { 1190 - struct rpcrdma_req *req; 1191 - 1192 - req = list_first_entry(&buf->rb_send_bufs, 1193 - struct rpcrdma_req, rl_list); 1194 - list_del_init(&req->rl_list); 1195 - return req; 1196 - } 1197 - 1198 - static struct rpcrdma_rep * 1199 - rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) 1200 - { 1201 - struct rpcrdma_rep *rep; 1202 - 1203 - rep = list_first_entry(&buf->rb_recv_bufs, 1204 - struct rpcrdma_rep, rr_list); 1205 - list_del(&rep->rr_list); 1206 - return rep; 1207 1202 } 1208 1203 1209 1204 static void ··· 1242 1281 while (!list_empty(&buf->rb_recv_bufs)) { 1243 1282 struct rpcrdma_rep *rep; 1244 1283 1245 - rep = rpcrdma_buffer_get_rep_locked(buf); 1284 + rep = list_first_entry(&buf->rb_recv_bufs, 1285 + struct rpcrdma_rep, rr_list); 1286 + list_del(&rep->rr_list); 1246 1287 rpcrdma_destroy_rep(rep); 1247 
1288 } 1248 - buf->rb_send_count = 0; 1249 1289 1250 1290 spin_lock(&buf->rb_reqslock); 1251 1291 while (!list_empty(&buf->rb_allreqs)) { ··· 1261 1299 spin_lock(&buf->rb_reqslock); 1262 1300 } 1263 1301 spin_unlock(&buf->rb_reqslock); 1264 - buf->rb_recv_count = 0; 1265 1302 1266 1303 rpcrdma_mrs_destroy(buf); 1267 1304 } ··· 1333 1372 __rpcrdma_mr_put(&r_xprt->rx_buf, mr); 1334 1373 } 1335 1374 1336 - static struct rpcrdma_rep * 1337 - rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) 1338 - { 1339 - /* If an RPC previously completed without a reply (say, a 1340 - * credential problem or a soft timeout occurs) then hold off 1341 - * on supplying more Receive buffers until the number of new 1342 - * pending RPCs catches up to the number of posted Receives. 1343 - */ 1344 - if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) 1345 - return NULL; 1346 - 1347 - if (unlikely(list_empty(&buffers->rb_recv_bufs))) 1348 - return NULL; 1349 - buffers->rb_recv_count++; 1350 - return rpcrdma_buffer_get_rep_locked(buffers); 1351 - } 1352 - 1353 - /* 1354 - * Get a set of request/reply buffers. 1375 + /** 1376 + * rpcrdma_buffer_get - Get a request buffer 1377 + * @buffers: Buffer pool from which to obtain a buffer 1355 1378 * 1356 - * Reply buffer (if available) is attached to send buffer upon return. 1379 + * Returns a fresh rpcrdma_req, or NULL if none are available. 
1357 1380 */ 1358 1381 struct rpcrdma_req * 1359 1382 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) ··· 1345 1400 struct rpcrdma_req *req; 1346 1401 1347 1402 spin_lock(&buffers->rb_lock); 1348 - if (list_empty(&buffers->rb_send_bufs)) 1349 - goto out_reqbuf; 1350 - buffers->rb_send_count++; 1351 - req = rpcrdma_buffer_get_req_locked(buffers); 1352 - req->rl_reply = rpcrdma_buffer_get_rep(buffers); 1403 + req = list_first_entry_or_null(&buffers->rb_send_bufs, 1404 + struct rpcrdma_req, rl_list); 1405 + if (req) 1406 + list_del_init(&req->rl_list); 1353 1407 spin_unlock(&buffers->rb_lock); 1354 - 1355 1408 return req; 1356 - 1357 - out_reqbuf: 1358 - spin_unlock(&buffers->rb_lock); 1359 - return NULL; 1360 1409 } 1361 1410 1362 - /* 1363 - * Put request/reply buffers back into pool. 1364 - * Pre-decrement counter/array index. 1411 + /** 1412 + * rpcrdma_buffer_put - Put request/reply buffers back into pool 1413 + * @req: object to return 1414 + * 1365 1415 */ 1366 1416 void 1367 1417 rpcrdma_buffer_put(struct rpcrdma_req *req) ··· 1367 1427 req->rl_reply = NULL; 1368 1428 1369 1429 spin_lock(&buffers->rb_lock); 1370 - buffers->rb_send_count--; 1371 - list_add_tail(&req->rl_list, &buffers->rb_send_bufs); 1430 + list_add(&req->rl_list, &buffers->rb_send_bufs); 1372 1431 if (rep) { 1373 - buffers->rb_recv_count--; 1374 - list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); 1432 + if (!rep->rr_temp) { 1433 + list_add(&rep->rr_list, &buffers->rb_recv_bufs); 1434 + rep = NULL; 1435 + } 1375 1436 } 1376 1437 spin_unlock(&buffers->rb_lock); 1377 - } 1378 - 1379 - /* 1380 - * Recover reply buffers from pool. 1381 - * This happens when recovering from disconnect. 
1382 - */ 1383 - void 1384 - rpcrdma_recv_buffer_get(struct rpcrdma_req *req) 1385 - { 1386 - struct rpcrdma_buffer *buffers = req->rl_buffer; 1387 - 1388 - spin_lock(&buffers->rb_lock); 1389 - req->rl_reply = rpcrdma_buffer_get_rep(buffers); 1390 - spin_unlock(&buffers->rb_lock); 1438 + if (rep) 1439 + rpcrdma_destroy_rep(rep); 1391 1440 } 1392 1441 1393 1442 /* ··· 1388 1459 { 1389 1460 struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; 1390 1461 1391 - spin_lock(&buffers->rb_lock); 1392 - buffers->rb_recv_count--; 1393 - list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); 1394 - spin_unlock(&buffers->rb_lock); 1462 + if (!rep->rr_temp) { 1463 + spin_lock(&buffers->rb_lock); 1464 + list_add(&rep->rr_list, &buffers->rb_recv_bufs); 1465 + spin_unlock(&buffers->rb_lock); 1466 + } else { 1467 + rpcrdma_destroy_rep(rep); 1468 + } 1395 1469 } 1396 1470 1397 1471 /** ··· 1490 1558 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; 1491 1559 int rc; 1492 1560 1493 - if (req->rl_reply) { 1494 - rc = rpcrdma_ep_post_recv(ia, req->rl_reply); 1495 - if (rc) 1496 - return rc; 1497 - req->rl_reply = NULL; 1498 - } 1499 - 1500 1561 if (!ep->rep_send_count || 1501 1562 test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { 1502 1563 send_wr->send_flags |= IB_SEND_SIGNALED; ··· 1506 1581 return 0; 1507 1582 } 1508 1583 1509 - int 1510 - rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, 1511 - struct rpcrdma_rep *rep) 1512 - { 1513 - struct ib_recv_wr *recv_wr_fail; 1514 - int rc; 1515 - 1516 - if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) 1517 - goto out_map; 1518 - rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); 1519 - trace_xprtrdma_post_recv(rep, rc); 1520 - if (rc) 1521 - return -ENOTCONN; 1522 - return 0; 1523 - 1524 - out_map: 1525 - pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); 1526 - return -EIO; 1527 - } 1528 - 1529 1584 /** 1530 - * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests 1531 - * @r_xprt: 
transport associated with these backchannel resources 1532 - * @count: minimum number of incoming requests expected 1585 + * rpcrdma_post_recvs - Maybe post some Receive buffers 1586 + * @r_xprt: controlling transport 1587 + * @temp: when true, allocate temp rpcrdma_rep objects 1533 1588 * 1534 - * Returns zero if all requested buffers were posted, or a negative errno. 1535 1589 */ 1536 - int 1537 - rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) 1590 + void 1591 + rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1538 1592 { 1539 - struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; 1540 - struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1541 - struct rpcrdma_rep *rep; 1542 - int rc; 1593 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1594 + struct ib_recv_wr *wr, *bad_wr; 1595 + int needed, count, rc; 1543 1596 1544 - while (count--) { 1545 - spin_lock(&buffers->rb_lock); 1546 - if (list_empty(&buffers->rb_recv_bufs)) 1547 - goto out_reqbuf; 1548 - rep = rpcrdma_buffer_get_rep_locked(buffers); 1549 - spin_unlock(&buffers->rb_lock); 1597 + needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); 1598 + if (buf->rb_posted_receives > needed) 1599 + return; 1600 + needed -= buf->rb_posted_receives; 1550 1601 1551 - rc = rpcrdma_ep_post_recv(ia, rep); 1552 - if (rc) 1553 - goto out_rc; 1602 + count = 0; 1603 + wr = NULL; 1604 + while (needed) { 1605 + struct rpcrdma_regbuf *rb; 1606 + struct rpcrdma_rep *rep; 1607 + 1608 + spin_lock(&buf->rb_lock); 1609 + rep = list_first_entry_or_null(&buf->rb_recv_bufs, 1610 + struct rpcrdma_rep, rr_list); 1611 + if (likely(rep)) 1612 + list_del(&rep->rr_list); 1613 + spin_unlock(&buf->rb_lock); 1614 + if (!rep) { 1615 + if (rpcrdma_create_rep(r_xprt, temp)) 1616 + break; 1617 + continue; 1618 + } 1619 + 1620 + rb = rep->rr_rdmabuf; 1621 + if (!rpcrdma_regbuf_is_mapped(rb)) { 1622 + if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) { 1623 + rpcrdma_recv_buffer_put(rep); 1624 + break; 1625 + } 1626 + 
} 1627 + 1628 + trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe); 1629 + rep->rr_recv_wr.next = wr; 1630 + wr = &rep->rr_recv_wr; 1631 + ++count; 1632 + --needed; 1554 1633 } 1634 + if (!count) 1635 + return; 1555 1636 1556 - return 0; 1637 + rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr); 1638 + if (rc) { 1639 + for (wr = bad_wr; wr; wr = wr->next) { 1640 + struct rpcrdma_rep *rep; 1557 1641 1558 - out_reqbuf: 1559 - spin_unlock(&buffers->rb_lock); 1560 - trace_xprtrdma_noreps(r_xprt); 1561 - return -ENOMEM; 1562 - 1563 - out_rc: 1564 - rpcrdma_recv_buffer_put(rep); 1565 - return rc; 1642 + rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); 1643 + rpcrdma_recv_buffer_put(rep); 1644 + --count; 1645 + } 1646 + } 1647 + buf->rb_posted_receives += count; 1648 + trace_xprtrdma_post_recvs(r_xprt, count, rc); 1566 1649 }
+12 -14
net/sunrpc/xprtrdma/xprt_rdma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 1 2 /* 2 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 3 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. ··· 197 196 __be32 rr_proc; 198 197 int rr_wc_flags; 199 198 u32 rr_inv_rkey; 199 + bool rr_temp; 200 200 struct rpcrdma_regbuf *rr_rdmabuf; 201 201 struct rpcrdma_xprt *rr_rxprt; 202 202 struct work_struct rr_work; ··· 336 334 struct rpcrdma_buffer; 337 335 struct rpcrdma_req { 338 336 struct list_head rl_list; 337 + struct rpc_rqst rl_slot; 339 338 struct rpcrdma_buffer *rl_buffer; 340 339 struct rpcrdma_rep *rl_reply; 341 340 struct xdr_stream rl_stream; ··· 359 356 RPCRDMA_REQ_F_TX_RESOURCES, 360 357 }; 361 358 362 - static inline void 363 - rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) 364 - { 365 - rqst->rq_xprtdata = req; 366 - } 367 - 368 359 static inline struct rpcrdma_req * 369 360 rpcr_to_rdmar(const struct rpc_rqst *rqst) 370 361 { 371 - return rqst->rq_xprtdata; 362 + return container_of(rqst, struct rpcrdma_req, rl_slot); 372 363 } 373 364 374 365 static inline void ··· 398 401 struct rpcrdma_sendctx **rb_sc_ctxs; 399 402 400 403 spinlock_t rb_lock; /* protect buf lists */ 401 - int rb_send_count, rb_recv_count; 402 404 struct list_head rb_send_bufs; 403 405 struct list_head rb_recv_bufs; 406 + unsigned long rb_flags; 404 407 u32 rb_max_requests; 405 408 u32 rb_credits; /* most recent credit grant */ 409 + int rb_posted_receives; 406 410 407 411 u32 rb_bc_srv_max_requests; 408 412 spinlock_t rb_reqslock; /* protect rb_allreqs */ ··· 417 419 struct delayed_work rb_refresh_worker; 418 420 }; 419 421 #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) 422 + 423 + /* rb_flags */ 424 + enum { 425 + RPCRDMA_BUF_F_EMPTY_SCQ = 0, 426 + }; 420 427 421 428 /* 422 429 * Internal structure for transport instance creation. 
This ··· 564 561 565 562 int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 566 563 struct rpcrdma_req *); 567 - int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); 564 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); 568 565 569 566 /* 570 567 * Buffer calls - xprtrdma/verbs.c 571 568 */ 572 569 struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); 573 570 void rpcrdma_destroy_req(struct rpcrdma_req *); 574 - int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt); 575 571 int rpcrdma_buffer_create(struct rpcrdma_xprt *); 576 572 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); 577 573 struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); 578 - void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 579 574 580 575 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); 581 576 void rpcrdma_mr_put(struct rpcrdma_mr *mr); ··· 582 581 583 582 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 584 583 void rpcrdma_buffer_put(struct rpcrdma_req *); 585 - void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 586 584 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 587 585 588 586 struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, ··· 602 602 return true; 603 603 return __rpcrdma_dma_map_regbuf(ia, rb); 604 604 } 605 - 606 - int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); 607 605 608 606 int rpcrdma_alloc_wq(void); 609 607 void rpcrdma_destroy_wq(void);
+4
net/sunrpc/xprtsock.c
··· 2763 2763 .reserve_xprt = xprt_reserve_xprt, 2764 2764 .release_xprt = xs_tcp_release_xprt, 2765 2765 .alloc_slot = xprt_alloc_slot, 2766 + .free_slot = xprt_free_slot, 2766 2767 .rpcbind = xs_local_rpcbind, 2767 2768 .set_port = xs_local_set_port, 2768 2769 .connect = xs_local_connect, ··· 2783 2782 .reserve_xprt = xprt_reserve_xprt_cong, 2784 2783 .release_xprt = xprt_release_xprt_cong, 2785 2784 .alloc_slot = xprt_alloc_slot, 2785 + .free_slot = xprt_free_slot, 2786 2786 .rpcbind = rpcb_getport_async, 2787 2787 .set_port = xs_set_port, 2788 2788 .connect = xs_connect, ··· 2805 2803 .reserve_xprt = xprt_reserve_xprt, 2806 2804 .release_xprt = xs_tcp_release_xprt, 2807 2805 .alloc_slot = xprt_lock_and_alloc_slot, 2806 + .free_slot = xprt_free_slot, 2808 2807 .rpcbind = rpcb_getport_async, 2809 2808 .set_port = xs_set_port, 2810 2809 .connect = xs_connect, ··· 2837 2834 .reserve_xprt = xprt_reserve_xprt, 2838 2835 .release_xprt = xprt_release_xprt, 2839 2836 .alloc_slot = xprt_alloc_slot, 2837 + .free_slot = xprt_free_slot, 2840 2838 .buf_alloc = bc_malloc, 2841 2839 .buf_free = bc_free, 2842 2840 .send_request = bc_send_request,