Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfs-for-4.17-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
"Stable bugfixes:
- xprtrdma: Fix corner cases when handling device removal # v4.12+
- xprtrdma: Fix latency regression on NUMA NFS/RDMA clients # v4.15+

Features:
- New sunrpc tracepoint for RPC pings
- Finer grained NFSv4 attribute checking
- Don't unnecessarily return NFS v4 delegations

Other bugfixes and cleanups:
- Several other small NFSoRDMA cleanups
- Improvements to the sunrpc RTT measurements
- A few sunrpc tracepoint cleanups
- Various fixes for NFS v4 lock notifications
- Various sunrpc and NFS v4 XDR encoding cleanups
- Switch to the ida_simple API
- Fix NFSv4.1 exclusive create
- Forget the ACL cache after a setattr operation
- Don't advance the nfs_entry readdir cookie if xdr decoding fails"

* tag 'nfs-for-4.17-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (47 commits)
NFS: advance nfs_entry cookie only after decoding completes successfully
NFSv3/acl: forget acl cache after setattr
NFSv4.1: Fix exclusive create
NFSv4: Declare the size up to date after it was set.
nfs: Use ida_simple API
NFSv4: Fix the nfs_inode_set_delegation() arguments
NFSv4: Clean up CB_GETATTR encoding
NFSv4: Don't ask for attributes when ACCESS is protected by a delegation
NFSv4: Add a helper to encode/decode struct timespec
NFSv4: Clean up encode_attrs
NFSv4: Clean up XDR encoding of type bitmap4
NFSv4: Allow GFP_NOIO sleeps in decode_attr_owner/decode_attr_group
SUNRPC: Add a helper for encoding opaque data inline
SUNRPC: Add helpers for decoding opaque and string types
NFSv4: Ignore change attribute invalidations if we hold a delegation
NFS: More fine grained attribute tracking
NFS: Don't force unnecessary cache invalidation in nfs_update_inode()
NFS: Don't redirty the attribute cache in nfs_wcc_update_inode()
NFS: Don't force a revalidation of all attributes if change is missing
NFS: Convert NFS_INO_INVALID flags to unsigned long
...

+866 -493
+8 -29
fs/nfs/callback_xdr.c
··· 535 535 return 0; 536 536 } 537 537 538 - #define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) 539 - #define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) 540 - static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, __be32 **savep) 538 + static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, size_t sz) 541 539 { 542 - __be32 bm[2]; 543 - __be32 *p; 544 - 545 - bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0); 546 - bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); 547 - if (bm[1] != 0) { 548 - p = xdr_reserve_space(xdr, 16); 549 - if (unlikely(p == NULL)) 550 - return htonl(NFS4ERR_RESOURCE); 551 - *p++ = htonl(2); 552 - *p++ = bm[0]; 553 - *p++ = bm[1]; 554 - } else if (bm[0] != 0) { 555 - p = xdr_reserve_space(xdr, 12); 556 - if (unlikely(p == NULL)) 557 - return htonl(NFS4ERR_RESOURCE); 558 - *p++ = htonl(1); 559 - *p++ = bm[0]; 560 - } else { 561 - p = xdr_reserve_space(xdr, 8); 562 - if (unlikely(p == NULL)) 563 - return htonl(NFS4ERR_RESOURCE); 564 - *p++ = htonl(0); 565 - } 566 - *savep = p; 540 + if (xdr_stream_encode_uint32_array(xdr, bitmap, sz) < 0) 541 + return cpu_to_be32(NFS4ERR_RESOURCE); 567 542 return 0; 568 543 } 569 544 ··· 631 656 632 657 if (unlikely(status != 0)) 633 658 goto out; 634 - status = encode_attr_bitmap(xdr, res->bitmap, &savep); 659 + status = encode_attr_bitmap(xdr, res->bitmap, ARRAY_SIZE(res->bitmap)); 635 660 if (unlikely(status != 0)) 661 + goto out; 662 + status = cpu_to_be32(NFS4ERR_RESOURCE); 663 + savep = xdr_reserve_space(xdr, sizeof(*savep)); 664 + if (unlikely(!savep)) 636 665 goto out; 637 666 status = encode_attr_change(xdr, res->bitmap, res->change_attr); 638 667 if (unlikely(status != 0))
+39 -13
fs/nfs/delegation.c
··· 19 19 #include <linux/nfs_xdr.h> 20 20 21 21 #include "nfs4_fs.h" 22 + #include "nfs4session.h" 22 23 #include "delegation.h" 23 24 #include "internal.h" 24 25 #include "nfs4trace.h" ··· 172 171 * nfs_inode_reclaim_delegation - process a delegation reclaim request 173 172 * @inode: inode to process 174 173 * @cred: credential to use for request 175 - * @res: new delegation state from server 174 + * @type: delegation type 175 + * @stateid: delegation stateid 176 + * @pagemod_limit: write delegation "space_limit" 176 177 * 177 178 */ 178 179 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, 179 - struct nfs_openres *res) 180 + fmode_t type, 181 + const nfs4_stateid *stateid, 182 + unsigned long pagemod_limit) 180 183 { 181 184 struct nfs_delegation *delegation; 182 185 struct rpc_cred *oldcred = NULL; ··· 190 185 if (delegation != NULL) { 191 186 spin_lock(&delegation->lock); 192 187 if (delegation->inode != NULL) { 193 - nfs4_stateid_copy(&delegation->stateid, &res->delegation); 194 - delegation->type = res->delegation_type; 195 - delegation->pagemod_limit = res->pagemod_limit; 188 + nfs4_stateid_copy(&delegation->stateid, stateid); 189 + delegation->type = type; 190 + delegation->pagemod_limit = pagemod_limit; 196 191 oldcred = delegation->cred; 197 192 delegation->cred = get_rpccred(cred); 198 193 clear_bit(NFS_DELEGATION_NEED_RECLAIM, ··· 200 195 spin_unlock(&delegation->lock); 201 196 rcu_read_unlock(); 202 197 put_rpccred(oldcred); 203 - trace_nfs4_reclaim_delegation(inode, res->delegation_type); 198 + trace_nfs4_reclaim_delegation(inode, type); 204 199 return; 205 200 } 206 201 /* We appear to have raced with a delegation return. 
*/ 207 202 spin_unlock(&delegation->lock); 208 203 } 209 204 rcu_read_unlock(); 210 - nfs_inode_set_delegation(inode, cred, res); 205 + nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit); 211 206 } 212 207 213 208 static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) ··· 334 329 * nfs_inode_set_delegation - set up a delegation on an inode 335 330 * @inode: inode to which delegation applies 336 331 * @cred: cred to use for subsequent delegation processing 337 - * @res: new delegation state from server 332 + * @type: delegation type 333 + * @stateid: delegation stateid 334 + * @pagemod_limit: write delegation "space_limit" 338 335 * 339 336 * Returns zero on success, or a negative errno value. 340 337 */ 341 - int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 338 + int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, 339 + fmode_t type, 340 + const nfs4_stateid *stateid, 341 + unsigned long pagemod_limit) 342 342 { 343 343 struct nfs_server *server = NFS_SERVER(inode); 344 344 struct nfs_client *clp = server->nfs_client; ··· 355 345 delegation = kmalloc(sizeof(*delegation), GFP_NOFS); 356 346 if (delegation == NULL) 357 347 return -ENOMEM; 358 - nfs4_stateid_copy(&delegation->stateid, &res->delegation); 359 - delegation->type = res->delegation_type; 360 - delegation->pagemod_limit = res->pagemod_limit; 348 + nfs4_stateid_copy(&delegation->stateid, stateid); 349 + delegation->type = type; 350 + delegation->pagemod_limit = pagemod_limit; 361 351 delegation->change_attr = inode_peek_iversion_raw(inode); 362 352 delegation->cred = get_rpccred(cred); 363 353 delegation->inode = inode; ··· 402 392 rcu_assign_pointer(nfsi->delegation, delegation); 403 393 delegation = NULL; 404 394 405 - trace_nfs4_set_delegation(inode, res->delegation_type); 395 + trace_nfs4_set_delegation(inode, type); 406 396 407 397 out: 408 398 
spin_unlock(&clp->cl_lock); ··· 555 545 if (delegation != NULL) 556 546 err = nfs_end_delegation_return(inode, delegation, 1); 557 547 return err; 548 + } 549 + 550 + /** 551 + * nfs4_inode_make_writeable 552 + * @inode: pointer to inode 553 + * 554 + * Make the inode writeable by returning the delegation if necessary 555 + * 556 + * Returns zero on success, or a negative errno value. 557 + */ 558 + int nfs4_inode_make_writeable(struct inode *inode) 559 + { 560 + if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) || 561 + !nfs4_check_delegation(inode, FMODE_WRITE)) 562 + return nfs4_inode_return_delegation(inode); 563 + return 0; 558 564 } 559 565 560 566 static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
+5 -2
fs/nfs/delegation.h
··· 36 36 NFS_DELEGATION_TEST_EXPIRED, 37 37 }; 38 38 39 - int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 40 - void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 39 + int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, 40 + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); 41 + void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, 42 + fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); 41 43 int nfs4_inode_return_delegation(struct inode *inode); 42 44 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 43 45 void nfs_inode_return_delegation_noreclaim(struct inode *inode); ··· 72 70 bool nfs4_delegation_flush_on_close(const struct inode *inode); 73 71 void nfs_inode_find_delegation_state_and_recover(struct inode *inode, 74 72 const nfs4_stateid *stateid); 73 + int nfs4_inode_make_writeable(struct inode *inode); 75 74 76 75 #endif 77 76
+5 -10
fs/nfs/dir.c
··· 1272 1272 /* drop the inode if we're reasonably sure this is the last link */ 1273 1273 if (inode->i_nlink == 1) 1274 1274 clear_nlink(inode); 1275 - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; 1275 + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE 1276 + | NFS_INO_INVALID_CTIME 1277 + | NFS_INO_INVALID_OTHER; 1276 1278 spin_unlock(&inode->i_lock); 1277 1279 } 1278 1280 ··· 1800 1798 1801 1799 trace_nfs_remove_enter(dir, dentry); 1802 1800 if (inode != NULL) { 1803 - NFS_PROTO(inode)->return_delegation(inode); 1804 - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); 1801 + error = NFS_PROTO(dir)->remove(dir, dentry); 1805 1802 if (error == 0) 1806 1803 nfs_drop_nlink(inode); 1807 1804 } else 1808 - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); 1805 + error = NFS_PROTO(dir)->remove(dir, dentry); 1809 1806 if (error == -ENOENT) 1810 1807 nfs_dentry_handle_enoent(dentry); 1811 1808 trace_nfs_remove_exit(dir, dentry, error); ··· 1933 1932 old_dentry, dentry); 1934 1933 1935 1934 trace_nfs_link_enter(inode, dir, dentry); 1936 - NFS_PROTO(inode)->return_delegation(inode); 1937 - 1938 1935 d_drop(dentry); 1939 1936 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); 1940 1937 if (error == 0) { ··· 2021 2022 new_inode = NULL; 2022 2023 } 2023 2024 } 2024 - 2025 - NFS_PROTO(old_inode)->return_delegation(old_inode); 2026 - if (new_inode != NULL) 2027 - NFS_PROTO(new_inode)->return_delegation(new_inode); 2028 2025 2029 2026 task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); 2030 2027 if (IS_ERR(task)) {
+76 -62
fs/nfs/inode.c
··· 195 195 static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) 196 196 { 197 197 struct nfs_inode *nfsi = NFS_I(inode); 198 + bool have_delegation = nfs_have_delegated_attributes(inode); 198 199 200 + if (have_delegation) 201 + flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE); 199 202 if (inode->i_mapping->nrpages == 0) 200 203 flags &= ~NFS_INO_INVALID_DATA; 201 204 nfsi->cache_validity |= flags; ··· 450 447 inode->i_mode = fattr->mode; 451 448 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 452 449 && nfs_server_capable(inode, NFS_CAP_MODE)) 453 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 450 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); 454 451 /* Why so? Because we want revalidate for devices/FIFOs, and 455 452 * that's precisely what we have in nfs_file_inode_operations. 456 453 */ ··· 496 493 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 497 494 inode->i_atime = fattr->atime; 498 495 else if (nfs_server_capable(inode, NFS_CAP_ATIME)) 499 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 496 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); 500 497 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 501 498 inode->i_mtime = fattr->mtime; 502 499 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 503 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 500 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 504 501 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 505 502 inode->i_ctime = fattr->ctime; 506 503 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 507 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 504 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); 508 505 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 509 506 inode_set_iversion_raw(inode, fattr->change_attr); 510 507 else 511 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR 512 - | NFS_INO_REVAL_PAGECACHE); 508 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); 513 509 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 514 510 inode->i_size = 
nfs_size_to_loff_t(fattr->size); 515 511 else 516 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR 517 - | NFS_INO_REVAL_PAGECACHE); 512 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE); 518 513 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 519 514 set_nlink(inode, fattr->nlink); 520 515 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 521 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 516 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); 522 517 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 523 518 inode->i_uid = fattr->uid; 524 519 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 525 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 520 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); 526 521 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 527 522 inode->i_gid = fattr->gid; 528 523 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 529 - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); 524 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); 530 525 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 531 526 inode->i_blocks = fattr->du.nfs2.blocks; 532 527 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { ··· 609 608 goto out; 610 609 } 611 610 612 - /* 613 - * Return any delegations if we're going to change ACLs 614 - */ 615 - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) 616 - NFS_PROTO(inode)->return_delegation(inode); 617 611 error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); 618 612 if (error == 0) 619 613 error = nfs_refresh_inode(inode, fattr); ··· 641 645 /* Optimisation */ 642 646 if (offset == 0) 643 647 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; 648 + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; 644 649 645 650 spin_unlock(&inode->i_lock); 646 651 truncate_pagecache(inode, offset); ··· 654 657 * nfs_setattr_update_inode - Update inode metadata after a setattr call. 
655 658 * @inode: pointer to struct inode 656 659 * @attr: pointer to struct iattr 660 + * @fattr: pointer to struct nfs_fattr 657 661 * 658 662 * Note: we do this in the *proc.c in order to ensure that 659 663 * it works for things like exclusive creates too. ··· 667 669 668 670 spin_lock(&inode->i_lock); 669 671 NFS_I(inode)->attr_gencount = fattr->gencount; 672 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 673 + | NFS_INO_INVALID_CTIME); 670 674 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { 671 675 if ((attr->ia_valid & ATTR_MODE) != 0) { 672 676 int mode = attr->ia_mode & S_IALLUGO; ··· 683 683 | NFS_INO_INVALID_ACL); 684 684 } 685 685 if ((attr->ia_valid & ATTR_SIZE) != 0) { 686 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 686 687 nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); 687 688 nfs_vmtruncate(inode, attr->ia_size); 688 689 } 689 690 if (fattr->valid) 690 691 nfs_update_inode(inode, fattr); 691 - else 692 - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; 693 692 spin_unlock(&inode->i_lock); 694 693 } 695 694 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); ··· 1302 1303 return nfs_file_has_writers(nfsi) && nfs_file_io_is_buffered(nfsi); 1303 1304 } 1304 1305 1305 - static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1306 + static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1306 1307 { 1307 - unsigned long ret = 0; 1308 - 1309 1308 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) 1310 1309 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) 1311 1310 && inode_eq_iversion_raw(inode, fattr->pre_change_attr)) { 1312 1311 inode_set_iversion_raw(inode, fattr->change_attr); 1313 1312 if (S_ISDIR(inode->i_mode)) 1314 1313 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); 1315 - ret |= NFS_INO_INVALID_ATTR; 1316 1314 } 1317 1315 /* If we have atomic WCC data, we may update some attributes */ 1318 1316 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) 1319 1317 && (fattr->valid & 
NFS_ATTR_FATTR_CTIME) 1320 1318 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { 1321 1319 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1322 - ret |= NFS_INO_INVALID_ATTR; 1323 1320 } 1324 1321 1325 1322 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) ··· 1324 1329 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1325 1330 if (S_ISDIR(inode->i_mode)) 1326 1331 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); 1327 - ret |= NFS_INO_INVALID_ATTR; 1328 1332 } 1329 1333 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1330 1334 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 1331 1335 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 1332 1336 && !nfs_have_writebacks(inode)) { 1333 1337 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 1334 - ret |= NFS_INO_INVALID_ATTR; 1335 1338 } 1336 - 1337 - return ret; 1338 1339 } 1339 1340 1340 1341 /** ··· 1360 1369 if (!nfs_file_has_buffered_writers(nfsi)) { 1361 1370 /* Verify a few of the more important attributes */ 1362 1371 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) 1363 - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; 1372 + invalid |= NFS_INO_INVALID_CHANGE 1373 + | NFS_INO_REVAL_PAGECACHE; 1364 1374 1365 1375 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) 1366 - invalid |= NFS_INO_INVALID_ATTR; 1376 + invalid |= NFS_INO_INVALID_MTIME; 1367 1377 1368 1378 if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime)) 1369 - invalid |= NFS_INO_INVALID_ATTR; 1379 + invalid |= NFS_INO_INVALID_CTIME; 1370 1380 1371 1381 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 1372 1382 cur_size = i_size_read(inode); 1373 1383 new_isize = nfs_size_to_loff_t(fattr->size); 1374 1384 if (cur_size != new_isize) 1375 - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 1385 + invalid |= NFS_INO_INVALID_SIZE 1386 + | NFS_INO_REVAL_PAGECACHE; 1376 1387 
} 1377 1388 } 1378 1389 1379 1390 /* Have any file permissions changed? */ 1380 1391 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) 1381 - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1392 + invalid |= NFS_INO_INVALID_ACCESS 1393 + | NFS_INO_INVALID_ACL 1394 + | NFS_INO_INVALID_OTHER; 1382 1395 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) 1383 - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1396 + invalid |= NFS_INO_INVALID_ACCESS 1397 + | NFS_INO_INVALID_ACL 1398 + | NFS_INO_INVALID_OTHER; 1384 1399 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) 1385 - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1400 + invalid |= NFS_INO_INVALID_ACCESS 1401 + | NFS_INO_INVALID_ACL 1402 + | NFS_INO_INVALID_OTHER; 1386 1403 1387 1404 /* Has the link count changed? */ 1388 1405 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) 1389 - invalid |= NFS_INO_INVALID_ATTR; 1406 + invalid |= NFS_INO_INVALID_OTHER; 1390 1407 1391 1408 if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) 1392 1409 invalid |= NFS_INO_INVALID_ATIME; ··· 1596 1597 } 1597 1598 EXPORT_SYMBOL_GPL(nfs_refresh_inode); 1598 1599 1599 - static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) 1600 + static int nfs_post_op_update_inode_locked(struct inode *inode, 1601 + struct nfs_fattr *fattr, unsigned int invalid) 1600 1602 { 1601 - unsigned long invalid = NFS_INO_INVALID_ATTR; 1602 - 1603 1603 if (S_ISDIR(inode->i_mode)) 1604 1604 invalid |= NFS_INO_INVALID_DATA; 1605 1605 nfs_set_cache_invalid(inode, invalid); ··· 1627 1629 1628 1630 spin_lock(&inode->i_lock); 1629 1631 nfs_fattr_set_barrier(fattr); 1630 - status = nfs_post_op_update_inode_locked(inode, fattr); 1632 + status = 
nfs_post_op_update_inode_locked(inode, fattr, 1633 + NFS_INO_INVALID_CHANGE 1634 + | NFS_INO_INVALID_CTIME); 1631 1635 spin_unlock(&inode->i_lock); 1632 1636 1633 1637 return status; ··· 1681 1681 fattr->valid |= NFS_ATTR_FATTR_PRESIZE; 1682 1682 } 1683 1683 out_noforce: 1684 - status = nfs_post_op_update_inode_locked(inode, fattr); 1684 + status = nfs_post_op_update_inode_locked(inode, fattr, 1685 + NFS_INO_INVALID_CHANGE 1686 + | NFS_INO_INVALID_CTIME 1687 + | NFS_INO_INVALID_MTIME); 1685 1688 return status; 1686 1689 } 1687 1690 ··· 1792 1789 | NFS_INO_REVAL_PAGECACHE); 1793 1790 1794 1791 /* Do atomic weak cache consistency updates */ 1795 - invalid |= nfs_wcc_update_inode(inode, fattr); 1792 + nfs_wcc_update_inode(inode, fattr); 1796 1793 1797 1794 if (pnfs_layoutcommit_outstanding(inode)) { 1798 1795 nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR; ··· 1806 1803 inode->i_sb->s_id, inode->i_ino); 1807 1804 /* Could it be a race with writeback? */ 1808 1805 if (!have_writers) { 1809 - invalid |= NFS_INO_INVALID_ATTR 1806 + invalid |= NFS_INO_INVALID_CHANGE 1810 1807 | NFS_INO_INVALID_DATA 1811 1808 | NFS_INO_INVALID_ACCESS 1812 1809 | NFS_INO_INVALID_ACL; 1810 + /* Force revalidate of all attributes */ 1811 + save_cache_validity |= NFS_INO_INVALID_CTIME 1812 + | NFS_INO_INVALID_MTIME 1813 + | NFS_INO_INVALID_SIZE 1814 + | NFS_INO_INVALID_OTHER; 1813 1815 if (S_ISDIR(inode->i_mode)) 1814 1816 nfs_force_lookup_revalidate(inode); 1815 1817 } 1816 1818 inode_set_iversion_raw(inode, fattr->change_attr); 1817 1819 } 1818 1820 } else { 1819 - nfsi->cache_validity |= save_cache_validity; 1821 + nfsi->cache_validity |= save_cache_validity & 1822 + (NFS_INO_INVALID_CHANGE 1823 + | NFS_INO_REVAL_PAGECACHE 1824 + | NFS_INO_REVAL_FORCED); 1820 1825 cache_revalidated = false; 1821 1826 } 1822 1827 ··· 1832 1821 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1833 1822 } else if (server->caps & NFS_CAP_MTIME) { 1834 1823 nfsi->cache_validity 
|= save_cache_validity & 1835 - (NFS_INO_INVALID_ATTR 1824 + (NFS_INO_INVALID_MTIME 1836 1825 | NFS_INO_REVAL_FORCED); 1837 1826 cache_revalidated = false; 1838 1827 } ··· 1841 1830 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1842 1831 } else if (server->caps & NFS_CAP_CTIME) { 1843 1832 nfsi->cache_validity |= save_cache_validity & 1844 - (NFS_INO_INVALID_ATTR 1833 + (NFS_INO_INVALID_CTIME 1845 1834 | NFS_INO_REVAL_FORCED); 1846 1835 cache_revalidated = false; 1847 1836 } ··· 1856 1845 if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { 1857 1846 i_size_write(inode, new_isize); 1858 1847 if (!have_writers) 1859 - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1848 + invalid |= NFS_INO_INVALID_DATA; 1860 1849 } 1861 1850 dprintk("NFS: isize change on server for file %s/%ld " 1862 1851 "(%Ld to %Ld)\n", ··· 1867 1856 } 1868 1857 } else { 1869 1858 nfsi->cache_validity |= save_cache_validity & 1870 - (NFS_INO_INVALID_ATTR 1859 + (NFS_INO_INVALID_SIZE 1871 1860 | NFS_INO_REVAL_PAGECACHE 1872 1861 | NFS_INO_REVAL_FORCED); 1873 1862 cache_revalidated = false; ··· 1888 1877 umode_t newmode = inode->i_mode & S_IFMT; 1889 1878 newmode |= fattr->mode & S_IALLUGO; 1890 1879 inode->i_mode = newmode; 1891 - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1880 + invalid |= NFS_INO_INVALID_ACCESS 1881 + | NFS_INO_INVALID_ACL 1882 + | NFS_INO_INVALID_OTHER; 1892 1883 } 1893 1884 } else if (server->caps & NFS_CAP_MODE) { 1894 1885 nfsi->cache_validity |= save_cache_validity & 1895 - (NFS_INO_INVALID_ATTR 1896 - | NFS_INO_INVALID_ACCESS 1886 + (NFS_INO_INVALID_ACCESS 1897 1887 | NFS_INO_INVALID_ACL 1888 + | NFS_INO_INVALID_OTHER 1898 1889 | NFS_INO_REVAL_FORCED); 1899 1890 cache_revalidated = false; 1900 1891 } 1901 1892 1902 1893 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1903 1894 if (!uid_eq(inode->i_uid, fattr->uid)) { 1904 - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1895 + invalid 
|= NFS_INO_INVALID_ACCESS 1896 + | NFS_INO_INVALID_ACL 1897 + | NFS_INO_INVALID_OTHER; 1905 1898 inode->i_uid = fattr->uid; 1906 1899 } 1907 1900 } else if (server->caps & NFS_CAP_OWNER) { 1908 1901 nfsi->cache_validity |= save_cache_validity & 1909 - (NFS_INO_INVALID_ATTR 1910 - | NFS_INO_INVALID_ACCESS 1902 + (NFS_INO_INVALID_ACCESS 1911 1903 | NFS_INO_INVALID_ACL 1904 + | NFS_INO_INVALID_OTHER 1912 1905 | NFS_INO_REVAL_FORCED); 1913 1906 cache_revalidated = false; 1914 1907 } 1915 1908 1916 1909 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1917 1910 if (!gid_eq(inode->i_gid, fattr->gid)) { 1918 - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1911 + invalid |= NFS_INO_INVALID_ACCESS 1912 + | NFS_INO_INVALID_ACL 1913 + | NFS_INO_INVALID_OTHER; 1919 1914 inode->i_gid = fattr->gid; 1920 1915 } 1921 1916 } else if (server->caps & NFS_CAP_OWNER_GROUP) { 1922 1917 nfsi->cache_validity |= save_cache_validity & 1923 - (NFS_INO_INVALID_ATTR 1924 - | NFS_INO_INVALID_ACCESS 1918 + (NFS_INO_INVALID_ACCESS 1925 1919 | NFS_INO_INVALID_ACL 1920 + | NFS_INO_INVALID_OTHER 1926 1921 | NFS_INO_REVAL_FORCED); 1927 1922 cache_revalidated = false; 1928 1923 } 1929 1924 1930 1925 if (fattr->valid & NFS_ATTR_FATTR_NLINK) { 1931 1926 if (inode->i_nlink != fattr->nlink) { 1932 - invalid |= NFS_INO_INVALID_ATTR; 1927 + invalid |= NFS_INO_INVALID_OTHER; 1933 1928 if (S_ISDIR(inode->i_mode)) 1934 1929 invalid |= NFS_INO_INVALID_DATA; 1935 1930 set_nlink(inode, fattr->nlink); 1936 1931 } 1937 1932 } else if (server->caps & NFS_CAP_NLINK) { 1938 1933 nfsi->cache_validity |= save_cache_validity & 1939 - (NFS_INO_INVALID_ATTR 1934 + (NFS_INO_INVALID_OTHER 1940 1935 | NFS_INO_REVAL_FORCED); 1941 1936 cache_revalidated = false; 1942 1937 } ··· 1959 1942 1960 1943 /* Update attrtimeo value if we're out of the unstable period */ 1961 1944 if (invalid & NFS_INO_INVALID_ATTR) { 1945 + invalid &= ~NFS_INO_INVALID_ATTR; 1962 1946 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); 
1963 1947 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 1964 1948 nfsi->attrtimeo_timestamp = now; ··· 1979 1961 if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) 1980 1962 nfsi->attr_gencount = fattr->gencount; 1981 1963 } 1982 - 1983 - /* Don't declare attrcache up to date if there were no attrs! */ 1984 - if (cache_revalidated) 1985 - invalid &= ~NFS_INO_INVALID_ATTR; 1986 1964 1987 1965 /* Don't invalidate the data if we were to blame */ 1988 1966 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
+11 -13
fs/nfs/nfs3proc.c
··· 138 138 msg.rpc_cred = nfs_file_cred(sattr->ia_file); 139 139 nfs_fattr_init(fattr); 140 140 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 141 - if (status == 0) 141 + if (status == 0) { 142 + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) 143 + nfs_zap_acl_cache(inode); 142 144 nfs_setattr_update_inode(inode, sattr, fattr); 145 + } 143 146 dprintk("NFS reply setattr: %d\n", status); 144 147 return status; 145 148 } ··· 386 383 } 387 384 388 385 static int 389 - nfs3_proc_remove(struct inode *dir, const struct qstr *name) 386 + nfs3_proc_remove(struct inode *dir, struct dentry *dentry) 390 387 { 391 388 struct nfs_removeargs arg = { 392 389 .fh = NFS_FH(dir), 393 - .name = *name, 390 + .name = dentry->d_name, 394 391 }; 395 392 struct nfs_removeres res; 396 393 struct rpc_message msg = { ··· 400 397 }; 401 398 int status = -ENOMEM; 402 399 403 - dprintk("NFS call remove %s\n", name->name); 400 + dprintk("NFS call remove %pd2\n", dentry); 404 401 res.dir_attr = nfs_alloc_fattr(); 405 402 if (res.dir_attr == NULL) 406 403 goto out; ··· 414 411 } 415 412 416 413 static void 417 - nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) 414 + nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) 418 415 { 419 416 msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; 420 417 } ··· 436 433 } 437 434 438 435 static void 439 - nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) 436 + nfs3_proc_rename_setup(struct rpc_message *msg, 437 + struct dentry *old_dentry, 438 + struct dentry *new_dentry) 440 439 { 441 440 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; 442 441 } ··· 913 908 return 0; 914 909 } 915 910 916 - static int nfs3_return_delegation(struct inode *inode) 917 - { 918 - nfs_wb_all(inode); 919 - return 0; 920 - } 921 - 922 911 static const struct inode_operations nfs3_dir_inode_operations = { 923 912 .create = nfs_create, 924 913 .lookup = nfs_lookup, ··· 989 990 .clear_acl_cache = 
forget_all_cached_acls, 990 991 .close_context = nfs_close_context, 991 992 .have_delegation = nfs3_have_delegation, 992 - .return_delegation = nfs3_return_delegation, 993 993 .alloc_client = nfs_alloc_client, 994 994 .init_client = nfs_init_client, 995 995 .free_client = nfs_free_client,
+5 -2
fs/nfs/nfs3xdr.c
··· 1997 1997 struct nfs_entry old = *entry; 1998 1998 __be32 *p; 1999 1999 int error; 2000 + u64 new_cookie; 2000 2001 2001 2002 p = xdr_inline_decode(xdr, 4); 2002 2003 if (unlikely(p == NULL)) ··· 2020 2019 if (unlikely(error)) 2021 2020 return error; 2022 2021 2023 - entry->prev_cookie = entry->cookie; 2024 - error = decode_cookie3(xdr, &entry->cookie); 2022 + error = decode_cookie3(xdr, &new_cookie); 2025 2023 if (unlikely(error)) 2026 2024 return error; 2027 2025 ··· 2053 2053 } else 2054 2054 zero_nfs_fh3(entry->fh); 2055 2055 } 2056 + 2057 + entry->prev_cookie = entry->cookie; 2058 + entry->cookie = new_cookie; 2056 2059 2057 2060 return 0; 2058 2061
+119 -49
fs/nfs/nfs4proc.c
··· 1045 1045 struct nfs_inode *nfsi = NFS_I(dir); 1046 1046 1047 1047 spin_lock(&dir->i_lock); 1048 - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1048 + nfsi->cache_validity |= NFS_INO_INVALID_CTIME 1049 + | NFS_INO_INVALID_MTIME 1050 + | NFS_INO_INVALID_DATA; 1049 1051 if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { 1050 1052 nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; 1051 1053 nfsi->attrtimeo_timestamp = jiffies; ··· 1671 1669 { 1672 1670 struct nfs_delegation *delegation; 1673 1671 1672 + fmode &= FMODE_READ|FMODE_WRITE; 1674 1673 rcu_read_lock(); 1675 1674 delegation = rcu_dereference(NFS_I(inode)->delegation); 1676 1675 if (delegation == NULL || (delegation->type & fmode) == fmode) { ··· 1754 1751 } 1755 1752 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1756 1753 nfs_inode_set_delegation(state->inode, 1757 - data->owner->so_cred, 1758 - &data->o_res); 1754 + data->owner->so_cred, 1755 + data->o_res.delegation_type, 1756 + &data->o_res.delegation, 1757 + data->o_res.pagemod_limit); 1759 1758 else 1760 1759 nfs_inode_reclaim_delegation(state->inode, 1761 - data->owner->so_cred, 1762 - &data->o_res); 1760 + data->owner->so_cred, 1761 + data->o_res.delegation_type, 1762 + &data->o_res.delegation, 1763 + data->o_res.pagemod_limit); 1763 1764 } 1764 1765 1765 1766 /* ··· 2750 2743 * fields corresponding to attributes that were used to store the verifier. 
2751 2744 * Make sure we clobber those fields in the later setattr call 2752 2745 */ 2753 - static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, 2746 + static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, 2754 2747 struct iattr *sattr, struct nfs4_label **label) 2755 2748 { 2756 - const u32 *attrset = opendata->o_res.attrset; 2749 + const __u32 *bitmask = opendata->o_arg.server->exclcreat_bitmask; 2750 + __u32 attrset[3]; 2751 + unsigned ret; 2752 + unsigned i; 2757 2753 2758 - if ((attrset[1] & FATTR4_WORD1_TIME_ACCESS) && 2759 - !(sattr->ia_valid & ATTR_ATIME_SET)) 2760 - sattr->ia_valid |= ATTR_ATIME; 2754 + for (i = 0; i < ARRAY_SIZE(attrset); i++) { 2755 + attrset[i] = opendata->o_res.attrset[i]; 2756 + if (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE4_1) 2757 + attrset[i] &= ~bitmask[i]; 2758 + } 2761 2759 2762 - if ((attrset[1] & FATTR4_WORD1_TIME_MODIFY) && 2763 - !(sattr->ia_valid & ATTR_MTIME_SET)) 2764 - sattr->ia_valid |= ATTR_MTIME; 2760 + ret = (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE) ? 2761 + sattr->ia_valid : 0; 2765 2762 2766 - /* Except MODE, it seems harmless of setting twice. 
*/ 2767 - if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && 2768 - (attrset[1] & FATTR4_WORD1_MODE || 2769 - attrset[2] & FATTR4_WORD2_MODE_UMASK)) 2770 - sattr->ia_valid &= ~ATTR_MODE; 2763 + if ((attrset[1] & (FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET))) { 2764 + if (sattr->ia_valid & ATTR_ATIME_SET) 2765 + ret |= ATTR_ATIME_SET; 2766 + else 2767 + ret |= ATTR_ATIME; 2768 + } 2771 2769 2772 - if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) 2770 + if ((attrset[1] & (FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET))) { 2771 + if (sattr->ia_valid & ATTR_MTIME_SET) 2772 + ret |= ATTR_MTIME_SET; 2773 + else 2774 + ret |= ATTR_MTIME; 2775 + } 2776 + 2777 + if (!(attrset[2] & FATTR4_WORD2_SECURITY_LABEL)) 2773 2778 *label = NULL; 2779 + return ret; 2774 2780 } 2775 2781 2776 2782 static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, ··· 2912 2892 2913 2893 if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && 2914 2894 (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { 2915 - nfs4_exclusive_attrset(opendata, sattr, &label); 2895 + unsigned attrs = nfs4_exclusive_attrset(opendata, sattr, &label); 2916 2896 /* 2917 2897 * send create attributes which was not set by open 2918 2898 * with an extra setattr. 
2919 2899 */ 2920 - if (sattr->ia_valid & NFS4_VALID_ATTRS) { 2900 + if (attrs || label) { 2901 + unsigned ia_old = sattr->ia_valid; 2902 + 2903 + sattr->ia_valid = attrs; 2921 2904 nfs_fattr_init(opendata->o_res.f_attr); 2922 2905 status = nfs4_do_setattr(state->inode, cred, 2923 2906 opendata->o_res.f_attr, sattr, ··· 2930 2907 opendata->o_res.f_attr); 2931 2908 nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); 2932 2909 } 2910 + sattr->ia_valid = ia_old; 2933 2911 } 2934 2912 } 2935 2913 if (opened && opendata->file_created) ··· 3898 3874 if (IS_ERR(label)) 3899 3875 return PTR_ERR(label); 3900 3876 3877 + /* Return any delegations if we're going to change ACLs */ 3878 + if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) 3879 + nfs4_inode_make_writeable(inode); 3880 + 3901 3881 status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); 3902 3882 if (status == 0) { 3903 3883 nfs_setattr_update_inode(inode, sattr, fattr); ··· 4076 4048 struct nfs_server *server = NFS_SERVER(inode); 4077 4049 struct nfs4_accessargs args = { 4078 4050 .fh = NFS_FH(inode), 4079 - .bitmask = server->cache_consistency_bitmask, 4080 4051 .access = entry->mask, 4081 4052 }; 4082 4053 struct nfs4_accessres res = { ··· 4089 4062 }; 4090 4063 int status = 0; 4091 4064 4092 - res.fattr = nfs_alloc_fattr(); 4093 - if (res.fattr == NULL) 4094 - return -ENOMEM; 4065 + if (!nfs_have_delegated_attributes(inode)) { 4066 + res.fattr = nfs_alloc_fattr(); 4067 + if (res.fattr == NULL) 4068 + return -ENOMEM; 4069 + args.bitmask = server->cache_consistency_bitmask; 4070 + } 4095 4071 4096 4072 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 4097 4073 if (!status) { 4098 4074 nfs_access_set_mask(entry, res.access); 4099 - nfs_refresh_inode(inode, res.fattr); 4075 + if (res.fattr) 4076 + nfs_refresh_inode(inode, res.fattr); 4100 4077 } 4101 4078 nfs_free_fattr(res.fattr); 4102 4079 return status; ··· 4230 4199 return status; 4231 
4200 } 4232 4201 4233 - static int nfs4_proc_remove(struct inode *dir, const struct qstr *name) 4202 + static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry) 4203 + { 4204 + struct nfs4_exception exception = { }; 4205 + struct inode *inode = d_inode(dentry); 4206 + int err; 4207 + 4208 + if (inode) { 4209 + if (inode->i_nlink == 1) 4210 + nfs4_inode_return_delegation(inode); 4211 + else 4212 + nfs4_inode_make_writeable(inode); 4213 + } 4214 + do { 4215 + err = _nfs4_proc_remove(dir, &dentry->d_name); 4216 + trace_nfs4_remove(dir, &dentry->d_name, err); 4217 + err = nfs4_handle_exception(NFS_SERVER(dir), err, 4218 + &exception); 4219 + } while (exception.retry); 4220 + return err; 4221 + } 4222 + 4223 + static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) 4234 4224 { 4235 4225 struct nfs4_exception exception = { }; 4236 4226 int err; 4227 + 4237 4228 do { 4238 4229 err = _nfs4_proc_remove(dir, name); 4239 4230 trace_nfs4_remove(dir, name, err); ··· 4265 4212 return err; 4266 4213 } 4267 4214 4268 - static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) 4215 + static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) 4269 4216 { 4270 - struct nfs_server *server = NFS_SERVER(dir); 4271 4217 struct nfs_removeargs *args = msg->rpc_argp; 4272 4218 struct nfs_removeres *res = msg->rpc_resp; 4219 + struct inode *inode = d_inode(dentry); 4273 4220 4274 - res->server = server; 4221 + res->server = NFS_SB(dentry->d_sb); 4275 4222 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 4276 4223 nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); 4277 4224 4278 4225 nfs_fattr_init(res->dir_attr); 4226 + 4227 + if (inode) 4228 + nfs4_inode_return_delegation(inode); 4279 4229 } 4280 4230 4281 4231 static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) ··· 4304 4248 return 1; 4305 4249 } 4306 4250 4307 - static void nfs4_proc_rename_setup(struct rpc_message 
*msg, struct inode *dir) 4251 + static void nfs4_proc_rename_setup(struct rpc_message *msg, 4252 + struct dentry *old_dentry, 4253 + struct dentry *new_dentry) 4308 4254 { 4309 - struct nfs_server *server = NFS_SERVER(dir); 4310 4255 struct nfs_renameargs *arg = msg->rpc_argp; 4311 4256 struct nfs_renameres *res = msg->rpc_resp; 4257 + struct inode *old_inode = d_inode(old_dentry); 4258 + struct inode *new_inode = d_inode(new_dentry); 4312 4259 4260 + if (old_inode) 4261 + nfs4_inode_make_writeable(old_inode); 4262 + if (new_inode) 4263 + nfs4_inode_return_delegation(new_inode); 4313 4264 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; 4314 - res->server = server; 4265 + res->server = NFS_SB(old_dentry->d_sb); 4315 4266 nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); 4316 4267 } 4317 4268 ··· 4379 4316 goto out; 4380 4317 } 4381 4318 arg.bitmask = nfs4_bitmask(server, res.label); 4319 + 4320 + nfs4_inode_make_writeable(inode); 4382 4321 4383 4322 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 4384 4323 if (!status) { ··· 5375 5310 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); 5376 5311 if (i < 0) 5377 5312 return i; 5378 - nfs4_inode_return_delegation(inode); 5313 + nfs4_inode_make_writeable(inode); 5379 5314 ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 5380 5315 5381 5316 /* ··· 5390 5325 * so mark the attribute cache invalid. 
5391 5326 */ 5392 5327 spin_lock(&inode->i_lock); 5393 - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; 5328 + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE 5329 + | NFS_INO_INVALID_CTIME; 5394 5330 spin_unlock(&inode->i_lock); 5395 5331 nfs_access_zap_cache(inode); 5396 5332 nfs_zap_acl_cache(inode); ··· 6687 6621 nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key) 6688 6622 { 6689 6623 int ret; 6690 - struct cb_notify_lock_args *cbnl = key; 6691 6624 struct nfs4_lock_waiter *waiter = wait->private; 6692 - struct nfs_lowner *lowner = &cbnl->cbnl_owner, 6693 - *wowner = waiter->owner; 6694 6625 6695 - /* Only wake if the callback was for the same owner */ 6696 - if (lowner->clientid != wowner->clientid || 6697 - lowner->id != wowner->id || 6698 - lowner->s_dev != wowner->s_dev) 6699 - return 0; 6626 + /* NULL key means to wake up everyone */ 6627 + if (key) { 6628 + struct cb_notify_lock_args *cbnl = key; 6629 + struct nfs_lowner *lowner = &cbnl->cbnl_owner, 6630 + *wowner = waiter->owner; 6700 6631 6701 - /* Make sure it's for the right inode */ 6702 - if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) 6703 - return 0; 6632 + /* Only wake if the callback was for the same owner. 
*/ 6633 + if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev) 6634 + return 0; 6704 6635 6705 - waiter->notified = true; 6636 + /* Make sure it's for the right inode */ 6637 + if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) 6638 + return 0; 6639 + 6640 + waiter->notified = true; 6641 + } 6706 6642 6707 6643 /* override "private" so we can use default_wake_function */ 6708 6644 wait->private = waiter->task; ··· 6741 6673 add_wait_queue(q, &wait); 6742 6674 6743 6675 while(!signalled()) { 6676 + waiter.notified = false; 6744 6677 status = nfs4_proc_setlk(state, cmd, request); 6745 6678 if ((status != -EAGAIN) || IS_SETLK(cmd)) 6746 6679 break; ··· 8483 8414 { 8484 8415 switch(task->tk_status) { 8485 8416 case 0: 8417 + wake_up_all(&clp->cl_lock_waitq); 8418 + /* Fallthrough */ 8486 8419 case -NFS4ERR_COMPLETE_ALREADY: 8487 8420 case -NFS4ERR_WRONG_CRED: /* What to do here? */ 8488 8421 break; ··· 9664 9593 .link = nfs4_proc_link, 9665 9594 .symlink = nfs4_proc_symlink, 9666 9595 .mkdir = nfs4_proc_mkdir, 9667 - .rmdir = nfs4_proc_remove, 9596 + .rmdir = nfs4_proc_rmdir, 9668 9597 .readdir = nfs4_proc_readdir, 9669 9598 .mknod = nfs4_proc_mknod, 9670 9599 .statfs = nfs4_proc_statfs, ··· 9685 9614 .close_context = nfs4_close_context, 9686 9615 .open_context = nfs4_atomic_open, 9687 9616 .have_delegation = nfs4_have_delegation, 9688 - .return_delegation = nfs4_inode_return_delegation, 9689 9617 .alloc_client = nfs4_alloc_client, 9690 9618 .init_client = nfs4_init_client, 9691 9619 .free_client = nfs4_free_client,
+10 -12
fs/nfs/nfs4state.c
··· 428 428 struct rb_node **p = &server->state_owners.rb_node, 429 429 *parent = NULL; 430 430 struct nfs4_state_owner *sp; 431 - int err; 432 431 433 432 while (*p != NULL) { 434 433 parent = *p; ··· 444 445 return sp; 445 446 } 446 447 } 447 - err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); 448 - if (err) 449 - return ERR_PTR(err); 450 448 rb_link_node(&new->so_server_node, parent, p); 451 449 rb_insert_color(&new->so_server_node, &server->state_owners); 452 450 return new; ··· 456 460 457 461 if (!RB_EMPTY_NODE(&sp->so_server_node)) 458 462 rb_erase(&sp->so_server_node, &server->state_owners); 459 - ida_remove(&server->openowner_id, sp->so_seqid.owner_id); 460 463 } 461 464 462 465 static void ··· 490 495 sp = kzalloc(sizeof(*sp), gfp_flags); 491 496 if (!sp) 492 497 return NULL; 498 + sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0, 499 + gfp_flags); 500 + if (sp->so_seqid.owner_id < 0) { 501 + kfree(sp); 502 + return NULL; 503 + } 493 504 sp->so_server = server; 494 505 sp->so_cred = get_rpccred(cred); 495 506 spin_lock_init(&sp->so_lock); ··· 527 526 { 528 527 nfs4_destroy_seqid_counter(&sp->so_seqid); 529 528 put_rpccred(sp->so_cred); 529 + ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id); 530 530 kfree(sp); 531 531 } 532 532 ··· 578 576 new = nfs4_alloc_state_owner(server, cred, gfp_flags); 579 577 if (new == NULL) 580 578 goto out; 581 - do { 582 - if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) 583 - break; 584 - spin_lock(&clp->cl_lock); 585 - sp = nfs4_insert_state_owner_locked(new); 586 - spin_unlock(&clp->cl_lock); 587 - } while (sp == ERR_PTR(-EAGAIN)); 579 + spin_lock(&clp->cl_lock); 580 + sp = nfs4_insert_state_owner_locked(new); 581 + spin_unlock(&clp->cl_lock); 588 582 if (sp != new) 589 583 nfs4_free_state_owner(new); 590 584 out:
+115 -130
fs/nfs/nfs4xdr.c
··· 98 98 ((3+NFS4_FHSIZE) >> 2)) 99 99 #define nfs4_fattr_bitmap_maxsz 4 100 100 #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) 101 + #define nfstime4_maxsz (3) 101 102 #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) 102 103 #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) 103 104 #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) ··· 113 112 #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) 114 113 /* This is based on getfattr, which uses the most attributes: */ 115 114 #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 116 - 3 + 3 + 3 + nfs4_owner_maxsz + \ 115 + 3*nfstime4_maxsz + \ 116 + nfs4_owner_maxsz + \ 117 117 nfs4_group_maxsz + nfs4_label_maxsz + \ 118 118 decode_mdsthreshold_maxsz)) 119 119 #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ ··· 125 123 nfs4_owner_maxsz + \ 126 124 nfs4_group_maxsz + \ 127 125 nfs4_label_maxsz + \ 128 - 4 + 4) 126 + 1 + nfstime4_maxsz + \ 127 + 1 + nfstime4_maxsz) 129 128 #define encode_savefh_maxsz (op_encode_hdr_maxsz) 130 129 #define decode_savefh_maxsz (op_decode_hdr_maxsz) 131 130 #define encode_restorefh_maxsz (op_encode_hdr_maxsz) ··· 960 957 WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0); 961 958 } 962 959 960 + static ssize_t xdr_encode_bitmap4(struct xdr_stream *xdr, 961 + const __u32 *bitmap, size_t len) 962 + { 963 + ssize_t ret; 964 + 965 + /* Trim empty words */ 966 + while (len > 0 && bitmap[len-1] == 0) 967 + len--; 968 + ret = xdr_stream_encode_uint32_array(xdr, bitmap, len); 969 + if (WARN_ON_ONCE(ret < 0)) 970 + return ret; 971 + return len; 972 + } 973 + 974 + static size_t mask_bitmap4(const __u32 *bitmap, const __u32 *mask, 975 + __u32 *res, size_t len) 976 + { 977 + size_t i; 978 + __u32 tmp; 979 + 980 + while (len > 0 && (bitmap[len-1] == 0 || mask[len-1] == 0)) 981 + len--; 982 + for (i = len; i-- > 0;) { 983 + tmp = bitmap[i] & mask[i]; 984 + res[i] = tmp; 985 + } 986 + return len; 987 
+ } 988 + 963 989 static void encode_nfs4_seqid(struct xdr_stream *xdr, 964 990 const struct nfs_seqid *seqid) 965 991 { ··· 1043 1011 encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); 1044 1012 } 1045 1013 1014 + static __be32 * 1015 + xdr_encode_nfstime4(__be32 *p, const struct timespec *t) 1016 + { 1017 + p = xdr_encode_hyper(p, (__s64)t->tv_sec); 1018 + *p++ = cpu_to_be32(t->tv_nsec); 1019 + return p; 1020 + } 1021 + 1046 1022 static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, 1047 1023 const struct nfs4_label *label, 1048 1024 const umode_t *umask, ··· 1062 1022 int owner_namelen = 0; 1063 1023 int owner_grouplen = 0; 1064 1024 __be32 *p; 1065 - unsigned i; 1066 1025 uint32_t len = 0; 1067 - uint32_t bmval_len; 1068 1026 uint32_t bmval[3] = { 0 }; 1069 1027 1070 1028 /* ··· 1110 1072 if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 1111 1073 if (iap->ia_valid & ATTR_ATIME_SET) { 1112 1074 bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; 1113 - len += 16; 1075 + len += 4 + (nfstime4_maxsz << 2); 1114 1076 } else if (iap->ia_valid & ATTR_ATIME) { 1115 1077 bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; 1116 1078 len += 4; ··· 1119 1081 if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) { 1120 1082 if (iap->ia_valid & ATTR_MTIME_SET) { 1121 1083 bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; 1122 - len += 16; 1084 + len += 4 + (nfstime4_maxsz << 2); 1123 1085 } else if (iap->ia_valid & ATTR_MTIME) { 1124 1086 bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; 1125 1087 len += 4; ··· 1131 1093 bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; 1132 1094 } 1133 1095 1134 - if (bmval[2] != 0) 1135 - bmval_len = 3; 1136 - else if (bmval[1] != 0) 1137 - bmval_len = 2; 1138 - else 1139 - bmval_len = 1; 1140 - 1141 - p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len); 1142 - 1143 - *p++ = cpu_to_be32(bmval_len); 1144 - for (i = 0; i < bmval_len; i++) 1145 - *p++ = cpu_to_be32(bmval[i]); 1146 - *p++ = cpu_to_be32(len); 1096 + xdr_encode_bitmap4(xdr, bmval, 
ARRAY_SIZE(bmval)); 1097 + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); 1147 1098 1148 1099 if (bmval[0] & FATTR4_WORD0_SIZE) 1149 1100 p = xdr_encode_hyper(p, iap->ia_size); ··· 1145 1118 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 1146 1119 if (iap->ia_valid & ATTR_ATIME_SET) { 1147 1120 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 1148 - p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); 1149 - *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); 1121 + p = xdr_encode_nfstime4(p, &iap->ia_atime); 1150 1122 } else 1151 1123 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); 1152 1124 } 1153 1125 if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { 1154 1126 if (iap->ia_valid & ATTR_MTIME_SET) { 1155 1127 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 1156 - p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); 1157 - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); 1128 + p = xdr_encode_nfstime4(p, &iap->ia_mtime); 1158 1129 } else 1159 1130 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); 1160 1131 } ··· 1224 1199 create->server, create->server->attr_bitmask); 1225 1200 } 1226 1201 1227 - static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) 1202 + static void encode_getattr(struct xdr_stream *xdr, 1203 + const __u32 *bitmap, const __u32 *mask, size_t len, 1204 + struct compound_hdr *hdr) 1228 1205 { 1229 - __be32 *p; 1206 + __u32 masked_bitmap[nfs4_fattr_bitmap_maxsz]; 1230 1207 1231 1208 encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); 1232 - p = reserve_space(xdr, 8); 1233 - *p++ = cpu_to_be32(1); 1234 - *p = cpu_to_be32(bitmap); 1235 - } 1236 - 1237 - static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) 1238 - { 1239 - __be32 *p; 1240 - 1241 - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); 1242 - p = reserve_space(xdr, 12); 1243 - *p++ = cpu_to_be32(2); 1244 - *p++ = cpu_to_be32(bm0); 1245 - *p = cpu_to_be32(bm1); 1246 - } 1247 - 1248 - static void 1249 - 
encode_getattr_three(struct xdr_stream *xdr, 1250 - uint32_t bm0, uint32_t bm1, uint32_t bm2, 1251 - struct compound_hdr *hdr) 1252 - { 1253 - __be32 *p; 1254 - 1255 - encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); 1256 - if (bm2) { 1257 - p = reserve_space(xdr, 16); 1258 - *p++ = cpu_to_be32(3); 1259 - *p++ = cpu_to_be32(bm0); 1260 - *p++ = cpu_to_be32(bm1); 1261 - *p = cpu_to_be32(bm2); 1262 - } else if (bm1) { 1263 - p = reserve_space(xdr, 12); 1264 - *p++ = cpu_to_be32(2); 1265 - *p++ = cpu_to_be32(bm0); 1266 - *p = cpu_to_be32(bm1); 1267 - } else { 1268 - p = reserve_space(xdr, 8); 1269 - *p++ = cpu_to_be32(1); 1270 - *p = cpu_to_be32(bm0); 1209 + if (mask) { 1210 + if (WARN_ON_ONCE(len > ARRAY_SIZE(masked_bitmap))) 1211 + len = ARRAY_SIZE(masked_bitmap); 1212 + len = mask_bitmap4(bitmap, mask, masked_bitmap, len); 1213 + bitmap = masked_bitmap; 1271 1214 } 1215 + xdr_encode_bitmap4(xdr, bitmap, len); 1272 1216 } 1273 1217 1274 1218 static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1275 1219 { 1276 - encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0], 1277 - bitmask[1] & nfs4_fattr_bitmap[1], 1278 - bitmask[2] & nfs4_fattr_bitmap[2], 1279 - hdr); 1220 + encode_getattr(xdr, nfs4_fattr_bitmap, bitmask, 1221 + ARRAY_SIZE(nfs4_fattr_bitmap), hdr); 1280 1222 } 1281 1223 1282 1224 static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, 1283 1225 const u32 *open_bitmap, 1284 1226 struct compound_hdr *hdr) 1285 1227 { 1286 - encode_getattr_three(xdr, 1287 - bitmask[0] & open_bitmap[0], 1288 - bitmask[1] & open_bitmap[1], 1289 - bitmask[2] & open_bitmap[2], 1290 - hdr); 1228 + encode_getattr(xdr, open_bitmap, bitmask, 3, hdr); 1291 1229 } 1292 1230 1293 1231 static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1294 1232 { 1295 - encode_getattr_three(xdr, 1296 - bitmask[0] & nfs4_fsinfo_bitmap[0], 1297 - bitmask[1] & nfs4_fsinfo_bitmap[1], 
1298 - bitmask[2] & nfs4_fsinfo_bitmap[2], 1299 - hdr); 1233 + encode_getattr(xdr, nfs4_fsinfo_bitmap, bitmask, 1234 + ARRAY_SIZE(nfs4_fsinfo_bitmap), hdr); 1300 1235 } 1301 1236 1302 1237 static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) 1303 1238 { 1304 - encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0], 1305 - bitmask[1] & nfs4_fs_locations_bitmap[1], hdr); 1239 + encode_getattr(xdr, nfs4_fs_locations_bitmap, bitmask, 1240 + ARRAY_SIZE(nfs4_fs_locations_bitmap), hdr); 1306 1241 } 1307 1242 1308 1243 static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) ··· 2101 2116 encode_sequence(xdr, &args->seq_args, &hdr); 2102 2117 encode_putfh(xdr, args->fh, &hdr); 2103 2118 encode_access(xdr, args->access, &hdr); 2104 - encode_getfattr(xdr, args->bitmask, &hdr); 2119 + if (args->bitmask) 2120 + encode_getfattr(xdr, args->bitmask, &hdr); 2105 2121 encode_nops(&hdr); 2106 2122 } 2107 2123 ··· 2544 2558 struct compound_hdr hdr = { 2545 2559 .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2546 2560 }; 2561 + const __u32 nfs4_acl_bitmap[1] = { 2562 + [0] = FATTR4_WORD0_ACL, 2563 + }; 2547 2564 uint32_t replen; 2548 2565 2549 2566 encode_compound_hdr(xdr, req, &hdr); 2550 2567 encode_sequence(xdr, &args->seq_args, &hdr); 2551 2568 encode_putfh(xdr, args->fh, &hdr); 2552 2569 replen = hdr.replen + op_decode_hdr_maxsz; 2553 - encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); 2570 + encode_getattr(xdr, nfs4_acl_bitmap, NULL, 2571 + ARRAY_SIZE(nfs4_acl_bitmap), &hdr); 2554 2572 2555 2573 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2556 2574 args->acl_pages, 0, args->acl_len); ··· 2633 2643 encode_compound_hdr(xdr, req, &hdr); 2634 2644 encode_sequence(xdr, &args->seq_args, &hdr); 2635 2645 encode_putfh(xdr, args->fh, &hdr); 2636 - encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], 2637 - &hdr); 2646 + encode_getattr(xdr, nfs4_pathconf_bitmap, args->bitmask, 2647 + 
ARRAY_SIZE(nfs4_pathconf_bitmap), &hdr); 2638 2648 encode_nops(&hdr); 2639 2649 } 2640 2650 ··· 2652 2662 encode_compound_hdr(xdr, req, &hdr); 2653 2663 encode_sequence(xdr, &args->seq_args, &hdr); 2654 2664 encode_putfh(xdr, args->fh, &hdr); 2655 - encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], 2656 - args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); 2665 + encode_getattr(xdr, nfs4_statfs_bitmap, args->bitmask, 2666 + ARRAY_SIZE(nfs4_statfs_bitmap), &hdr); 2657 2667 encode_nops(&hdr); 2658 2668 } 2659 2669 ··· 2673 2683 encode_compound_hdr(xdr, req, &hdr); 2674 2684 encode_sequence(xdr, &args->seq_args, &hdr); 2675 2685 encode_putfh(xdr, args->fhandle, &hdr); 2676 - encode_getattr_three(xdr, bitmask[0], bitmask[1], bitmask[2], &hdr); 2686 + encode_getattr(xdr, bitmask, NULL, 3, &hdr); 2677 2687 encode_nops(&hdr); 2678 2688 } 2679 2689 ··· 3207 3217 return -EIO; 3208 3218 } 3209 3219 3210 - static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) 3220 + static ssize_t 3221 + decode_bitmap4(struct xdr_stream *xdr, uint32_t *bitmap, size_t sz) 3211 3222 { 3212 - uint32_t bmlen; 3213 - __be32 *p; 3223 + ssize_t ret; 3214 3224 3215 - p = xdr_inline_decode(xdr, 4); 3216 - if (unlikely(!p)) 3217 - goto out_overflow; 3218 - bmlen = be32_to_cpup(p); 3219 - 3220 - bitmap[0] = bitmap[1] = bitmap[2] = 0; 3221 - p = xdr_inline_decode(xdr, (bmlen << 2)); 3222 - if (unlikely(!p)) 3223 - goto out_overflow; 3224 - if (bmlen > 0) { 3225 - bitmap[0] = be32_to_cpup(p++); 3226 - if (bmlen > 1) { 3227 - bitmap[1] = be32_to_cpup(p++); 3228 - if (bmlen > 2) 3229 - bitmap[2] = be32_to_cpup(p); 3230 - } 3231 - } 3232 - return 0; 3233 - out_overflow: 3225 + ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz); 3226 + if (likely(ret >= 0)) 3227 + return ret; 3228 + if (ret == -EMSGSIZE) 3229 + return sz; 3234 3230 print_overflow_msg(__func__, xdr); 3235 3231 return -EIO; 3232 + } 3233 + 3234 + static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t 
*bitmap) 3235 + { 3236 + ssize_t ret; 3237 + ret = decode_bitmap4(xdr, bitmap, 3); 3238 + return ret < 0 ? ret : 0; 3236 3239 } 3237 3240 3238 3241 static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep) ··· 3963 3980 bitmap[1] &= ~FATTR4_WORD1_OWNER; 3964 3981 3965 3982 if (owner_name != NULL) { 3966 - len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT); 3983 + len = decode_nfs4_string(xdr, owner_name, GFP_NOIO); 3967 3984 if (len <= 0) 3968 3985 goto out; 3969 3986 dprintk("%s: name=%s\n", __func__, owner_name->data); ··· 3998 4015 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 3999 4016 4000 4017 if (group_name != NULL) { 4001 - len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT); 4018 + len = decode_nfs4_string(xdr, group_name, GFP_NOIO); 4002 4019 if (len <= 0) 4003 4020 goto out; 4004 4021 dprintk("%s: name=%s\n", __func__, group_name->data); ··· 4138 4155 return -EIO; 4139 4156 } 4140 4157 4158 + static __be32 * 4159 + xdr_decode_nfstime4(__be32 *p, struct timespec *t) 4160 + { 4161 + __u64 sec; 4162 + 4163 + p = xdr_decode_hyper(p, &sec); 4164 + t-> tv_sec = (time_t)sec; 4165 + t->tv_nsec = be32_to_cpup(p++); 4166 + return p; 4167 + } 4168 + 4141 4169 static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 4142 4170 { 4143 4171 __be32 *p; 4144 - uint64_t sec; 4145 - uint32_t nsec; 4146 4172 4147 - p = xdr_inline_decode(xdr, 12); 4173 + p = xdr_inline_decode(xdr, nfstime4_maxsz << 2); 4148 4174 if (unlikely(!p)) 4149 4175 goto out_overflow; 4150 - p = xdr_decode_hyper(p, &sec); 4151 - nsec = be32_to_cpup(p); 4152 - time->tv_sec = (time_t)sec; 4153 - time->tv_nsec = (long)nsec; 4176 + xdr_decode_nfstime4(p, time); 4154 4177 return 0; 4155 4178 out_overflow: 4156 4179 print_overflow_msg(__func__, xdr); ··· 5459 5470 5460 5471 static int decode_setattr(struct xdr_stream *xdr) 5461 5472 { 5462 - __be32 *p; 5463 - uint32_t bmlen; 5464 5473 int status; 5465 5474 5466 5475 status = decode_op_hdr(xdr, 
OP_SETATTR); 5467 5476 if (status) 5468 5477 return status; 5469 - p = xdr_inline_decode(xdr, 4); 5470 - if (unlikely(!p)) 5471 - goto out_overflow; 5472 - bmlen = be32_to_cpup(p); 5473 - p = xdr_inline_decode(xdr, bmlen << 2); 5474 - if (likely(p)) 5478 + if (decode_bitmap4(xdr, NULL, 0) >= 0) 5475 5479 return 0; 5476 - out_overflow: 5477 5480 print_overflow_msg(__func__, xdr); 5478 5481 return -EIO; 5479 5482 } ··· 6236 6255 status = decode_access(xdr, &res->supported, &res->access); 6237 6256 if (status != 0) 6238 6257 goto out; 6239 - decode_getfattr(xdr, res->fattr, res->server); 6258 + if (res->fattr) 6259 + decode_getfattr(xdr, res->fattr, res->server); 6240 6260 out: 6241 6261 return status; 6242 6262 } ··· 7517 7535 unsigned int savep; 7518 7536 uint32_t bitmap[3] = {0}; 7519 7537 uint32_t len; 7538 + uint64_t new_cookie; 7520 7539 __be32 *p = xdr_inline_decode(xdr, 4); 7521 7540 if (unlikely(!p)) 7522 7541 goto out_overflow; ··· 7534 7551 p = xdr_inline_decode(xdr, 12); 7535 7552 if (unlikely(!p)) 7536 7553 goto out_overflow; 7537 - entry->prev_cookie = entry->cookie; 7538 - p = xdr_decode_hyper(p, &entry->cookie); 7554 + p = xdr_decode_hyper(p, &new_cookie); 7539 7555 entry->len = be32_to_cpup(p); 7540 7556 7541 7557 p = xdr_inline_decode(xdr, entry->len); ··· 7567 7585 entry->d_type = DT_UNKNOWN; 7568 7586 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) 7569 7587 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); 7588 + 7589 + entry->prev_cookie = entry->cookie; 7590 + entry->cookie = new_cookie; 7570 7591 7571 7592 return 0; 7572 7593
+7 -12
fs/nfs/proc.c
··· 300 300 } 301 301 302 302 static int 303 - nfs_proc_remove(struct inode *dir, const struct qstr *name) 303 + nfs_proc_remove(struct inode *dir, struct dentry *dentry) 304 304 { 305 305 struct nfs_removeargs arg = { 306 306 .fh = NFS_FH(dir), 307 - .name = *name, 307 + .name = dentry->d_name, 308 308 }; 309 309 struct rpc_message msg = { 310 310 .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], ··· 312 312 }; 313 313 int status; 314 314 315 - dprintk("NFS call remove %s\n", name->name); 315 + dprintk("NFS call remove %pd2\n",dentry); 316 316 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 317 317 nfs_mark_for_revalidate(dir); 318 318 ··· 321 321 } 322 322 323 323 static void 324 - nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) 324 + nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry) 325 325 { 326 326 msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; 327 327 } ··· 338 338 } 339 339 340 340 static void 341 - nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) 341 + nfs_proc_rename_setup(struct rpc_message *msg, 342 + struct dentry *old_dentry, 343 + struct dentry *new_dentry) 342 344 { 343 345 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; 344 346 } ··· 673 671 return 0; 674 672 } 675 673 676 - static int nfs_return_delegation(struct inode *inode) 677 - { 678 - nfs_wb_all(inode); 679 - return 0; 680 - } 681 - 682 674 static const struct inode_operations nfs_dir_inode_operations = { 683 675 .create = nfs_create, 684 676 .lookup = nfs_lookup, ··· 737 741 .lock_check_bounds = nfs_lock_check_bounds, 738 742 .close_context = nfs_close_context, 739 743 .have_delegation = nfs_have_delegation, 740 - .return_delegation = nfs_return_delegation, 741 744 .alloc_client = nfs_alloc_client, 742 745 .init_client = nfs_init_client, 743 746 .free_client = nfs_free_client,
+2 -5
fs/nfs/unlink.c
··· 105 105 data->args.fh = NFS_FH(dir); 106 106 nfs_fattr_init(data->res.dir_attr); 107 107 108 - NFS_PROTO(dir)->unlink_setup(&msg, dir); 108 + NFS_PROTO(dir)->unlink_setup(&msg, data->dentry); 109 109 110 110 task_setup_data.rpc_client = NFS_CLIENT(dir); 111 111 task = rpc_run_task(&task_setup_data); ··· 386 386 387 387 nfs_sb_active(old_dir->i_sb); 388 388 389 - NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir); 389 + NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dentry, new_dentry); 390 390 391 391 return rpc_run_task(&task_setup_data); 392 392 } ··· 462 462 goto out; 463 463 464 464 fileid = NFS_FILEID(d_inode(dentry)); 465 - 466 - /* Return delegation in anticipation of the rename */ 467 - NFS_PROTO(d_inode(dentry))->return_delegation(d_inode(dentry)); 468 465 469 466 sdentry = NULL; 470 467 do {
+6 -2
fs/nfs/write.c
··· 231 231 if (i_size >= end) 232 232 goto out; 233 233 i_size_write(inode, end); 234 + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; 234 235 nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); 235 236 out: 236 237 spin_unlock(&inode->i_lock); ··· 1563 1562 } 1564 1563 1565 1564 /* Deal with the suid/sgid bit corner case */ 1566 - if (nfs_should_remove_suid(inode)) 1567 - nfs_mark_for_revalidate(inode); 1565 + if (nfs_should_remove_suid(inode)) { 1566 + spin_lock(&inode->i_lock); 1567 + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER; 1568 + spin_unlock(&inode->i_lock); 1569 + } 1568 1570 return 0; 1569 1571 } 1570 1572
+23 -12
include/linux/nfs_fs.h
··· 198 198 /* 199 199 * Cache validity bit flags 200 200 */ 201 - #define NFS_INO_INVALID_ATTR 0x0001 /* cached attrs are invalid */ 202 - #define NFS_INO_INVALID_DATA 0x0002 /* cached data is invalid */ 203 - #define NFS_INO_INVALID_ATIME 0x0004 /* cached atime is invalid */ 204 - #define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ 205 - #define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ 206 - #define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ 207 - #define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */ 208 - #define NFS_INO_INVALID_LABEL 0x0080 /* cached label is invalid */ 201 + #define NFS_INO_INVALID_DATA BIT(1) /* cached data is invalid */ 202 + #define NFS_INO_INVALID_ATIME BIT(2) /* cached atime is invalid */ 203 + #define NFS_INO_INVALID_ACCESS BIT(3) /* cached access cred invalid */ 204 + #define NFS_INO_INVALID_ACL BIT(4) /* cached acls are invalid */ 205 + #define NFS_INO_REVAL_PAGECACHE BIT(5) /* must revalidate pagecache */ 206 + #define NFS_INO_REVAL_FORCED BIT(6) /* force revalidation ignoring a delegation */ 207 + #define NFS_INO_INVALID_LABEL BIT(7) /* cached label is invalid */ 208 + #define NFS_INO_INVALID_CHANGE BIT(8) /* cached change is invalid */ 209 + #define NFS_INO_INVALID_CTIME BIT(9) /* cached ctime is invalid */ 210 + #define NFS_INO_INVALID_MTIME BIT(10) /* cached mtime is invalid */ 211 + #define NFS_INO_INVALID_SIZE BIT(11) /* cached size is invalid */ 212 + #define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ 213 + 214 + #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ 215 + | NFS_INO_INVALID_CTIME \ 216 + | NFS_INO_INVALID_MTIME \ 217 + | NFS_INO_INVALID_SIZE \ 218 + | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ 209 219 210 220 /* 211 221 * Bit offsets in flags field ··· 302 292 struct nfs_inode *nfsi = NFS_I(inode); 303 293 304 294 spin_lock(&inode->i_lock); 305 - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | 306 - 
NFS_INO_REVAL_PAGECACHE | 307 - NFS_INO_INVALID_ACCESS | 308 - NFS_INO_INVALID_ACL; 295 + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE 296 + | NFS_INO_INVALID_ACCESS 297 + | NFS_INO_INVALID_ACL 298 + | NFS_INO_INVALID_CHANGE 299 + | NFS_INO_INVALID_CTIME; 309 300 if (S_ISDIR(inode->i_mode)) 310 301 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 311 302 spin_unlock(&inode->i_lock);
+5 -4
include/linux/nfs_xdr.h
··· 1590 1590 unsigned int); 1591 1591 int (*create) (struct inode *, struct dentry *, 1592 1592 struct iattr *, int); 1593 - int (*remove) (struct inode *, const struct qstr *); 1594 - void (*unlink_setup) (struct rpc_message *, struct inode *dir); 1593 + int (*remove) (struct inode *, struct dentry *); 1594 + void (*unlink_setup) (struct rpc_message *, struct dentry *); 1595 1595 void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); 1596 1596 int (*unlink_done) (struct rpc_task *, struct inode *); 1597 - void (*rename_setup) (struct rpc_message *msg, struct inode *dir); 1597 + void (*rename_setup) (struct rpc_message *msg, 1598 + struct dentry *old_dentry, 1599 + struct dentry *new_dentry); 1598 1600 void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); 1599 1601 int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); 1600 1602 int (*link) (struct inode *, struct inode *, const struct qstr *); ··· 1635 1633 struct iattr *iattr, 1636 1634 int *); 1637 1635 int (*have_delegation)(struct inode *, fmode_t); 1638 - int (*return_delegation)(struct inode *); 1639 1636 struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); 1640 1637 struct nfs_client *(*init_client) (struct nfs_client *, 1641 1638 const struct nfs_client_initdata *);
+7
include/linux/sunrpc/clnt.h
··· 217 217 bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, 218 218 const struct sockaddr *sap); 219 219 void rpc_cleanup_clids(void); 220 + 221 + static inline int rpc_reply_expected(struct rpc_task *task) 222 + { 223 + return (task->tk_msg.rpc_proc != NULL) && 224 + (task->tk_msg.rpc_proc->p_decode != NULL); 225 + } 226 + 220 227 #endif /* __KERNEL__ */ 221 228 #endif /* _LINUX_SUNRPC_CLNT_H */
+94
include/linux/sunrpc/xdr.h
··· 253 253 return xdr->nwords << 2; 254 254 } 255 255 256 + ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, 257 + size_t size); 258 + ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, 259 + size_t maxlen, gfp_t gfp_flags); 260 + ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, 261 + size_t size); 256 262 ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, 257 263 size_t maxlen, gfp_t gfp_flags); 258 264 /** ··· 319 313 } 320 314 321 315 /** 316 + * xdr_stream_encode_opaque_inline - Encode opaque xdr data 317 + * @xdr: pointer to xdr_stream 318 + * @ptr: pointer to void pointer 319 + * @len: size of object 320 + * 321 + * Return values: 322 + * On success, returns length in bytes of XDR buffer consumed 323 + * %-EMSGSIZE on XDR buffer overflow 324 + */ 325 + static inline ssize_t 326 + xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len) 327 + { 328 + size_t count = sizeof(__u32) + xdr_align_size(len); 329 + __be32 *p = xdr_reserve_space(xdr, count); 330 + 331 + if (unlikely(!p)) { 332 + *ptr = NULL; 333 + return -EMSGSIZE; 334 + } 335 + xdr_encode_opaque(p, NULL, len); 336 + *ptr = ++p; 337 + return count; 338 + } 339 + 340 + /** 322 341 * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data 323 342 * @xdr: pointer to xdr_stream 324 343 * @ptr: pointer to opaque data object ··· 384 353 return -EMSGSIZE; 385 354 xdr_encode_opaque(p, ptr, len); 386 355 return count; 356 + } 357 + 358 + /** 359 + * xdr_stream_encode_uint32_array - Encode variable length array of integers 360 + * @xdr: pointer to xdr_stream 361 + * @array: array of integers 362 + * @array_size: number of elements in @array 363 + * 364 + * Return values: 365 + * On success, returns length in bytes of XDR buffer consumed 366 + * %-EMSGSIZE on XDR buffer overflow 367 + */ 368 + static inline ssize_t 369 + xdr_stream_encode_uint32_array(struct xdr_stream *xdr, 370 + const __u32 
*array, size_t array_size) 371 + { 372 + ssize_t ret = (array_size+1) * sizeof(__u32); 373 + __be32 *p = xdr_reserve_space(xdr, ret); 374 + 375 + if (unlikely(!p)) 376 + return -EMSGSIZE; 377 + *p++ = cpu_to_be32(array_size); 378 + for (; array_size > 0; p++, array++, array_size--) 379 + *p = cpu_to_be32p(array); 380 + return ret; 387 381 } 388 382 389 383 /** ··· 487 431 *ptr = p; 488 432 } 489 433 return len; 434 + } 435 + 436 + /** 437 + * xdr_stream_decode_uint32_array - Decode variable length array of integers 438 + * @xdr: pointer to xdr_stream 439 + * @array: location to store the integer array or NULL 440 + * @array_size: number of elements to store 441 + * 442 + * Return values: 443 + * On success, returns number of elements stored in @array 444 + * %-EBADMSG on XDR buffer overflow 445 + * %-EMSGSIZE if the size of the array exceeds @array_size 446 + */ 447 + static inline ssize_t 448 + xdr_stream_decode_uint32_array(struct xdr_stream *xdr, 449 + __u32 *array, size_t array_size) 450 + { 451 + __be32 *p; 452 + __u32 len; 453 + ssize_t retval; 454 + 455 + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) 456 + return -EBADMSG; 457 + p = xdr_inline_decode(xdr, len * sizeof(*p)); 458 + if (unlikely(!p)) 459 + return -EBADMSG; 460 + if (array == NULL) 461 + return len; 462 + if (len <= array_size) { 463 + if (len < array_size) 464 + memset(array+len, 0, (array_size-len)*sizeof(*array)); 465 + array_size = len; 466 + retval = len; 467 + } else 468 + retval = -EMSGSIZE; 469 + for (; array_size > 0; p++, array++, array_size--) 470 + *array = be32_to_cpup(p); 471 + return retval; 490 472 } 491 473 #endif /* __KERNEL__ */ 492 474
+2 -1
include/linux/sunrpc/xprt.h
··· 197 197 struct list_head free; /* free slots */ 198 198 unsigned int max_reqs; /* max number of slots */ 199 199 unsigned int min_reqs; /* min number of slots */ 200 - atomic_t num_reqs; /* total slots */ 200 + unsigned int num_reqs; /* total slots */ 201 201 unsigned long state; /* transport state */ 202 202 unsigned char resvport : 1; /* use a reserved port */ 203 203 atomic_t swapper; /* we're swapping over this ··· 373 373 void xprt_write_space(struct rpc_xprt *xprt); 374 374 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); 375 375 struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); 376 + void xprt_update_rtt(struct rpc_task *task); 376 377 void xprt_complete_rqst(struct rpc_task *task, int copied); 377 378 void xprt_pin_rqst(struct rpc_rqst *req); 378 379 void xprt_unpin_rqst(struct rpc_rqst *req);
+87 -19
include/trace/events/sunrpc.h
··· 50 50 ); 51 51 52 52 TRACE_EVENT(rpc_connect_status, 53 - TP_PROTO(struct rpc_task *task, int status), 53 + TP_PROTO(const struct rpc_task *task), 54 54 55 - TP_ARGS(task, status), 55 + TP_ARGS(task), 56 56 57 57 TP_STRUCT__entry( 58 58 __field(unsigned int, task_id) ··· 63 63 TP_fast_assign( 64 64 __entry->task_id = task->tk_pid; 65 65 __entry->client_id = task->tk_client->cl_clid; 66 - __entry->status = status; 66 + __entry->status = task->tk_status; 67 67 ), 68 68 69 69 TP_printk("task:%u@%u status=%d", ··· 103 103 104 104 DECLARE_EVENT_CLASS(rpc_task_running, 105 105 106 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), 106 + TP_PROTO(const struct rpc_task *task, const void *action), 107 107 108 - TP_ARGS(clnt, task, action), 108 + TP_ARGS(task, action), 109 109 110 110 TP_STRUCT__entry( 111 111 __field(unsigned int, task_id) ··· 117 117 ), 118 118 119 119 TP_fast_assign( 120 - __entry->client_id = clnt ? clnt->cl_clid : -1; 120 + __entry->client_id = task->tk_client ? 
121 + task->tk_client->cl_clid : -1; 121 122 __entry->task_id = task->tk_pid; 122 123 __entry->action = action; 123 124 __entry->runstate = task->tk_runstate; ··· 137 136 138 137 DEFINE_EVENT(rpc_task_running, rpc_task_begin, 139 138 140 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), 139 + TP_PROTO(const struct rpc_task *task, const void *action), 141 140 142 - TP_ARGS(clnt, task, action) 141 + TP_ARGS(task, action) 143 142 144 143 ); 145 144 146 145 DEFINE_EVENT(rpc_task_running, rpc_task_run_action, 147 146 148 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), 147 + TP_PROTO(const struct rpc_task *task, const void *action), 149 148 150 - TP_ARGS(clnt, task, action) 149 + TP_ARGS(task, action) 151 150 152 151 ); 153 152 154 153 DEFINE_EVENT(rpc_task_running, rpc_task_complete, 155 154 156 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action), 155 + TP_PROTO(const struct rpc_task *task, const void *action), 157 156 158 - TP_ARGS(clnt, task, action) 157 + TP_ARGS(task, action) 159 158 160 159 ); 161 160 162 161 DECLARE_EVENT_CLASS(rpc_task_queued, 163 162 164 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), 163 + TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q), 165 164 166 - TP_ARGS(clnt, task, q), 165 + TP_ARGS(task, q), 167 166 168 167 TP_STRUCT__entry( 169 168 __field(unsigned int, task_id) ··· 176 175 ), 177 176 178 177 TP_fast_assign( 179 - __entry->client_id = clnt ? clnt->cl_clid : -1; 178 + __entry->client_id = task->tk_client ? 
179 + task->tk_client->cl_clid : -1; 180 180 __entry->task_id = task->tk_pid; 181 181 __entry->timeout = task->tk_timeout; 182 182 __entry->runstate = task->tk_runstate; ··· 198 196 199 197 DEFINE_EVENT(rpc_task_queued, rpc_task_sleep, 200 198 201 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), 199 + TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q), 202 200 203 - TP_ARGS(clnt, task, q) 201 + TP_ARGS(task, q) 204 202 205 203 ); 206 204 207 205 DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup, 208 206 209 - TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q), 207 + TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q), 210 208 211 - TP_ARGS(clnt, task, q) 209 + TP_ARGS(task, q) 212 210 211 + ); 212 + 213 + TRACE_EVENT(rpc_stats_latency, 214 + 215 + TP_PROTO( 216 + const struct rpc_task *task, 217 + ktime_t backlog, 218 + ktime_t rtt, 219 + ktime_t execute 220 + ), 221 + 222 + TP_ARGS(task, backlog, rtt, execute), 223 + 224 + TP_STRUCT__entry( 225 + __field(u32, xid) 226 + __field(int, version) 227 + __string(progname, task->tk_client->cl_program->name) 228 + __string(procname, rpc_proc_name(task)) 229 + __field(unsigned long, backlog) 230 + __field(unsigned long, rtt) 231 + __field(unsigned long, execute) 232 + __string(addr, 233 + task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]) 234 + __string(port, 235 + task->tk_xprt->address_strings[RPC_DISPLAY_PORT]) 236 + ), 237 + 238 + TP_fast_assign( 239 + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); 240 + __entry->version = task->tk_client->cl_vers; 241 + __assign_str(progname, task->tk_client->cl_program->name) 242 + __assign_str(procname, rpc_proc_name(task)) 243 + __entry->backlog = ktime_to_us(backlog); 244 + __entry->rtt = ktime_to_us(rtt); 245 + __entry->execute = ktime_to_us(execute); 246 + __assign_str(addr, 247 + task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]); 248 + 
__assign_str(port, 249 + task->tk_xprt->address_strings[RPC_DISPLAY_PORT]); 250 + ), 251 + 252 + TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", 253 + __get_str(addr), __get_str(port), __entry->xid, 254 + __get_str(progname), __entry->version, __get_str(procname), 255 + __entry->backlog, __entry->rtt, __entry->execute) 213 256 ); 214 257 215 258 /* ··· 452 405 DEFINE_EVENT(rpc_xprt_event, xprt_complete_rqst, 453 406 TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), 454 407 TP_ARGS(xprt, xid, status)); 408 + 409 + TRACE_EVENT(xprt_ping, 410 + TP_PROTO(const struct rpc_xprt *xprt, int status), 411 + 412 + TP_ARGS(xprt, status), 413 + 414 + TP_STRUCT__entry( 415 + __field(int, status) 416 + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) 417 + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) 418 + ), 419 + 420 + TP_fast_assign( 421 + __entry->status = status; 422 + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); 423 + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); 424 + ), 425 + 426 + TP_printk("peer=[%s]:%s status=%d", 427 + __get_str(addr), __get_str(port), __entry->status) 428 + ); 455 429 456 430 TRACE_EVENT(xs_tcp_data_ready, 457 431 TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total),
+7 -1
net/sunrpc/clnt.c
··· 1887 1887 1888 1888 dprint_status(task); 1889 1889 1890 - trace_rpc_connect_status(task, status); 1890 + trace_rpc_connect_status(task); 1891 1891 task->tk_status = 0; 1892 1892 switch (status) { 1893 1893 case -ECONNREFUSED: ··· 2014 2014 case -EPERM: 2015 2015 if (RPC_IS_SOFTCONN(task)) { 2016 2016 xprt_end_transmit(task); 2017 + if (!task->tk_msg.rpc_proc->p_proc) 2018 + trace_xprt_ping(task->tk_xprt, 2019 + task->tk_status); 2017 2020 rpc_exit(task, task->tk_status); 2018 2021 break; 2019 2022 } ··· 2114 2111 struct rpc_clnt *clnt = task->tk_client; 2115 2112 struct rpc_rqst *req = task->tk_rqstp; 2116 2113 int status; 2114 + 2115 + if (!task->tk_msg.rpc_proc->p_proc) 2116 + trace_xprt_ping(task->tk_xprt, task->tk_status); 2117 2117 2118 2118 if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent) 2119 2119 task->tk_status = req->rq_reply_bytes_recvd;
+5 -5
net/sunrpc/sched.c
··· 276 276 { 277 277 rpc_task_set_debuginfo(task); 278 278 set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 279 - trace_rpc_task_begin(task->tk_client, task, NULL); 279 + trace_rpc_task_begin(task, NULL); 280 280 } 281 281 282 282 /* ··· 291 291 unsigned long flags; 292 292 int ret; 293 293 294 - trace_rpc_task_complete(task->tk_client, task, NULL); 294 + trace_rpc_task_complete(task, NULL); 295 295 296 296 spin_lock_irqsave(&wq->lock, flags); 297 297 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); ··· 358 358 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", 359 359 task->tk_pid, rpc_qname(q), jiffies); 360 360 361 - trace_rpc_task_sleep(task->tk_client, task, q); 361 + trace_rpc_task_sleep(task, q); 362 362 363 363 __rpc_add_wait_queue(q, task, queue_priority); 364 364 ··· 428 428 return; 429 429 } 430 430 431 - trace_rpc_task_wakeup(task->tk_client, task, queue); 431 + trace_rpc_task_wakeup(task, queue); 432 432 433 433 __rpc_remove_wait_queue(queue, task); 434 434 ··· 780 780 } 781 781 if (!do_action) 782 782 break; 783 - trace_rpc_task_run_action(task->tk_client, task, do_action); 783 + trace_rpc_task_run_action(task, do_action); 784 784 do_action(task); 785 785 786 786 /*
+11 -5
net/sunrpc/stats.c
··· 24 24 #include <linux/sunrpc/metrics.h> 25 25 #include <linux/rcupdate.h> 26 26 27 + #include <trace/events/sunrpc.h> 28 + 27 29 #include "netns.h" 28 30 29 31 #define RPCDBG_FACILITY RPCDBG_MISC ··· 150 148 struct rpc_iostats *op_metrics) 151 149 { 152 150 struct rpc_rqst *req = task->tk_rqstp; 153 - ktime_t delta, now; 151 + ktime_t backlog, execute, now; 154 152 155 153 if (!op_metrics || !req) 156 154 return; ··· 166 164 op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; 167 165 op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; 168 166 167 + backlog = 0; 169 168 if (ktime_to_ns(req->rq_xtime)) { 170 - delta = ktime_sub(req->rq_xtime, task->tk_start); 171 - op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); 169 + backlog = ktime_sub(req->rq_xtime, task->tk_start); 170 + op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog); 172 171 } 172 + 173 173 op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); 174 174 175 - delta = ktime_sub(now, task->tk_start); 176 - op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); 175 + execute = ktime_sub(now, task->tk_start); 176 + op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); 177 177 178 178 spin_unlock(&op_metrics->om_lock); 179 + 180 + trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute); 179 181 } 180 182 EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); 181 183
-6
net/sunrpc/sunrpc.h
··· 37 37 char data[]; 38 38 }; 39 39 40 - static inline int rpc_reply_expected(struct rpc_task *task) 41 - { 42 - return (task->tk_msg.rpc_proc != NULL) && 43 - (task->tk_msg.rpc_proc->p_decode != NULL); 44 - } 45 - 46 40 static inline int sock_is_loopback(struct sock *sk) 47 41 { 48 42 struct dst_entry *dst;
+82
net/sunrpc/xdr.c
··· 1519 1519 EXPORT_SYMBOL_GPL(xdr_process_buf); 1520 1520 1521 1521 /** 1522 + * xdr_stream_decode_opaque - Decode variable length opaque 1523 + * @xdr: pointer to xdr_stream 1524 + * @ptr: location to store opaque data 1525 + * @size: size of storage buffer @ptr 1526 + * 1527 + * Return values: 1528 + * On success, returns size of object stored in *@ptr 1529 + * %-EBADMSG on XDR buffer overflow 1530 + * %-EMSGSIZE on overflow of storage buffer @ptr 1531 + */ 1532 + ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size) 1533 + { 1534 + ssize_t ret; 1535 + void *p; 1536 + 1537 + ret = xdr_stream_decode_opaque_inline(xdr, &p, size); 1538 + if (ret <= 0) 1539 + return ret; 1540 + memcpy(ptr, p, ret); 1541 + return ret; 1542 + } 1543 + EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque); 1544 + 1545 + /** 1546 + * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque 1547 + * @xdr: pointer to xdr_stream 1548 + * @ptr: location to store pointer to opaque data 1549 + * @maxlen: maximum acceptable object size 1550 + * @gfp_flags: GFP mask to use 1551 + * 1552 + * Return values: 1553 + * On success, returns size of object stored in *@ptr 1554 + * %-EBADMSG on XDR buffer overflow 1555 + * %-EMSGSIZE if the size of the object would exceed @maxlen 1556 + * %-ENOMEM on memory allocation failure 1557 + */ 1558 + ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, 1559 + size_t maxlen, gfp_t gfp_flags) 1560 + { 1561 + ssize_t ret; 1562 + void *p; 1563 + 1564 + ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen); 1565 + if (ret > 0) { 1566 + *ptr = kmemdup(p, ret, gfp_flags); 1567 + if (*ptr != NULL) 1568 + return ret; 1569 + ret = -ENOMEM; 1570 + } 1571 + *ptr = NULL; 1572 + return ret; 1573 + } 1574 + EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup); 1575 + 1576 + /** 1577 + * xdr_stream_decode_string - Decode variable length string 1578 + * @xdr: pointer to xdr_stream 1579 + * @str: location to store string 1580 + 
* @size: size of storage buffer @str 1581 + * 1582 + * Return values: 1583 + * On success, returns length of NUL-terminated string stored in *@str 1584 + * %-EBADMSG on XDR buffer overflow 1585 + * %-EMSGSIZE on overflow of storage buffer @str 1586 + */ 1587 + ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size) 1588 + { 1589 + ssize_t ret; 1590 + void *p; 1591 + 1592 + ret = xdr_stream_decode_opaque_inline(xdr, &p, size); 1593 + if (ret > 0) { 1594 + memcpy(str, p, ret); 1595 + str[ret] = '\0'; 1596 + return strlen(str); 1597 + } 1598 + *str = '\0'; 1599 + return ret; 1600 + } 1601 + EXPORT_SYMBOL_GPL(xdr_stream_decode_string); 1602 + 1603 + /** 1522 1604 * xdr_stream_decode_string_dup - Decode and duplicate variable length string 1523 1605 * @xdr: pointer to xdr_stream 1524 1606 * @str: location to store pointer to string
+20 -14
net/sunrpc/xprt.c
··· 826 826 * @xprt: transport on which the original request was transmitted 827 827 * @xid: RPC XID of incoming reply 828 828 * 829 + * Caller holds xprt->recv_lock. 829 830 */ 830 831 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 831 832 { ··· 835 834 list_for_each_entry(entry, &xprt->recv, rq_list) 836 835 if (entry->rq_xid == xid) { 837 836 trace_xprt_lookup_rqst(xprt, xid, 0); 837 + entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime); 838 838 return entry; 839 839 } 840 840 ··· 891 889 } 892 890 } 893 891 894 - static void xprt_update_rtt(struct rpc_task *task) 892 + /** 893 + * xprt_update_rtt - Update RPC RTT statistics 894 + * @task: RPC request that recently completed 895 + * 896 + * Caller holds xprt->recv_lock. 897 + */ 898 + void xprt_update_rtt(struct rpc_task *task) 895 899 { 896 900 struct rpc_rqst *req = task->tk_rqstp; 897 901 struct rpc_rtt *rtt = task->tk_client->cl_rtt; ··· 910 902 rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); 911 903 } 912 904 } 905 + EXPORT_SYMBOL_GPL(xprt_update_rtt); 913 906 914 907 /** 915 908 * xprt_complete_rqst - called when reply processing is complete 916 909 * @task: RPC request that recently completed 917 910 * @copied: actual number of bytes received from the transport 918 911 * 919 - * Caller holds transport lock. 912 + * Caller holds xprt->recv_lock. 
920 913 */ 921 914 void xprt_complete_rqst(struct rpc_task *task, int copied) 922 915 { ··· 929 920 trace_xprt_complete_rqst(xprt, req->rq_xid, copied); 930 921 931 922 xprt->stat.recvs++; 932 - req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); 933 - if (xprt->ops->timer != NULL) 934 - xprt_update_rtt(task); 935 923 936 924 list_del_init(&req->rq_list); 937 925 req->rq_private_buf.len = copied; ··· 1009 1003 struct rpc_rqst *req = task->tk_rqstp; 1010 1004 struct rpc_xprt *xprt = req->rq_xprt; 1011 1005 unsigned int connect_cookie; 1012 - int status, numreqs; 1006 + int status; 1013 1007 1014 1008 dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 1015 1009 ··· 1033 1027 return; 1034 1028 1035 1029 connect_cookie = xprt->connect_cookie; 1036 - req->rq_xtime = ktime_get(); 1037 1030 status = xprt->ops->send_request(task); 1038 1031 trace_xprt_transmit(xprt, req->rq_xid, status); 1039 1032 if (status != 0) { ··· 1047 1042 1048 1043 xprt->ops->set_retrans_timeout(task); 1049 1044 1050 - numreqs = atomic_read(&xprt->num_reqs); 1051 - if (numreqs > xprt->stat.max_slots) 1052 - xprt->stat.max_slots = numreqs; 1053 1045 xprt->stat.sends++; 1054 1046 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; 1055 1047 xprt->stat.bklog_u += xprt->backlog.qlen; ··· 1108 1106 { 1109 1107 struct rpc_rqst *req = ERR_PTR(-EAGAIN); 1110 1108 1111 - if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) 1109 + if (xprt->num_reqs >= xprt->max_reqs) 1112 1110 goto out; 1111 + ++xprt->num_reqs; 1113 1112 spin_unlock(&xprt->reserve_lock); 1114 1113 req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); 1115 1114 spin_lock(&xprt->reserve_lock); 1116 1115 if (req != NULL) 1117 1116 goto out; 1118 - atomic_dec(&xprt->num_reqs); 1117 + --xprt->num_reqs; 1119 1118 req = ERR_PTR(-ENOMEM); 1120 1119 out: 1121 1120 return req; ··· 1124 1121 1125 1122 static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 1126 1123 { 1127 - if 
(atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { 1124 + if (xprt->num_reqs > xprt->min_reqs) { 1125 + --xprt->num_reqs; 1128 1126 kfree(req); 1129 1127 return true; 1130 1128 } ··· 1161 1157 spin_unlock(&xprt->reserve_lock); 1162 1158 return; 1163 1159 out_init_req: 1160 + xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots, 1161 + xprt->num_reqs); 1164 1162 task->tk_status = 0; 1165 1163 task->tk_rqstp = req; 1166 1164 xprt_request_init(task, xprt); ··· 1230 1224 else 1231 1225 xprt->max_reqs = num_prealloc; 1232 1226 xprt->min_reqs = num_prealloc; 1233 - atomic_set(&xprt->num_reqs, num_prealloc); 1227 + xprt->num_reqs = num_prealloc; 1234 1228 1235 1229 return xprt; 1236 1230
-7
net/sunrpc/xprtrdma/backchannel.c
··· 44 44 if (IS_ERR(req)) 45 45 return PTR_ERR(req); 46 46 47 - rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, 48 - DMA_TO_DEVICE, GFP_KERNEL); 49 - if (IS_ERR(rb)) 50 - goto out_fail; 51 - req->rl_rdmabuf = rb; 52 - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); 53 - 54 47 size = r_xprt->rx_data.inline_rsize; 55 48 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); 56 49 if (IS_ERR(rb))
+12 -1
net/sunrpc/xprtrdma/fmr_ops.c
··· 191 191 192 192 mr = rpcrdma_mr_get(r_xprt); 193 193 if (!mr) 194 - return ERR_PTR(-ENOBUFS); 194 + return ERR_PTR(-EAGAIN); 195 195 196 196 pageoff = offset_in_page(seg1->mr_offset); 197 197 seg1->mr_offset -= pageoff; /* start of page */ ··· 251 251 return ERR_PTR(-EIO); 252 252 } 253 253 254 + /* Post Send WR containing the RPC Call message. 255 + */ 256 + static int 257 + fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) 258 + { 259 + struct ib_send_wr *bad_wr; 260 + 261 + return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr); 262 + } 263 + 254 264 /* Invalidate all memory regions that were registered for "req". 255 265 * 256 266 * Sleeps until it is safe for the host CPU to access the ··· 315 305 316 306 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { 317 307 .ro_map = fmr_op_map, 308 + .ro_send = fmr_op_send, 318 309 .ro_unmap_sync = fmr_op_unmap_sync, 319 310 .ro_recover_mr = fmr_op_recover_mr, 320 311 .ro_open = fmr_op_open,
+36 -17
net/sunrpc/xprtrdma/frwr_ops.c
··· 357 357 struct rpcrdma_mr *mr; 358 358 struct ib_mr *ibmr; 359 359 struct ib_reg_wr *reg_wr; 360 - struct ib_send_wr *bad_wr; 361 - int rc, i, n; 360 + int i, n; 362 361 u8 key; 363 362 364 363 mr = NULL; ··· 366 367 rpcrdma_mr_defer_recovery(mr); 367 368 mr = rpcrdma_mr_get(r_xprt); 368 369 if (!mr) 369 - return ERR_PTR(-ENOBUFS); 370 + return ERR_PTR(-EAGAIN); 370 371 } while (mr->frwr.fr_state != FRWR_IS_INVALID); 371 372 frwr = &mr->frwr; 372 373 frwr->fr_state = FRWR_IS_VALID; ··· 406 407 ib_update_fast_reg_key(ibmr, ++key); 407 408 408 409 reg_wr = &frwr->fr_regwr; 409 - reg_wr->wr.next = NULL; 410 - reg_wr->wr.opcode = IB_WR_REG_MR; 411 - frwr->fr_cqe.done = frwr_wc_fastreg; 412 - reg_wr->wr.wr_cqe = &frwr->fr_cqe; 413 - reg_wr->wr.num_sge = 0; 414 - reg_wr->wr.send_flags = 0; 415 410 reg_wr->mr = ibmr; 416 411 reg_wr->key = ibmr->rkey; 417 412 reg_wr->access = writing ? 418 413 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 419 414 IB_ACCESS_REMOTE_READ; 420 - 421 - rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); 422 - if (rc) 423 - goto out_senderr; 424 415 425 416 mr->mr_handle = ibmr->rkey; 426 417 mr->mr_length = ibmr->length; ··· 431 442 frwr->fr_mr, n, mr->mr_nents); 432 443 rpcrdma_mr_defer_recovery(mr); 433 444 return ERR_PTR(-EIO); 445 + } 434 446 435 - out_senderr: 436 - pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc); 437 - rpcrdma_mr_defer_recovery(mr); 438 - return ERR_PTR(-ENOTCONN); 447 + /* Post Send WR containing the RPC Call message. 448 + * 449 + * For FRMR, chain any FastReg WRs to the Send WR. Only a 450 + * single ib_post_send call is needed to register memory 451 + * and then post the Send WR. 
452 + */ 453 + static int 454 + frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) 455 + { 456 + struct ib_send_wr *post_wr, *bad_wr; 457 + struct rpcrdma_mr *mr; 458 + 459 + post_wr = &req->rl_sendctx->sc_wr; 460 + list_for_each_entry(mr, &req->rl_registered, mr_list) { 461 + struct rpcrdma_frwr *frwr; 462 + 463 + frwr = &mr->frwr; 464 + 465 + frwr->fr_cqe.done = frwr_wc_fastreg; 466 + frwr->fr_regwr.wr.next = post_wr; 467 + frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe; 468 + frwr->fr_regwr.wr.num_sge = 0; 469 + frwr->fr_regwr.wr.opcode = IB_WR_REG_MR; 470 + frwr->fr_regwr.wr.send_flags = 0; 471 + 472 + post_wr = &frwr->fr_regwr.wr; 473 + } 474 + 475 + /* If ib_post_send fails, the next ->send_request for 476 + * @req will queue these MWs for recovery. 477 + */ 478 + return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); 439 479 } 440 480 441 481 /* Handle a remotely invalidated mr on the @mrs list ··· 579 561 580 562 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 581 563 .ro_map = frwr_op_map, 564 + .ro_send = frwr_op_send, 582 565 .ro_reminv = frwr_op_reminv, 583 566 .ro_unmap_sync = frwr_op_unmap_sync, 584 567 .ro_recover_mr = frwr_op_recover_mr,
+22 -10
net/sunrpc/xprtrdma/rpc_rdma.c
··· 365 365 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 366 366 false, &mr); 367 367 if (IS_ERR(seg)) 368 - return PTR_ERR(seg); 368 + goto out_maperr; 369 369 rpcrdma_mr_push(mr, &req->rl_registered); 370 370 371 371 if (encode_read_segment(xdr, mr, pos) < 0) ··· 377 377 } while (nsegs); 378 378 379 379 return 0; 380 + 381 + out_maperr: 382 + if (PTR_ERR(seg) == -EAGAIN) 383 + xprt_wait_for_buffer_space(rqst->rq_task, NULL); 384 + return PTR_ERR(seg); 380 385 } 381 386 382 387 /* Register and XDR encode the Write list. Supports encoding a list ··· 428 423 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 429 424 true, &mr); 430 425 if (IS_ERR(seg)) 431 - return PTR_ERR(seg); 426 + goto out_maperr; 432 427 rpcrdma_mr_push(mr, &req->rl_registered); 433 428 434 429 if (encode_rdma_segment(xdr, mr) < 0) ··· 445 440 *segcount = cpu_to_be32(nchunks); 446 441 447 442 return 0; 443 + 444 + out_maperr: 445 + if (PTR_ERR(seg) == -EAGAIN) 446 + xprt_wait_for_buffer_space(rqst->rq_task, NULL); 447 + return PTR_ERR(seg); 448 448 } 449 449 450 450 /* Register and XDR encode the Reply chunk. 
Supports encoding an array ··· 491 481 seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, 492 482 true, &mr); 493 483 if (IS_ERR(seg)) 494 - return PTR_ERR(seg); 484 + goto out_maperr; 495 485 rpcrdma_mr_push(mr, &req->rl_registered); 496 486 497 487 if (encode_rdma_segment(xdr, mr) < 0) ··· 508 498 *segcount = cpu_to_be32(nchunks); 509 499 510 500 return 0; 501 + 502 + out_maperr: 503 + if (PTR_ERR(seg) == -EAGAIN) 504 + xprt_wait_for_buffer_space(rqst->rq_task, NULL); 505 + return PTR_ERR(seg); 511 506 } 512 507 513 508 /** ··· 739 724 * Returns: 740 725 * %0 if the RPC was sent successfully, 741 726 * %-ENOTCONN if the connection was lost, 742 - * %-EAGAIN if not enough pages are available for on-demand reply buffer, 743 - * %-ENOBUFS if no MRs are available to register chunks, 727 + * %-EAGAIN if the caller should call again with the same arguments, 728 + * %-ENOBUFS if the caller should call again after a delay, 744 729 * %-EMSGSIZE if the transport header is too small, 745 730 * %-EIO if a permanent problem occurred while marshaling. 746 731 */ ··· 883 868 return 0; 884 869 885 870 out_err: 886 - if (ret != -ENOBUFS) { 887 - pr_err("rpcrdma: header marshaling failed (%d)\n", ret); 888 - r_xprt->rx_stats.failed_marshal_count++; 889 - } 871 + r_xprt->rx_stats.failed_marshal_count++; 890 872 return ret; 891 873 } 892 874 ··· 1378 1366 1379 1367 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); 1380 1368 1381 - queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); 1369 + queue_work(rpcrdma_receive_wq, &rep->rr_work); 1382 1370 return; 1383 1371 1384 1372 out_badstatus:
+10 -33
net/sunrpc/xprtrdma/transport.c
··· 52 52 #include <linux/slab.h> 53 53 #include <linux/seq_file.h> 54 54 #include <linux/sunrpc/addr.h> 55 - #include <linux/smp.h> 56 55 57 56 #include "xprt_rdma.h" 58 57 ··· 236 237 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 237 238 238 239 spin_lock_bh(&xprt->transport_lock); 239 - if (++xprt->connect_cookie == 0) /* maintain a reserved value */ 240 - ++xprt->connect_cookie; 241 240 if (ep->rep_connected > 0) { 242 241 if (!xprt_test_and_set_connected(xprt)) 243 242 xprt_wake_pending_tasks(xprt, 0); ··· 537 540 } 538 541 } 539 542 540 - /* Allocate a fixed-size buffer in which to construct and send the 541 - * RPC-over-RDMA header for this request. 542 - */ 543 - static bool 544 - rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 545 - gfp_t flags) 546 - { 547 - size_t size = RPCRDMA_HDRBUF_SIZE; 548 - struct rpcrdma_regbuf *rb; 549 - 550 - if (req->rl_rdmabuf) 551 - return true; 552 - 553 - rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); 554 - if (IS_ERR(rb)) 555 - return false; 556 - 557 - r_xprt->rx_stats.hardway_register_count += size; 558 - req->rl_rdmabuf = rb; 559 - xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); 560 - return true; 561 - } 562 - 563 543 static bool 564 544 rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 565 545 size_t size, gfp_t flags) ··· 618 644 if (RPC_IS_SWAPPER(task)) 619 645 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; 620 646 621 - if (!rpcrdma_get_rdmabuf(r_xprt, req, flags)) 622 - goto out_fail; 623 647 if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) 624 648 goto out_fail; 625 649 if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) 626 650 goto out_fail; 627 651 628 - req->rl_cpu = smp_processor_id(); 629 - req->rl_connect_cookie = 0; /* our reserved value */ 630 652 rpcrdma_set_xprtdata(rqst, req); 631 653 rqst->rq_buffer = req->rl_sendbuf->rg_base; 632 654 rqst->rq_rbuffer = req->rl_recvbuf->rg_base; ··· 664 694 * 
Returns: 665 695 * %0 if the RPC message has been sent 666 696 * %-ENOTCONN if the caller should reconnect and call again 667 - * %-ENOBUFS if the caller should call again later 697 + * %-EAGAIN if the caller should call again 698 + * %-ENOBUFS if the caller should call again after a delay 668 699 * %-EIO if a permanent error occurred and the request was not 669 700 * sent. Do not try to send this message again. 670 701 */ ··· 694 723 rpcrdma_recv_buffer_get(req); 695 724 696 725 /* Must suppress retransmit to maintain credits */ 697 - if (req->rl_connect_cookie == xprt->connect_cookie) 726 + if (rqst->rq_connect_cookie == xprt->connect_cookie) 698 727 goto drop_connection; 699 - req->rl_connect_cookie = xprt->connect_cookie; 728 + rqst->rq_xtime = ktime_get(); 700 729 701 730 __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 702 731 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) ··· 704 733 705 734 rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; 706 735 rqst->rq_bytes_sent = 0; 736 + 737 + /* An RPC with no reply will throw off credit accounting, 738 + * so drop the connection to reset the credit grant. 739 + */ 740 + if (!rpc_reply_expected(task)) 741 + goto drop_connection; 707 742 return 0; 708 743 709 744 failed_marshal:
+29 -15
net/sunrpc/xprtrdma/verbs.c
··· 250 250 wait_for_completion(&ia->ri_remove_done); 251 251 252 252 ia->ri_id = NULL; 253 - ia->ri_pd = NULL; 254 253 ia->ri_device = NULL; 255 254 /* Return 1 to ensure the core destroys the id. */ 256 255 return 1; 257 256 case RDMA_CM_EVENT_ESTABLISHED: 257 + ++xprt->rx_xprt.connect_cookie; 258 258 connstate = 1; 259 259 rpcrdma_update_connect_private(xprt, &event->param.conn); 260 260 goto connected; ··· 273 273 connstate = -EAGAIN; 274 274 goto connected; 275 275 case RDMA_CM_EVENT_DISCONNECTED: 276 + ++xprt->rx_xprt.connect_cookie; 276 277 connstate = -ECONNABORTED; 277 278 connected: 278 279 xprt->rx_buf.rb_credits = 1; ··· 446 445 ia->ri_id->qp = NULL; 447 446 } 448 447 ib_free_cq(ep->rep_attr.recv_cq); 448 + ep->rep_attr.recv_cq = NULL; 449 449 ib_free_cq(ep->rep_attr.send_cq); 450 + ep->rep_attr.send_cq = NULL; 450 451 451 452 /* The ULP is responsible for ensuring all DMA 452 453 * mappings and MRs are gone. ··· 461 458 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); 462 459 } 463 460 rpcrdma_mrs_destroy(buf); 461 + ib_dealloc_pd(ia->ri_pd); 462 + ia->ri_pd = NULL; 464 463 465 464 /* Allow waiters to continue */ 466 465 complete(&ia->ri_remove_done); ··· 594 589 595 590 /* Client offers RDMA Read but does not initiate */ 596 591 ep->rep_remote_cma.initiator_depth = 0; 597 - if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ 598 - ep->rep_remote_cma.responder_resources = 32; 599 - else 600 - ep->rep_remote_cma.responder_resources = 601 - ia->ri_device->attrs.max_qp_rd_atom; 592 + ep->rep_remote_cma.responder_resources = 593 + min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom); 602 594 603 595 /* Limit transport retries so client can detect server 604 596 * GID changes quickly. 
RPC layer handles re-establishing ··· 630 628 { 631 629 cancel_delayed_work_sync(&ep->rep_connect_worker); 632 630 633 - if (ia->ri_id->qp) { 631 + if (ia->ri_id && ia->ri_id->qp) { 634 632 rpcrdma_ep_disconnect(ep, ia); 635 633 rdma_destroy_qp(ia->ri_id); 636 634 ia->ri_id->qp = NULL; 637 635 } 638 636 639 - ib_free_cq(ep->rep_attr.recv_cq); 640 - ib_free_cq(ep->rep_attr.send_cq); 637 + if (ep->rep_attr.recv_cq) 638 + ib_free_cq(ep->rep_attr.recv_cq); 639 + if (ep->rep_attr.send_cq) 640 + ib_free_cq(ep->rep_attr.send_cq); 641 641 } 642 642 643 643 /* Re-establish a connection after a device removal event. ··· 1028 1024 LIST_HEAD(free); 1029 1025 LIST_HEAD(all); 1030 1026 1031 - for (count = 0; count < 32; count++) { 1027 + for (count = 0; count < 3; count++) { 1032 1028 struct rpcrdma_mr *mr; 1033 1029 int rc; 1034 1030 ··· 1053 1049 list_splice(&all, &buf->rb_all); 1054 1050 r_xprt->rx_stats.mrs_allocated += count; 1055 1051 spin_unlock(&buf->rb_mrlock); 1056 - 1057 1052 trace_xprtrdma_createmrs(r_xprt, count); 1053 + 1054 + xprt_write_space(&r_xprt->rx_xprt); 1058 1055 } 1059 1056 1060 1057 static void ··· 1073 1068 rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) 1074 1069 { 1075 1070 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 1071 + struct rpcrdma_regbuf *rb; 1076 1072 struct rpcrdma_req *req; 1077 1073 1078 1074 req = kzalloc(sizeof(*req), GFP_KERNEL); 1079 1075 if (req == NULL) 1080 1076 return ERR_PTR(-ENOMEM); 1081 1077 1078 + rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, 1079 + DMA_TO_DEVICE, GFP_KERNEL); 1080 + if (IS_ERR(rb)) { 1081 + kfree(req); 1082 + return ERR_PTR(-ENOMEM); 1083 + } 1084 + req->rl_rdmabuf = rb; 1085 + xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); 1086 + req->rl_buffer = buffer; 1087 + INIT_LIST_HEAD(&req->rl_registered); 1088 + 1082 1089 spin_lock(&buffer->rb_reqslock); 1083 1090 list_add(&req->rl_all, &buffer->rb_allreqs); 1084 1091 spin_unlock(&buffer->rb_reqslock); 1085 - req->rl_buffer = &r_xprt->rx_buf; 
1086 - INIT_LIST_HEAD(&req->rl_registered); 1087 1092 return req; 1088 1093 } 1089 1094 ··· 1550 1535 struct rpcrdma_req *req) 1551 1536 { 1552 1537 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; 1553 - struct ib_send_wr *send_wr_fail; 1554 1538 int rc; 1555 1539 1556 1540 if (req->rl_reply) { ··· 1568 1554 --ep->rep_send_count; 1569 1555 } 1570 1556 1571 - rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); 1557 + rc = ia->ri_ops->ro_send(ia, req); 1572 1558 trace_xprtrdma_post_send(req, rc); 1573 1559 if (rc) 1574 1560 return -ENOTCONN;
+2 -2
net/sunrpc/xprtrdma/xprt_rdma.h
··· 334 334 struct rpcrdma_buffer; 335 335 struct rpcrdma_req { 336 336 struct list_head rl_list; 337 - int rl_cpu; 338 - unsigned int rl_connect_cookie; 339 337 struct rpcrdma_buffer *rl_buffer; 340 338 struct rpcrdma_rep *rl_reply; 341 339 struct xdr_stream rl_stream; ··· 472 474 (*ro_map)(struct rpcrdma_xprt *, 473 475 struct rpcrdma_mr_seg *, int, bool, 474 476 struct rpcrdma_mr **); 477 + int (*ro_send)(struct rpcrdma_ia *ia, 478 + struct rpcrdma_req *req); 475 479 void (*ro_reminv)(struct rpcrdma_rep *rep, 476 480 struct list_head *mrs); 477 481 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
+4
net/sunrpc/xprtsock.c
··· 527 527 xs_pktdump("packet data:", 528 528 req->rq_svec->iov_base, req->rq_svec->iov_len); 529 529 530 + req->rq_xtime = ktime_get(); 530 531 status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent, 531 532 true, &sent); 532 533 dprintk("RPC: %s(%u) = %d\n", ··· 590 589 591 590 if (!xprt_bound(xprt)) 592 591 return -ENOTCONN; 592 + req->rq_xtime = ktime_get(); 593 593 status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, 594 594 xdr, req->rq_bytes_sent, true, &sent); 595 595 ··· 680 678 /* Continue transmitting the packet/record. We must be careful 681 679 * to cope with writespace callbacks arriving _after_ we have 682 680 * called sendmsg(). */ 681 + req->rq_xtime = ktime_get(); 683 682 while (1) { 684 683 sent = 0; 685 684 status = xs_sendpages(transport->sock, NULL, 0, xdr, ··· 1063 1060 if (!rovr) 1064 1061 goto out_unlock; 1065 1062 xprt_pin_rqst(rovr); 1063 + xprt_update_rtt(rovr->rq_task); 1066 1064 spin_unlock(&xprt->recv_lock); 1067 1065 task = rovr->rq_task; 1068 1066