Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
"Lots of good bugfixes, including:

- fix a number of races in the NFSv4+ state code

- fix some shutdown crashes in multiple-network-namespace cases

- relax our 4.1 session limits; if you've an artificially low limit
to the number of 4.1 clients that can mount simultaneously, try
upgrading"

* tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux: (22 commits)
SUNRPC: Improve ordering of transport processing
nfsd: deal with revoked delegations appropriately
svcrdma: Enqueue after setting XPT_CLOSE in completion handlers
nfsd: use net->ns.inum as net ID
rpc: remove some BUG()s
svcrdma: Preserve CB send buffer across retransmits
nfsd: avoid gettimeofday for nfssvc_boot time
fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t
fs, nfsd: convert nfs4_cntl_odstate.co_odcount from atomic_t to refcount_t
fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t
lockd: double unregister of inetaddr notifiers
nfsd4: catch some false session retries
nfsd4: fix cached replies to solo SEQUENCE compounds
sunrpc: make function _svc_create_xprt static
SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
nfsd: use ARRAY_SIZE
nfsd: give out fewer session slots as limit approaches
nfsd: increase DRC cache limit
nfsd: remove unnecessary nofilehandle checks
nfs_common: convert int to bool
...

+225 -173
+9 -11
fs/lockd/svc.c
··· 369 369 printk(KERN_WARNING 370 370 "lockd_up: svc_rqst allocation failed, error=%d\n", 371 371 error); 372 + lockd_unregister_notifiers(); 372 373 goto out_rqst; 373 374 } 374 375 ··· 460 459 } 461 460 462 461 error = lockd_up_net(serv, net); 463 - if (error < 0) 464 - goto err_net; 462 + if (error < 0) { 463 + lockd_unregister_notifiers(); 464 + goto err_put; 465 + } 465 466 466 467 error = lockd_start_svc(serv); 467 - if (error < 0) 468 - goto err_start; 469 - 468 + if (error < 0) { 469 + lockd_down_net(serv, net); 470 + goto err_put; 471 + } 470 472 nlmsvc_users++; 471 473 /* 472 474 * Note: svc_serv structures have an initial use count of 1, ··· 480 476 err_create: 481 477 mutex_unlock(&nlmsvc_mutex); 482 478 return error; 483 - 484 - err_start: 485 - lockd_down_net(serv, net); 486 - err_net: 487 - lockd_unregister_notifiers(); 488 - goto err_put; 489 479 } 490 480 EXPORT_SYMBOL_GPL(lockd_up); 491 481
+12 -12
fs/nfs_common/grace.c
··· 55 55 } 56 56 EXPORT_SYMBOL_GPL(locks_end_grace); 57 57 58 - /** 59 - * locks_in_grace 60 - * 61 - * Lock managers call this function to determine when it is OK for them 62 - * to answer ordinary lock requests, and when they should accept only 63 - * lock reclaims. 64 - */ 65 - int 58 + static bool 66 59 __state_in_grace(struct net *net, bool open) 67 60 { 68 61 struct list_head *grace_list = net_generic(net, grace_net_id); ··· 71 78 return false; 72 79 } 73 80 74 - int locks_in_grace(struct net *net) 81 + /** 82 + * locks_in_grace 83 + * 84 + * Lock managers call this function to determine when it is OK for them 85 + * to answer ordinary lock requests, and when they should accept only 86 + * lock reclaims. 87 + */ 88 + bool locks_in_grace(struct net *net) 75 89 { 76 - return __state_in_grace(net, 0); 90 + return __state_in_grace(net, false); 77 91 } 78 92 EXPORT_SYMBOL_GPL(locks_in_grace); 79 93 80 - int opens_in_grace(struct net *net) 94 + bool opens_in_grace(struct net *net) 81 95 { 82 - return __state_in_grace(net, 1); 96 + return __state_in_grace(net, true); 83 97 } 84 98 EXPORT_SYMBOL_GPL(opens_in_grace); 85 99
+2 -3
fs/nfsd/fault_inject.c
··· 12 12 #include <linux/nsproxy.h> 13 13 #include <linux/sunrpc/addr.h> 14 14 #include <linux/uaccess.h> 15 + #include <linux/kernel.h> 15 16 16 17 #include "state.h" 17 18 #include "netns.h" ··· 127 126 }, 128 127 }; 129 128 130 - #define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op)) 131 - 132 129 int nfsd_fault_inject_init(void) 133 130 { 134 131 unsigned int i; ··· 137 138 if (!debug_dir) 138 139 goto fail; 139 140 140 - for (i = 0; i < NUM_INJECT_OPS; i++) { 141 + for (i = 0; i < ARRAY_SIZE(inject_ops); i++) { 141 142 op = &inject_ops[i]; 142 143 if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) 143 144 goto fail;
+1 -1
fs/nfsd/netns.h
··· 107 107 bool lockd_up; 108 108 109 109 /* Time of server startup */ 110 - struct timeval nfssvc_boot; 110 + struct timespec64 nfssvc_boot; 111 111 112 112 /* 113 113 * Max number of connections this nfsd container will allow. Defaults
+6 -4
fs/nfsd/nfs3xdr.c
··· 748 748 if (resp->status == 0) { 749 749 *p++ = htonl(resp->count); 750 750 *p++ = htonl(resp->committed); 751 - *p++ = htonl(nn->nfssvc_boot.tv_sec); 752 - *p++ = htonl(nn->nfssvc_boot.tv_usec); 751 + /* unique identifier, y2038 overflow can be ignored */ 752 + *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); 753 + *p++ = htonl(nn->nfssvc_boot.tv_nsec); 753 754 } 754 755 return xdr_ressize_check(rqstp, p); 755 756 } ··· 1120 1119 p = encode_wcc_data(rqstp, p, &resp->fh); 1121 1120 /* Write verifier */ 1122 1121 if (resp->status == 0) { 1123 - *p++ = htonl(nn->nfssvc_boot.tv_sec); 1124 - *p++ = htonl(nn->nfssvc_boot.tv_usec); 1122 + /* unique identifier, y2038 overflow can be ignored */ 1123 + *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); 1124 + *p++ = htonl(nn->nfssvc_boot.tv_nsec); 1125 1125 } 1126 1126 return xdr_ressize_check(rqstp, p); 1127 1127 }
+2 -2
fs/nfsd/nfs4layouts.c
··· 336 336 337 337 trace_layout_recall(&ls->ls_stid.sc_stateid); 338 338 339 - atomic_inc(&ls->ls_stid.sc_count); 339 + refcount_inc(&ls->ls_stid.sc_count); 340 340 nfsd4_run_cb(&ls->ls_recall); 341 341 342 342 out_unlock: ··· 441 441 goto done; 442 442 } 443 443 444 - atomic_inc(&ls->ls_stid.sc_count); 444 + refcount_inc(&ls->ls_stid.sc_count); 445 445 list_add_tail(&new->lo_perstate, &ls->ls_layouts); 446 446 new = NULL; 447 447 done:
+5 -14
fs/nfsd/nfs4proc.c
··· 485 485 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 486 486 union nfsd4_op_u *u) 487 487 { 488 - if (!cstate->current_fh.fh_dentry) 489 - return nfserr_nofilehandle; 490 - 491 488 u->getfh = &cstate->current_fh; 492 489 return nfs_ok; 493 490 } ··· 532 535 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 533 536 union nfsd4_op_u *u) 534 537 { 535 - if (!cstate->current_fh.fh_dentry) 536 - return nfserr_nofilehandle; 537 - 538 538 fh_dup2(&cstate->save_fh, &cstate->current_fh); 539 539 if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { 540 540 memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); ··· 564 570 565 571 /* 566 572 * This is opaque to client, so no need to byte-swap. Use 567 - * __force to keep sparse happy 573 + * __force to keep sparse happy. y2038 time_t overflow is 574 + * irrelevant in this usage. 568 575 */ 569 576 verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; 570 - verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; 577 + verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; 571 578 memcpy(verifier->data, verf, sizeof(verifier->data)); 572 579 } 573 580 ··· 698 703 union nfsd4_op_u *u) 699 704 { 700 705 struct nfsd4_link *link = &u->link; 701 - __be32 status = nfserr_nofilehandle; 706 + __be32 status; 702 707 703 - if (!cstate->save_fh.fh_dentry) 704 - return status; 705 708 status = nfsd_link(rqstp, &cstate->current_fh, 706 709 link->li_name, link->li_namelen, &cstate->save_fh); 707 710 if (!status) ··· 843 850 union nfsd4_op_u *u) 844 851 { 845 852 struct nfsd4_rename *rename = &u->rename; 846 - __be32 status = nfserr_nofilehandle; 853 + __be32 status; 847 854 848 - if (!cstate->save_fh.fh_dentry) 849 - return status; 850 855 if (opens_in_grace(SVC_NET(rqstp)) && 851 856 !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) 852 857 return nfserr_grace;
+100 -27
fs/nfsd/nfs4state.c
··· 359 359 { 360 360 might_lock(&state_lock); 361 361 362 - if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { 362 + if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { 363 363 hlist_del_rcu(&fi->fi_hash); 364 364 spin_unlock(&state_lock); 365 365 WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); ··· 568 568 co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); 569 569 if (co) { 570 570 co->co_client = clp; 571 - atomic_set(&co->co_odcount, 1); 571 + refcount_set(&co->co_odcount, 1); 572 572 } 573 573 return co; 574 574 } ··· 586 586 get_clnt_odstate(struct nfs4_clnt_odstate *co) 587 587 { 588 588 if (co) 589 - atomic_inc(&co->co_odcount); 589 + refcount_inc(&co->co_odcount); 590 590 } 591 591 592 592 static void ··· 598 598 return; 599 599 600 600 fp = co->co_file; 601 - if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { 601 + if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { 602 602 list_del(&co->co_perfile); 603 603 spin_unlock(&fp->fi_lock); 604 604 ··· 656 656 stid->sc_stateid.si_opaque.so_id = new_id; 657 657 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; 658 658 /* Will be incremented before return to client: */ 659 - atomic_set(&stid->sc_count, 1); 659 + refcount_set(&stid->sc_count, 1); 660 660 spin_lock_init(&stid->sc_lock); 661 661 662 662 /* ··· 813 813 814 814 might_lock(&clp->cl_lock); 815 815 816 - if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) { 816 + if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) { 817 817 wake_up_all(&close_wq); 818 818 return; 819 819 } ··· 913 913 if (status) 914 914 return status; 915 915 ++fp->fi_delegees; 916 - atomic_inc(&dp->dl_stid.sc_count); 916 + refcount_inc(&dp->dl_stid.sc_count); 917 917 dp->dl_stid.sc_type = NFS4_DELEG_STID; 918 918 list_add(&dp->dl_perfile, &fp->fi_delegations); 919 919 list_add(&dp->dl_perclnt, &clp->cl_delegations); ··· 1214 1214 1215 1215 WARN_ON_ONCE(!list_empty(&stp->st_locks)); 1216 1216 1217 - if (!atomic_dec_and_test(&s->sc_count)) { 1217 + if 
(!refcount_dec_and_test(&s->sc_count)) { 1218 1218 wake_up_all(&close_wq); 1219 1219 return; 1220 1220 } ··· 1439 1439 { 1440 1440 int i; 1441 1441 1442 - for (i = 0; i < ses->se_fchannel.maxreqs; i++) 1442 + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { 1443 + free_svc_cred(&ses->se_slots[i]->sl_cred); 1443 1444 kfree(ses->se_slots[i]); 1445 + } 1444 1446 } 1445 1447 1446 1448 /* ··· 1474 1472 spin_lock(&nfsd_drc_lock); 1475 1473 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, 1476 1474 nfsd_drc_max_mem - nfsd_drc_mem_used); 1475 + /* 1476 + * Never use more than a third of the remaining memory, 1477 + * unless it's the only way to give this client a slot: 1478 + */ 1479 + avail = clamp_t(int, avail, slotsize, avail/3); 1477 1480 num = min_t(int, num, avail / slotsize); 1478 1481 nfsd_drc_mem_used += num * slotsize; 1479 1482 spin_unlock(&nfsd_drc_lock); ··· 2079 2072 s = find_stateid_locked(cl, t); 2080 2073 if (s != NULL) { 2081 2074 if (typemask & s->sc_type) 2082 - atomic_inc(&s->sc_count); 2075 + refcount_inc(&s->sc_count); 2083 2076 else 2084 2077 s = NULL; 2085 2078 } ··· 2294 2287 2295 2288 dprintk("--> %s slot %p\n", __func__, slot); 2296 2289 2290 + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; 2297 2291 slot->sl_opcnt = resp->opcnt; 2298 2292 slot->sl_status = resp->cstate.status; 2293 + free_svc_cred(&slot->sl_cred); 2294 + copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred); 2299 2295 2300 - slot->sl_flags |= NFSD4_SLOT_INITIALIZED; 2301 - if (nfsd4_not_cached(resp)) { 2302 - slot->sl_datalen = 0; 2296 + if (!nfsd4_cache_this(resp)) { 2297 + slot->sl_flags &= ~NFSD4_SLOT_CACHED; 2303 2298 return; 2304 2299 } 2300 + slot->sl_flags |= NFSD4_SLOT_CACHED; 2301 + 2305 2302 base = resp->cstate.data_offset; 2306 2303 slot->sl_datalen = buf->len - base; 2307 2304 if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) ··· 2332 2321 op = &args->ops[resp->opcnt - 1]; 2333 2322 nfsd4_encode_operation(resp, op); 2334 2323 2335 - /* Return 
nfserr_retry_uncached_rep in next operation. */ 2336 - if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { 2324 + if (slot->sl_flags & NFSD4_SLOT_CACHED) 2325 + return op->status; 2326 + if (args->opcnt == 1) { 2327 + /* 2328 + * The original operation wasn't a solo sequence--we 2329 + * always cache those--so this retry must not match the 2330 + * original: 2331 + */ 2332 + op->status = nfserr_seq_false_retry; 2333 + } else { 2337 2334 op = &args->ops[resp->opcnt++]; 2338 2335 op->status = nfserr_retry_uncached_rep; 2339 2336 nfsd4_encode_operation(resp, op); ··· 3005 2986 return xb->len > session->se_fchannel.maxreq_sz; 3006 2987 } 3007 2988 2989 + static bool replay_matches_cache(struct svc_rqst *rqstp, 2990 + struct nfsd4_sequence *seq, struct nfsd4_slot *slot) 2991 + { 2992 + struct nfsd4_compoundargs *argp = rqstp->rq_argp; 2993 + 2994 + if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) != 2995 + (bool)seq->cachethis) 2996 + return false; 2997 + /* 2998 + * If there's an error than the reply can have fewer ops than 2999 + * the call. But if we cached a reply with *more* ops than the 3000 + * call you're sending us now, then this new call is clearly not 3001 + * really a replay of the old one: 3002 + */ 3003 + if (slot->sl_opcnt < argp->opcnt) 3004 + return false; 3005 + /* This is the only check explicitly called by spec: */ 3006 + if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) 3007 + return false; 3008 + /* 3009 + * There may be more comparisons we could actually do, but the 3010 + * spec doesn't require us to catch every case where the calls 3011 + * don't match (that would require caching the call as well as 3012 + * the reply), so we don't bother. 
3013 + */ 3014 + return true; 3015 + } 3016 + 3008 3017 __be32 3009 3018 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 3010 3019 union nfsd4_op_u *u) ··· 3091 3044 if (status == nfserr_replay_cache) { 3092 3045 status = nfserr_seq_misordered; 3093 3046 if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) 3047 + goto out_put_session; 3048 + status = nfserr_seq_false_retry; 3049 + if (!replay_matches_cache(rqstp, seq, slot)) 3094 3050 goto out_put_session; 3095 3051 cstate->slot = slot; 3096 3052 cstate->session = session; ··· 3401 3351 { 3402 3352 lockdep_assert_held(&state_lock); 3403 3353 3404 - atomic_set(&fp->fi_ref, 1); 3354 + refcount_set(&fp->fi_ref, 1); 3405 3355 spin_lock_init(&fp->fi_lock); 3406 3356 INIT_LIST_HEAD(&fp->fi_stateids); 3407 3357 INIT_LIST_HEAD(&fp->fi_delegations); ··· 3564 3514 continue; 3565 3515 if (local->st_stateowner == &oo->oo_owner) { 3566 3516 ret = local; 3567 - atomic_inc(&ret->st_stid.sc_count); 3517 + refcount_inc(&ret->st_stid.sc_count); 3568 3518 break; 3569 3519 } 3570 3520 } ··· 3623 3573 goto out_unlock; 3624 3574 3625 3575 open->op_stp = NULL; 3626 - atomic_inc(&stp->st_stid.sc_count); 3576 + refcount_inc(&stp->st_stid.sc_count); 3627 3577 stp->st_stid.sc_type = NFS4_OPEN_STID; 3628 3578 INIT_LIST_HEAD(&stp->st_locks); 3629 3579 stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner); ··· 3671 3621 * there should be no danger of the refcount going back up again at 3672 3622 * this point. 
3673 3623 */ 3674 - wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2); 3624 + wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); 3675 3625 3676 3626 release_all_access(s); 3677 3627 if (s->st_stid.sc_file) { ··· 3697 3647 3698 3648 hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { 3699 3649 if (fh_match(&fp->fi_fhandle, fh)) { 3700 - if (atomic_inc_not_zero(&fp->fi_ref)) 3650 + if (refcount_inc_not_zero(&fp->fi_ref)) 3701 3651 return fp; 3702 3652 } 3703 3653 } ··· 3833 3783 * lock) we know the server hasn't removed the lease yet, we know 3834 3784 * it's safe to take a reference. 3835 3785 */ 3836 - atomic_inc(&dp->dl_stid.sc_count); 3786 + refcount_inc(&dp->dl_stid.sc_count); 3837 3787 nfsd4_run_cb(&dp->dl_recall); 3838 3788 } 3839 3789 ··· 4016 3966 { 4017 3967 struct nfs4_stid *ret; 4018 3968 4019 - ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); 3969 + ret = find_stateid_by_type(cl, s, 3970 + NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID); 4020 3971 if (!ret) 4021 3972 return NULL; 4022 3973 return delegstateid(ret); ··· 4040 3989 deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); 4041 3990 if (deleg == NULL) 4042 3991 goto out; 3992 + if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) { 3993 + nfs4_put_stid(&deleg->dl_stid); 3994 + if (cl->cl_minorversion) 3995 + status = nfserr_deleg_revoked; 3996 + goto out; 3997 + } 4043 3998 flags = share_access_to_flags(open->op_share_access); 4044 3999 status = nfs4_check_delegmode(deleg, flags); 4045 4000 if (status) { ··· 4915 4858 struct nfs4_stid **s, struct nfsd_net *nn) 4916 4859 { 4917 4860 __be32 status; 4861 + bool return_revoked = false; 4862 + 4863 + /* 4864 + * only return revoked delegations if explicitly asked. 4865 + * otherwise we report revoked or bad_stateid status. 
4866 + */ 4867 + if (typemask & NFS4_REVOKED_DELEG_STID) 4868 + return_revoked = true; 4869 + else if (typemask & NFS4_DELEG_STID) 4870 + typemask |= NFS4_REVOKED_DELEG_STID; 4918 4871 4919 4872 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4920 4873 return nfserr_bad_stateid; ··· 4939 4872 *s = find_stateid_by_type(cstate->clp, stateid, typemask); 4940 4873 if (!*s) 4941 4874 return nfserr_bad_stateid; 4875 + if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { 4876 + nfs4_put_stid(*s); 4877 + if (cstate->minorversion) 4878 + return nfserr_deleg_revoked; 4879 + return nfserr_bad_stateid; 4880 + } 4942 4881 return nfs_ok; 4943 4882 } 4944 4883 ··· 5144 5071 ret = nfserr_locks_held; 5145 5072 break; 5146 5073 case NFS4_LOCK_STID: 5147 - atomic_inc(&s->sc_count); 5074 + refcount_inc(&s->sc_count); 5148 5075 spin_unlock(&cl->cl_lock); 5149 5076 ret = nfsd4_free_lock_stateid(stateid, s); 5150 5077 goto out; ··· 5651 5578 5652 5579 lockdep_assert_held(&clp->cl_lock); 5653 5580 5654 - atomic_inc(&stp->st_stid.sc_count); 5581 + refcount_inc(&stp->st_stid.sc_count); 5655 5582 stp->st_stid.sc_type = NFS4_LOCK_STID; 5656 5583 stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); 5657 5584 get_nfs4_file(fp); ··· 5677 5604 5678 5605 list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { 5679 5606 if (lst->st_stid.sc_file == fp) { 5680 - atomic_inc(&lst->st_stid.sc_count); 5607 + refcount_inc(&lst->st_stid.sc_count); 5681 5608 return lst; 5682 5609 } 5683 5610 } ··· 7079 7006 nn->nfsd4_manager.block_opens = true; 7080 7007 locks_start_grace(net, &nn->nfsd4_manager); 7081 7008 nfsd4_client_tracking_init(net); 7082 - printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", 7083 - nn->nfsd4_grace, net); 7009 + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", 7010 + nn->nfsd4_grace, net->ns.inum); 7084 7011 queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); 7085 7012 return 0; 7086 
7013 }
+2 -2
fs/nfsd/nfssvc.c
··· 447 447 */ 448 448 static void set_max_drc(void) 449 449 { 450 - #define NFSD_DRC_SIZE_SHIFT 10 450 + #define NFSD_DRC_SIZE_SHIFT 7 451 451 nfsd_drc_max_mem = (nr_free_buffer_pages() 452 452 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; 453 453 nfsd_drc_mem_used = 0; ··· 517 517 register_inet6addr_notifier(&nfsd_inet6addr_notifier); 518 518 #endif 519 519 } 520 - do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ 520 + ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */ 521 521 return 0; 522 522 } 523 523
+7 -4
fs/nfsd/state.h
··· 36 36 #define _NFSD4_STATE_H 37 37 38 38 #include <linux/idr.h> 39 + #include <linux/refcount.h> 39 40 #include <linux/sunrpc/svc_xprt.h> 40 41 #include "nfsfh.h" 41 42 ··· 84 83 * fields that are of general use to any stateid. 85 84 */ 86 85 struct nfs4_stid { 87 - atomic_t sc_count; 86 + refcount_t sc_count; 88 87 #define NFS4_OPEN_STID 1 89 88 #define NFS4_LOCK_STID 2 90 89 #define NFS4_DELEG_STID 4 ··· 170 169 struct nfsd4_slot { 171 170 u32 sl_seqid; 172 171 __be32 sl_status; 172 + struct svc_cred sl_cred; 173 173 u32 sl_datalen; 174 174 u16 sl_opcnt; 175 175 #define NFSD4_SLOT_INUSE (1 << 0) 176 176 #define NFSD4_SLOT_CACHETHIS (1 << 1) 177 177 #define NFSD4_SLOT_INITIALIZED (1 << 2) 178 + #define NFSD4_SLOT_CACHED (1 << 3) 178 179 u8 sl_flags; 179 180 char sl_data[]; 180 181 }; ··· 468 465 struct nfs4_client *co_client; 469 466 struct nfs4_file *co_file; 470 467 struct list_head co_perfile; 471 - atomic_t co_odcount; 468 + refcount_t co_odcount; 472 469 }; 473 470 474 471 /* ··· 484 481 * the global state_lock spinlock. 485 482 */ 486 483 struct nfs4_file { 487 - atomic_t fi_ref; 484 + refcount_t fi_ref; 488 485 spinlock_t fi_lock; 489 486 struct hlist_node fi_hash; /* hash on fi_fhandle */ 490 487 struct list_head fi_stateids; ··· 637 634 void put_nfs4_file(struct nfs4_file *fi); 638 635 static inline void get_nfs4_file(struct nfs4_file *fi) 639 636 { 640 - atomic_inc(&fi->fi_ref); 637 + refcount_inc(&fi->fi_ref); 641 638 } 642 639 struct file *find_any_file(struct nfs4_file *f); 643 640
+11 -2
fs/nfsd/xdr4.h
··· 649 649 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; 650 650 } 651 651 652 - static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) 652 + /* 653 + * The session reply cache only needs to cache replies that the client 654 + * actually asked us to. But it's almost free for us to cache compounds 655 + * consisting of only a SEQUENCE op, so we may as well cache those too. 656 + * Also, the protocol doesn't give us a convenient response in the case 657 + * of a replay of a solo SEQUENCE op that wasn't cached 658 + * (RETRY_UNCACHED_REP can only be returned in the second op of a 659 + * compound). 660 + */ 661 + static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp) 653 662 { 654 - return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) 663 + return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) 655 664 || nfsd4_is_solo_sequence(resp); 656 665 } 657 666
+2 -2
include/linux/fs.h
··· 971 971 struct net; 972 972 void locks_start_grace(struct net *, struct lock_manager *); 973 973 void locks_end_grace(struct lock_manager *); 974 - int locks_in_grace(struct net *); 975 - int opens_in_grace(struct net *); 974 + bool locks_in_grace(struct net *); 975 + bool opens_in_grace(struct net *); 976 976 977 977 /* that will die - we need it for nfs_lock_info */ 978 978 #include <linux/nfs_fs_i.h>
+1
include/linux/sunrpc/svc.h
··· 47 47 struct svc_pool_stats sp_stats; /* statistics on pool operation */ 48 48 #define SP_TASK_PENDING (0) /* still work to do even if no 49 49 * xprt is queued. */ 50 + #define SP_CONGESTED (1) 50 51 unsigned long sp_flags; 51 52 } ____cacheline_aligned_in_smp; 52 53
+10 -7
include/trace/events/sunrpc.h
··· 486 486 TP_ARGS(rqst, status), 487 487 488 488 TP_STRUCT__entry( 489 - __field(struct sockaddr *, addr) 490 489 __field(u32, xid) 491 490 __field(int, status) 492 491 __field(unsigned long, flags) 492 + __dynamic_array(unsigned char, addr, rqst->rq_addrlen) 493 493 ), 494 494 495 495 TP_fast_assign( 496 - __entry->addr = (struct sockaddr *)&rqst->rq_addr; 497 496 __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0; 498 497 __entry->status = status; 499 498 __entry->flags = rqst->rq_flags; 499 + memcpy(__get_dynamic_array(addr), 500 + &rqst->rq_addr, rqst->rq_addrlen); 500 501 ), 501 502 502 - TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr, 503 + TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", 504 + (struct sockaddr *)__get_dynamic_array(addr), 503 505 __entry->xid, __entry->status, 504 506 show_rqstp_flags(__entry->flags)) 505 507 ); ··· 546 544 TP_ARGS(rqst, status), 547 545 548 546 TP_STRUCT__entry( 549 - __field(struct sockaddr *, addr) 550 547 __field(u32, xid) 551 - __field(int, dropme) 552 548 __field(int, status) 553 549 __field(unsigned long, flags) 550 + __dynamic_array(unsigned char, addr, rqst->rq_addrlen) 554 551 ), 555 552 556 553 TP_fast_assign( 557 - __entry->addr = (struct sockaddr *)&rqst->rq_addr; 558 554 __entry->xid = be32_to_cpu(rqst->rq_xid); 559 555 __entry->status = status; 560 556 __entry->flags = rqst->rq_flags; 557 + memcpy(__get_dynamic_array(addr), 558 + &rqst->rq_addr, rqst->rq_addrlen); 561 559 ), 562 560 563 561 TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s", 564 - __entry->addr, __entry->xid, 562 + (struct sockaddr *)__get_dynamic_array(addr), 563 + __entry->xid, 565 564 __entry->status, show_rqstp_flags(__entry->flags)) 566 565 ); 567 566
+9 -5
net/sunrpc/auth_gss/svcauth_gss.c
··· 855 855 return stat; 856 856 if (integ_len > buf->len) 857 857 return stat; 858 - if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) 859 - BUG(); 858 + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) { 859 + WARN_ON_ONCE(1); 860 + return stat; 861 + } 860 862 /* copy out mic... */ 861 863 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) 862 - BUG(); 864 + return stat; 863 865 if (mic.len > RPC_MAX_AUTH_SIZE) 864 866 return stat; 865 867 mic.data = kmalloc(mic.len, GFP_KERNEL); ··· 1613 1611 BUG_ON(integ_len % 4); 1614 1612 *p++ = htonl(integ_len); 1615 1613 *p++ = htonl(gc->gc_seq); 1616 - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) 1617 - BUG(); 1614 + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) { 1615 + WARN_ON_ONCE(1); 1616 + goto out_err; 1617 + } 1618 1618 if (resbuf->tail[0].iov_base == NULL) { 1619 1619 if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) 1620 1620 goto out_err;
+33 -73
net/sunrpc/svc_xprt.c
··· 250 250 svc_xprt_received(new); 251 251 } 252 252 253 - int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 254 - struct net *net, const int family, 255 - const unsigned short port, int flags) 253 + static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 254 + struct net *net, const int family, 255 + const unsigned short port, int flags) 256 256 { 257 257 struct svc_xprt_class *xcl; 258 258 ··· 380 380 struct svc_pool *pool; 381 381 struct svc_rqst *rqstp = NULL; 382 382 int cpu; 383 - bool queued = false; 384 383 385 384 if (!svc_xprt_has_something_to_do(xprt)) 386 385 goto out; ··· 400 401 401 402 atomic_long_inc(&pool->sp_stats.packets); 402 403 403 - redo_search: 404 + dprintk("svc: transport %p put into queue\n", xprt); 405 + spin_lock_bh(&pool->sp_lock); 406 + list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 407 + pool->sp_stats.sockets_queued++; 408 + spin_unlock_bh(&pool->sp_lock); 409 + 404 410 /* find a thread for this xprt */ 405 411 rcu_read_lock(); 406 412 list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { 407 - /* Do a lockless check first */ 408 - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) 413 + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) 409 414 continue; 410 - 411 - /* 412 - * Once the xprt has been queued, it can only be dequeued by 413 - * the task that intends to service it. All we can do at that 414 - * point is to try to wake this thread back up so that it can 415 - * do so. 416 - */ 417 - if (!queued) { 418 - spin_lock_bh(&rqstp->rq_lock); 419 - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) { 420 - /* already busy, move on... 
*/ 421 - spin_unlock_bh(&rqstp->rq_lock); 422 - continue; 423 - } 424 - 425 - /* this one will do */ 426 - rqstp->rq_xprt = xprt; 427 - svc_xprt_get(xprt); 428 - spin_unlock_bh(&rqstp->rq_lock); 429 - } 430 - rcu_read_unlock(); 431 - 432 415 atomic_long_inc(&pool->sp_stats.threads_woken); 433 416 wake_up_process(rqstp->rq_task); 434 - put_cpu(); 435 - goto out; 417 + goto out_unlock; 436 418 } 437 - rcu_read_unlock(); 438 - 439 - /* 440 - * We didn't find an idle thread to use, so we need to queue the xprt. 441 - * Do so and then search again. If we find one, we can't hook this one 442 - * up to it directly but we can wake the thread up in the hopes that it 443 - * will pick it up once it searches for a xprt to service. 444 - */ 445 - if (!queued) { 446 - queued = true; 447 - dprintk("svc: transport %p put into queue\n", xprt); 448 - spin_lock_bh(&pool->sp_lock); 449 - list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 450 - pool->sp_stats.sockets_queued++; 451 - spin_unlock_bh(&pool->sp_lock); 452 - goto redo_search; 453 - } 419 + set_bit(SP_CONGESTED, &pool->sp_flags); 454 420 rqstp = NULL; 421 + out_unlock: 422 + rcu_read_unlock(); 455 423 put_cpu(); 456 424 out: 457 425 trace_svc_xprt_do_enqueue(xprt, rqstp); ··· 687 721 688 722 static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) 689 723 { 690 - struct svc_xprt *xprt; 691 724 struct svc_pool *pool = rqstp->rq_pool; 692 725 long time_left = 0; 693 726 694 727 /* rq_xprt should be clear on entry */ 695 728 WARN_ON_ONCE(rqstp->rq_xprt); 696 729 697 - /* Normally we will wait up to 5 seconds for any required 698 - * cache information to be provided. 699 - */ 700 - rqstp->rq_chandle.thread_wait = 5*HZ; 701 - 702 - xprt = svc_xprt_dequeue(pool); 703 - if (xprt) { 704 - rqstp->rq_xprt = xprt; 705 - 706 - /* As there is a shortage of threads and this request 707 - * had to be queued, don't allow the thread to wait so 708 - * long for cache updates. 
709 - */ 710 - rqstp->rq_chandle.thread_wait = 1*HZ; 711 - clear_bit(SP_TASK_PENDING, &pool->sp_flags); 712 - return xprt; 713 - } 730 + rqstp->rq_xprt = svc_xprt_dequeue(pool); 731 + if (rqstp->rq_xprt) 732 + goto out_found; 714 733 715 734 /* 716 735 * We have to be able to interrupt this wait 717 736 * to bring down the daemons ... 718 737 */ 719 738 set_current_state(TASK_INTERRUPTIBLE); 739 + smp_mb__before_atomic(); 740 + clear_bit(SP_CONGESTED, &pool->sp_flags); 720 741 clear_bit(RQ_BUSY, &rqstp->rq_flags); 721 - smp_mb(); 742 + smp_mb__after_atomic(); 722 743 723 744 if (likely(rqst_should_sleep(rqstp))) 724 745 time_left = schedule_timeout(timeout); ··· 714 761 715 762 try_to_freeze(); 716 763 717 - spin_lock_bh(&rqstp->rq_lock); 718 764 set_bit(RQ_BUSY, &rqstp->rq_flags); 719 - spin_unlock_bh(&rqstp->rq_lock); 720 - 721 - xprt = rqstp->rq_xprt; 722 - if (xprt != NULL) 723 - return xprt; 765 + smp_mb__after_atomic(); 766 + rqstp->rq_xprt = svc_xprt_dequeue(pool); 767 + if (rqstp->rq_xprt) 768 + goto out_found; 724 769 725 770 if (!time_left) 726 771 atomic_long_inc(&pool->sp_stats.threads_timedout); ··· 726 775 if (signalled() || kthread_should_stop()) 727 776 return ERR_PTR(-EINTR); 728 777 return ERR_PTR(-EAGAIN); 778 + out_found: 779 + /* Normally we will wait up to 5 seconds for any required 780 + * cache information to be provided. 781 + */ 782 + if (!test_bit(SP_CONGESTED, &pool->sp_flags)) 783 + rqstp->rq_chandle.thread_wait = 5*HZ; 784 + else 785 + rqstp->rq_chandle.thread_wait = 1*HZ; 786 + return rqstp->rq_xprt; 729 787 } 730 788 731 789 static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
+5 -1
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 133 133 if (ret) 134 134 goto out_err; 135 135 136 + /* Bump page refcnt so Send completion doesn't release 137 + * the rq_buffer before all retransmits are complete. 138 + */ 139 + get_page(virt_to_page(rqst->rq_buffer)); 136 140 ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); 137 141 if (ret) 138 142 goto out_unmap; ··· 169 165 return -EINVAL; 170 166 } 171 167 172 - /* svc_rdma_sendto releases this page */ 173 168 page = alloc_page(RPCRDMA_DEF_GFP); 174 169 if (!page) 175 170 return -ENOMEM; ··· 187 184 { 188 185 struct rpc_rqst *rqst = task->tk_rqstp; 189 186 187 + put_page(virt_to_page(rqst->rq_buffer)); 190 188 kfree(rqst->rq_rbuffer); 191 189 } 192 190
+8 -3
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 290 290 ib_event_msg(event->event), event->event, 291 291 event->element.qp); 292 292 set_bit(XPT_CLOSE, &xprt->xpt_flags); 293 + svc_xprt_enqueue(xprt); 293 294 break; 294 295 } 295 296 } ··· 323 322 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 324 323 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) 325 324 goto out; 326 - svc_xprt_enqueue(&xprt->sc_xprt); 327 - goto out; 325 + goto out_enqueue; 328 326 329 327 flushed: 330 328 if (wc->status != IB_WC_WR_FLUSH_ERR) ··· 333 333 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 334 334 svc_rdma_put_context(ctxt, 1); 335 335 336 + out_enqueue: 337 + svc_xprt_enqueue(&xprt->sc_xprt); 336 338 out: 337 339 svc_xprt_put(&xprt->sc_xprt); 338 340 } ··· 360 358 361 359 if (unlikely(wc->status != IB_WC_SUCCESS)) { 362 360 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 361 + svc_xprt_enqueue(&xprt->sc_xprt); 363 362 if (wc->status != IB_WC_WR_FLUSH_ERR) 364 363 pr_err("svcrdma: Send: %s (%u/0x%x)\n", 365 364 ib_wc_status_msg(wc->status), ··· 572 569 case RDMA_CM_EVENT_DEVICE_REMOVAL: 573 570 dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", 574 571 xprt, cma_id); 575 - if (xprt) 572 + if (xprt) { 576 573 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 574 + svc_xprt_enqueue(&xprt->sc_xprt); 575 + } 577 576 break; 578 577 579 578 default: