Merge branch 'for-3.18' of git://linux-nfs.org/~bfields/linux

+5 -1

fs/Kconfig

··· 233 233 source "fs/nfs/Kconfig" 234 234 source "fs/nfsd/Kconfig" 235 235 236 + config GRACE_PERIOD 237 + tristate 238 + 236 239 config LOCKD 237 240 tristate 238 241 depends on FILE_LOCKING 242 + select GRACE_PERIOD 239 243 240 244 config LOCKD_V4 241 245 bool ··· 253 249 254 250 config NFS_COMMON 255 251 bool 256 - depends on NFSD || NFS_FS 252 + depends on NFSD || NFS_FS || LOCKD 257 253 default y 258 254 259 255 source "net/sunrpc/Kconfig"

+2 -1

fs/lockd/Makefile

··· 5 5 obj-$(CONFIG_LOCKD) += lockd.o 6 6 7 7 lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ 8 - svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o 8 + svcshare.o svcproc.o svcsubs.o mon.o xdr.o 9 9 lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o 10 + lockd-objs-$(CONFIG_PROC_FS) += procfs.o 10 11 lockd-objs := $(lockd-objs-y)

+58 -10

fs/lockd/grace.c fs/nfs_common/grace.c

··· 1 1 /* 2 2 * Common code for control of lockd and nfsv4 grace periods. 3 + * 4 + * Transplanted from lockd code 3 5 */ 4 6 5 7 #include <linux/module.h> 6 - #include <linux/lockd/bind.h> 7 8 #include <net/net_namespace.h> 9 + #include <net/netns/generic.h> 10 + #include <linux/fs.h> 8 11 9 - #include "netns.h" 10 - 12 + static int grace_net_id; 11 13 static DEFINE_SPINLOCK(grace_lock); 12 14 13 15 /** 14 16 * locks_start_grace 17 + * @net: net namespace that this lock manager belongs to 15 18 * @lm: who this grace period is for 16 19 * 17 20 * A grace period is a period during which locks should not be given ··· 24 21 * 25 22 * This function is called to start a grace period. 26 23 */ 27 - void locks_start_grace(struct net *net, struct lock_manager *lm) 24 + void 25 + locks_start_grace(struct net *net, struct lock_manager *lm) 28 26 { 29 - struct lockd_net *ln = net_generic(net, lockd_net_id); 27 + struct list_head *grace_list = net_generic(net, grace_net_id); 30 28 31 29 spin_lock(&grace_lock); 32 - list_add(&lm->list, &ln->grace_list); 30 + list_add(&lm->list, grace_list); 33 31 spin_unlock(&grace_lock); 34 32 } 35 33 EXPORT_SYMBOL_GPL(locks_start_grace); 36 34 37 35 /** 38 36 * locks_end_grace 37 + * @net: net namespace that this lock manager belongs to 39 38 * @lm: who this grace period is for 40 39 * 41 40 * Call this function to state that the given lock manager is ready to ··· 46 41 * Note that callers count on it being safe to call this more than once, 47 42 * and the second call should be a no-op. 48 43 */ 49 - void locks_end_grace(struct lock_manager *lm) 44 + void 45 + locks_end_grace(struct lock_manager *lm) 50 46 { 51 47 spin_lock(&grace_lock); 52 48 list_del_init(&lm->list); ··· 62 56 * to answer ordinary lock requests, and when they should accept only 63 57 * lock reclaims. 64 58 */ 65 - int locks_in_grace(struct net *net) 59 + int 60 + locks_in_grace(struct net *net) 66 61 { 67 - struct lockd_net *ln = net_generic(net, lockd_net_id); 62 + struct list_head *grace_list = net_generic(net, grace_net_id); 68 63 69 - return !list_empty(&ln->grace_list); 64 + return !list_empty(grace_list); 70 65 } 71 66 EXPORT_SYMBOL_GPL(locks_in_grace); 67 + 68 + static int __net_init 69 + grace_init_net(struct net *net) 70 + { 71 + struct list_head *grace_list = net_generic(net, grace_net_id); 72 + 73 + INIT_LIST_HEAD(grace_list); 74 + return 0; 75 + } 76 + 77 + static void __net_exit 78 + grace_exit_net(struct net *net) 79 + { 80 + struct list_head *grace_list = net_generic(net, grace_net_id); 81 + 82 + BUG_ON(!list_empty(grace_list)); 83 + } 84 + 85 + static struct pernet_operations grace_net_ops = { 86 + .init = grace_init_net, 87 + .exit = grace_exit_net, 88 + .id = &grace_net_id, 89 + .size = sizeof(struct list_head), 90 + }; 91 + 92 + static int __init 93 + init_grace(void) 94 + { 95 + return register_pernet_subsys(&grace_net_ops); 96 + } 97 + 98 + static void __exit 99 + exit_grace(void) 100 + { 101 + unregister_pernet_subsys(&grace_net_ops); 102 + } 103 + 104 + MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>"); 105 + MODULE_LICENSE("GPL"); 106 + module_init(init_grace) 107 + module_exit(exit_grace)

-1

fs/lockd/netns.h

··· 11 11 12 12 struct delayed_work grace_period_end; 13 13 struct lock_manager lockd_manager; 14 - struct list_head grace_list; 15 14 16 15 spinlock_t nsm_clnt_lock; 17 16 unsigned int nsm_users;

+92

fs/lockd/procfs.c

··· 1 + /* 2 + * Procfs support for lockd 3 + * 4 + * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com> 5 + */ 6 + 7 + #include <linux/fs.h> 8 + #include <linux/proc_fs.h> 9 + #include <linux/module.h> 10 + #include <linux/nsproxy.h> 11 + #include <net/net_namespace.h> 12 + 13 + #include "netns.h" 14 + #include "procfs.h" 15 + 16 + /* 17 + * We only allow strings that start with 'Y', 'y', or '1'. 18 + */ 19 + static ssize_t 20 + nlm_end_grace_write(struct file *file, const char __user *buf, size_t size, 21 + loff_t *pos) 22 + { 23 + char *data; 24 + struct lockd_net *ln = net_generic(current->nsproxy->net_ns, 25 + lockd_net_id); 26 + 27 + if (size < 1) 28 + return -EINVAL; 29 + 30 + data = simple_transaction_get(file, buf, size); 31 + if (IS_ERR(data)) 32 + return PTR_ERR(data); 33 + 34 + switch(data[0]) { 35 + case 'Y': 36 + case 'y': 37 + case '1': 38 + locks_end_grace(&ln->lockd_manager); 39 + break; 40 + default: 41 + return -EINVAL; 42 + } 43 + 44 + return size; 45 + } 46 + 47 + static ssize_t 48 + nlm_end_grace_read(struct file *file, char __user *buf, size_t size, 49 + loff_t *pos) 50 + { 51 + struct lockd_net *ln = net_generic(current->nsproxy->net_ns, 52 + lockd_net_id); 53 + char resp[3]; 54 + 55 + resp[0] = list_empty(&ln->lockd_manager.list) ? 'Y' : 'N'; 56 + resp[1] = '\n'; 57 + resp[2] = '\0'; 58 + 59 + return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp)); 60 + } 61 + 62 + static const struct file_operations lockd_end_grace_operations = { 63 + .write = nlm_end_grace_write, 64 + .read = nlm_end_grace_read, 65 + .llseek = default_llseek, 66 + .release = simple_transaction_release, 67 + .owner = THIS_MODULE, 68 + }; 69 + 70 + int __init 71 + lockd_create_procfs(void) 72 + { 73 + struct proc_dir_entry *entry; 74 + 75 + entry = proc_mkdir("fs/lockd", NULL); 76 + if (!entry) 77 + return -ENOMEM; 78 + entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry, 79 + &lockd_end_grace_operations); 80 + if (!entry) { 81 + remove_proc_entry("fs/lockd", NULL); 82 + return -ENOMEM; 83 + } 84 + return 0; 85 + } 86 + 87 + void __exit 88 + lockd_remove_procfs(void) 89 + { 90 + remove_proc_entry("fs/lockd/nlm_end_grace", NULL); 91 + remove_proc_entry("fs/lockd", NULL); 92 + }

+28

fs/lockd/procfs.h

··· 1 + /* 2 + * Procfs support for lockd 3 + * 4 + * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com> 5 + */ 6 + #ifndef _LOCKD_PROCFS_H 7 + #define _LOCKD_PROCFS_H 8 + 9 + #include <linux/kconfig.h> 10 + 11 + #if IS_ENABLED(CONFIG_PROC_FS) 12 + int lockd_create_procfs(void); 13 + void lockd_remove_procfs(void); 14 + #else 15 + static inline int 16 + lockd_create_procfs(void) 17 + { 18 + return 0; 19 + } 20 + 21 + static inline void 22 + lockd_remove_procfs(void) 23 + { 24 + return; 25 + } 26 + #endif /* IS_ENABLED(CONFIG_PROC_FS) */ 27 + 28 + #endif /* _LOCKD_PROCFS_H */

+14 -2

fs/lockd/svc.c

··· 36 36 #include <linux/nfs.h> 37 37 38 38 #include "netns.h" 39 + #include "procfs.h" 39 40 40 41 #define NLMDBG_FACILITY NLMDBG_SVC 41 42 #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) ··· 305 304 svc_sock_update_bufs(serv); 306 305 serv->sv_maxconn = nlm_max_connections; 307 306 308 - nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); 307 + nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name); 309 308 if (IS_ERR(nlmsvc_task)) { 310 309 error = PTR_ERR(nlmsvc_task); 311 310 printk(KERN_WARNING 312 311 "lockd_up: kthread_run failed, error=%d\n", error); 313 312 goto out_task; 314 313 } 314 + nlmsvc_rqst->rq_task = nlmsvc_task; 315 + wake_up_process(nlmsvc_task); 316 + 315 317 dprintk("lockd_up: service started\n"); 316 318 return 0; 317 319 ··· 585 581 struct lockd_net *ln = net_generic(net, lockd_net_id); 586 582 587 583 INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); 588 - INIT_LIST_HEAD(&ln->grace_list); 584 + INIT_LIST_HEAD(&ln->lockd_manager.list); 589 585 spin_lock_init(&ln->nsm_clnt_lock); 590 586 return 0; 591 587 } ··· 619 615 err = register_pernet_subsys(&lockd_net_ops); 620 616 if (err) 621 617 goto err_pernet; 618 + 619 + err = lockd_create_procfs(); 620 + if (err) 621 + goto err_procfs; 622 + 622 623 return 0; 623 624 625 + err_procfs: 626 + unregister_pernet_subsys(&lockd_net_ops); 624 627 err_pernet: 625 628 #ifdef CONFIG_SYSCTL 626 629 unregister_sysctl_table(nlm_sysctl_table); ··· 640 629 { 641 630 /* FIXME: delete all NLM clients */ 642 631 nlm_shutdown_hosts(); 632 + lockd_remove_procfs(); 643 633 unregister_pernet_subsys(&lockd_net_ops); 644 634 #ifdef CONFIG_SYSCTL 645 635 unregister_sysctl_table(nlm_sysctl_table);

+3 -1

fs/nfs/callback.c

··· 235 235 236 236 cb_info->serv = serv; 237 237 cb_info->rqst = rqstp; 238 - cb_info->task = kthread_run(callback_svc, cb_info->rqst, 238 + cb_info->task = kthread_create(callback_svc, cb_info->rqst, 239 239 "nfsv4.%u-svc", minorversion); 240 240 if (IS_ERR(cb_info->task)) { 241 241 ret = PTR_ERR(cb_info->task); ··· 244 244 cb_info->task = NULL; 245 245 return ret; 246 246 } 247 + rqstp->rq_task = cb_info->task; 248 + wake_up_process(cb_info->task); 247 249 dprintk("nfs_callback_up: service started\n"); 248 250 return 0; 249 251 }

+2 -1

fs/nfs_common/Makefile

··· 3 3 # 4 4 5 5 obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o 6 - 7 6 nfs_acl-objs := nfsacl.o 7 + 8 + obj-$(CONFIG_GRACE_PERIOD) += grace.o

+1 -3

fs/nfsd/Kconfig

··· 71 71 select FS_POSIX_ACL 72 72 select SUNRPC_GSS 73 73 select CRYPTO 74 + select GRACE_PERIOD 74 75 help 75 76 This option enables support in your system's NFS server for 76 77 version 4 of the NFS protocol (RFC 3530). ··· 94 93 95 94 If you do not wish to enable fine-grained security labels SELinux or 96 95 Smack policies on NFSv4 files, say N. 97 - 98 - WARNING: there is still a chance of backwards-incompatible protocol changes. 99 - For now we recommend "Y" only for developers and testers. 100 96 101 97 config NFSD_FAULT_INJECTION 102 98 bool "NFS server manual fault injection"

-1

fs/nfsd/cache.h

··· 18 18 * is much larger than a sockaddr_in6. 19 19 */ 20 20 struct svc_cacherep { 21 - struct hlist_node c_hash; 22 21 struct list_head c_lru; 23 22 24 23 unsigned char c_state, /* unused, inprog, done */

+1

fs/nfsd/export.c

··· 1145 1145 { NFSEXP_ALLSQUASH, {"all_squash", ""}}, 1146 1146 { NFSEXP_ASYNC, {"async", "sync"}}, 1147 1147 { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, 1148 + { NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}}, 1148 1149 { NFSEXP_NOHIDE, {"nohide", ""}}, 1149 1150 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1150 1151 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},

+8 -5

fs/nfsd/nfs3proc.c

··· 223 223 newfhp = fh_init(&resp->fh, NFS3_FHSIZE); 224 224 attr = &argp->attrs; 225 225 226 - /* Get the directory inode */ 227 - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE); 228 - if (nfserr) 229 - RETURN_STATUS(nfserr); 230 - 231 226 /* Unfudge the mode bits */ 232 227 attr->ia_mode &= ~S_IFMT; 233 228 if (!(attr->ia_valid & ATTR_MODE)) { ··· 466 471 resp->buflen = resp->count; 467 472 resp->rqstp = rqstp; 468 473 offset = argp->cookie; 474 + 475 + nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP); 476 + if (nfserr) 477 + RETURN_STATUS(nfserr); 478 + 479 + if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS) 480 + RETURN_STATUS(nfserr_notsupp); 481 + 469 482 nfserr = nfsd_readdir(rqstp, &resp->fh, 470 483 &offset, 471 484 &resp->common,

+40 -106

fs/nfsd/nfs4callback.c

··· 49 49 50 50 /* Index of predefined Linux callback client operations */ 51 51 52 - enum { 53 - NFSPROC4_CLNT_CB_NULL = 0, 54 - NFSPROC4_CLNT_CB_RECALL, 55 - NFSPROC4_CLNT_CB_SEQUENCE, 56 - }; 57 - 58 52 struct nfs4_cb_compound_hdr { 59 53 /* args */ 60 54 u32 ident; /* minorversion 0 only */ ··· 488 494 static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, 489 495 const struct nfsd4_callback *cb) 490 496 { 491 - const struct nfs4_delegation *args = cb->cb_op; 497 + const struct nfs4_delegation *dp = cb_to_delegation(cb); 492 498 struct nfs4_cb_compound_hdr hdr = { 493 499 .ident = cb->cb_clp->cl_cb_ident, 494 500 .minorversion = cb->cb_minorversion, ··· 496 502 497 503 encode_cb_compound4args(xdr, &hdr); 498 504 encode_cb_sequence4args(xdr, cb, &hdr); 499 - encode_cb_recall4args(xdr, args, &hdr); 505 + encode_cb_recall4args(xdr, dp, &hdr); 500 506 encode_cb_nops(&hdr); 501 507 } 502 508 ··· 740 746 741 747 static struct workqueue_struct *callback_wq; 742 748 743 - static void run_nfsd4_cb(struct nfsd4_callback *cb) 744 - { 745 - queue_work(callback_wq, &cb->cb_work); 746 - } 747 - 748 - static void do_probe_callback(struct nfs4_client *clp) 749 - { 750 - struct nfsd4_callback *cb = &clp->cl_cb_null; 751 - 752 - cb->cb_op = NULL; 753 - cb->cb_clp = clp; 754 - 755 - cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; 756 - cb->cb_msg.rpc_argp = NULL; 757 - cb->cb_msg.rpc_resp = NULL; 758 - 759 - cb->cb_ops = &nfsd4_cb_probe_ops; 760 - 761 - run_nfsd4_cb(cb); 762 - } 763 - 764 749 /* 765 750 * Poke the callback thread to process any updates to the callback 766 751 * parameters, and send a null probe. ··· 748 775 { 749 776 clp->cl_cb_state = NFSD4_CB_UNKNOWN; 750 777 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); 751 - do_probe_callback(clp); 778 + nfsd4_run_cb(&clp->cl_cb_null); 752 779 } 753 780 754 781 void nfsd4_probe_callback_sync(struct nfs4_client *clp) ··· 820 847 rpc_wake_up_next(&clp->cl_cb_waitq); 821 848 dprintk("%s: freed slot, new seqid=%d\n", __func__, 822 849 clp->cl_cb_session->se_cb_seq_nr); 823 - 824 - /* We're done looking into the sequence information */ 825 - task->tk_msg.rpc_resp = NULL; 826 850 } 827 - } 828 851 829 - 830 - static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 831 - { 832 - struct nfsd4_callback *cb = calldata; 833 - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); 834 - struct nfs4_client *clp = cb->cb_clp; 835 - struct rpc_clnt *current_rpc_client = clp->cl_cb_client; 836 - 837 - nfsd4_cb_done(task, calldata); 838 - 839 - if (current_rpc_client != task->tk_client) { 852 + if (clp->cl_cb_client != task->tk_client) { 840 853 /* We're shutting down or changing cl_cb_client; leave 841 854 * it to nfsd4_process_cb_update to restart the call if 842 855 * necessary. */ ··· 831 872 832 873 if (cb->cb_done) 833 874 return; 834 - switch (task->tk_status) { 875 + 876 + switch (cb->cb_ops->done(cb, task)) { 835 877 case 0: 836 - cb->cb_done = true; 837 - return; 838 - case -EBADHANDLE: 839 - case -NFS4ERR_BAD_STATEID: 840 - /* Race: client probably got cb_recall 841 - * before open reply granting delegation */ 842 - break; 843 - default: 844 - /* Network partition? */ 845 - nfsd4_mark_cb_down(clp, task->tk_status); 846 - } 847 - if (dp->dl_retries--) { 848 - rpc_delay(task, 2*HZ); 849 878 task->tk_status = 0; 850 879 rpc_restart_call_prepare(task); 851 880 return; 881 + case 1: 882 + break; 883 + case -1: 884 + /* Network partition? */ 885 + nfsd4_mark_cb_down(clp, task->tk_status); 886 + break; 887 + default: 888 + BUG(); 852 889 } 853 - nfsd4_mark_cb_down(clp, task->tk_status); 854 890 cb->cb_done = true; 855 891 } 856 892 857 - static void nfsd4_cb_recall_release(void *calldata) 893 + static void nfsd4_cb_release(void *calldata) 858 894 { 859 895 struct nfsd4_callback *cb = calldata; 860 896 struct nfs4_client *clp = cb->cb_clp; 861 - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); 862 897 863 898 if (cb->cb_done) { 864 899 spin_lock(&clp->cl_lock); 865 900 list_del(&cb->cb_per_client); 866 901 spin_unlock(&clp->cl_lock); 867 - nfs4_put_stid(&dp->dl_stid); 902 + 903 + cb->cb_ops->release(cb); 868 904 } 869 905 } 870 906 871 - static const struct rpc_call_ops nfsd4_cb_recall_ops = { 907 + static const struct rpc_call_ops nfsd4_cb_ops = { 872 908 .rpc_call_prepare = nfsd4_cb_prepare, 873 - .rpc_call_done = nfsd4_cb_recall_done, 874 - .rpc_release = nfsd4_cb_recall_release, 909 + .rpc_call_done = nfsd4_cb_done, 910 + .rpc_release = nfsd4_cb_release, 875 911 }; 876 912 877 913 int nfsd4_create_callback_queue(void) ··· 891 937 * instead, nfsd4_run_cb_null() will detect the killed 892 938 * client, destroy the rpc client, and stop: 893 939 */ 894 - do_probe_callback(clp); 940 + nfsd4_run_cb(&clp->cl_cb_null); 895 941 flush_workqueue(callback_wq); 896 - } 897 - 898 - static void nfsd4_release_cb(struct nfsd4_callback *cb) 899 - { 900 - if (cb->cb_ops->rpc_release) 901 - cb->cb_ops->rpc_release(cb); 902 942 } 903 943 904 944 /* requires cl_lock: */ ··· 957 1009 } 958 1010 /* Yay, the callback channel's back! Restart any callbacks: */ 959 1011 list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) 960 - run_nfsd4_cb(cb); 1012 + queue_work(callback_wq, &cb->cb_work); 961 1013 } 962 1014 963 1015 static void 964 - nfsd4_run_callback_rpc(struct nfsd4_callback *cb) 1016 + nfsd4_run_cb_work(struct work_struct *work) 965 1017 { 1018 + struct nfsd4_callback *cb = 1019 + container_of(work, struct nfsd4_callback, cb_work); 966 1020 struct nfs4_client *clp = cb->cb_clp; 967 1021 struct rpc_clnt *clnt; 1022 + 1023 + if (cb->cb_ops && cb->cb_ops->prepare) 1024 + cb->cb_ops->prepare(cb); 968 1025 969 1026 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) 970 1027 nfsd4_process_cb_update(cb); ··· 977 1024 clnt = clp->cl_cb_client; 978 1025 if (!clnt) { 979 1026 /* Callback channel broken, or client killed; give up: */ 980 - nfsd4_release_cb(cb); 1027 + if (cb->cb_ops && cb->cb_ops->release) 1028 + cb->cb_ops->release(cb); 981 1029 return; 982 1030 } 983 1031 cb->cb_msg.rpc_cred = clp->cl_cb_cred; 984 1032 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 985 - cb->cb_ops, cb); 1033 + cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); 986 1034 } 987 1035 988 - void 989 - nfsd4_run_cb_null(struct work_struct *w) 1036 + void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, 1037 + struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) 990 1038 { 991 - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, 992 - cb_work); 993 - nfsd4_run_callback_rpc(cb); 994 - } 995 - 996 - void 997 - nfsd4_run_cb_recall(struct work_struct *w) 998 - { 999 - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, 1000 - cb_work); 1001 - 1002 - nfsd4_prepare_cb_recall(cb->cb_op); 1003 - nfsd4_run_callback_rpc(cb); 1004 - } 1005 - 1006 - void nfsd4_cb_recall(struct nfs4_delegation *dp) 1007 - { 1008 - struct nfsd4_callback *cb = &dp->dl_recall; 1009 - struct nfs4_client *clp = dp->dl_stid.sc_client; 1010 - 1011 - dp->dl_retries = 1; 1012 - cb->cb_op = dp; 1013 1039 cb->cb_clp = clp; 1014 - cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; 1040 + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; 1015 1041 cb->cb_msg.rpc_argp = cb; 1016 1042 cb->cb_msg.rpc_resp = cb; 1017 - 1018 - cb->cb_ops = &nfsd4_cb_recall_ops; 1019 - 1043 + cb->cb_ops = ops; 1044 + INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); 1020 1045 INIT_LIST_HEAD(&cb->cb_per_client); 1021 1046 cb->cb_done = true; 1047 + } 1022 1048 1023 - run_nfsd4_cb(&dp->dl_recall); 1049 + void nfsd4_run_cb(struct nfsd4_callback *cb) 1050 + { 1051 + queue_work(callback_wq, &cb->cb_work); 1024 1052 }

+8 -12

fs/nfsd/nfs4idmap.c

··· 215 215 memset(&ent, 0, sizeof(ent)); 216 216 217 217 /* Authentication name */ 218 - if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) 218 + len = qword_get(&buf, buf1, PAGE_SIZE); 219 + if (len <= 0 || len >= IDMAP_NAMESZ) 219 220 goto out; 220 221 memcpy(ent.authname, buf1, sizeof(ent.authname)); 221 222 ··· 246 245 /* Name */ 247 246 error = -EINVAL; 248 247 len = qword_get(&buf, buf1, PAGE_SIZE); 249 - if (len < 0) 248 + if (len < 0 || len >= IDMAP_NAMESZ) 250 249 goto out; 251 250 if (len == 0) 252 251 set_bit(CACHE_NEGATIVE, &ent.h.flags); 253 - else if (len >= IDMAP_NAMESZ) 254 - goto out; 255 252 else 256 253 memcpy(ent.name, buf1, sizeof(ent.name)); 257 254 error = -ENOMEM; ··· 258 259 goto out; 259 260 260 261 cache_put(&res->h, cd); 261 - 262 262 error = 0; 263 263 out: 264 264 kfree(buf1); 265 - 266 265 return error; 267 266 } 268 - 269 267 270 268 static struct ent * 271 269 idtoname_lookup(struct cache_detail *cd, struct ent *item) ··· 364 368 { 365 369 struct ent ent, *res; 366 370 char *buf1; 367 - int error = -EINVAL; 371 + int len, error = -EINVAL; 368 372 369 373 if (buf[buflen - 1] != '\n') 370 374 return (-EINVAL); ··· 377 381 memset(&ent, 0, sizeof(ent)); 378 382 379 383 /* Authentication name */ 380 - if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) 384 + len = qword_get(&buf, buf1, PAGE_SIZE); 385 + if (len <= 0 || len >= IDMAP_NAMESZ) 381 386 goto out; 382 387 memcpy(ent.authname, buf1, sizeof(ent.authname)); 383 388 ··· 389 392 IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; 390 393 391 394 /* Name */ 392 - error = qword_get(&buf, buf1, PAGE_SIZE); 393 - if (error <= 0 || error >= IDMAP_NAMESZ) 395 + len = qword_get(&buf, buf1, PAGE_SIZE); 396 + if (len <= 0 || len >= IDMAP_NAMESZ) 394 397 goto out; 395 398 memcpy(ent.name, buf1, sizeof(ent.name)); 396 399 ··· 418 421 error = 0; 419 422 out: 420 423 kfree(buf1); 421 - 422 424 return (error); 423 425 } 424 426

+49

fs/nfsd/nfs4proc.c

··· 1013 1013 return status; 1014 1014 } 1015 1015 1016 + static __be32 1017 + nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1018 + struct nfsd4_seek *seek) 1019 + { 1020 + int whence; 1021 + __be32 status; 1022 + struct file *file; 1023 + 1024 + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, 1025 + &seek->seek_stateid, 1026 + RD_STATE, &file); 1027 + if (status) { 1028 + dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); 1029 + return status; 1030 + } 1031 + 1032 + switch (seek->seek_whence) { 1033 + case NFS4_CONTENT_DATA: 1034 + whence = SEEK_DATA; 1035 + break; 1036 + case NFS4_CONTENT_HOLE: 1037 + whence = SEEK_HOLE; 1038 + break; 1039 + default: 1040 + status = nfserr_union_notsupp; 1041 + goto out; 1042 + } 1043 + 1044 + /* 1045 + * Note: This call does change file->f_pos, but nothing in NFSD 1046 + * should ever file->f_pos. 1047 + */ 1048 + seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence); 1049 + if (seek->seek_pos < 0) 1050 + status = nfserrno(seek->seek_pos); 1051 + else if (seek->seek_pos >= i_size_read(file_inode(file))) 1052 + seek->seek_eof = true; 1053 + 1054 + out: 1055 + fput(file); 1056 + return status; 1057 + } 1058 + 1016 1059 /* This routine never returns NFS_OK! If there are no other errors, it 1017 1060 * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the 1018 1061 * attributes matched. VERIFY is implemented by mapping NFSERR_SAME ··· 1923 1880 .op_name = "OP_FREE_STATEID", 1924 1881 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, 1925 1882 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 1883 + }, 1884 + 1885 + /* NFSv4.2 operations */ 1886 + [OP_SEEK] = { 1887 + .op_func = (nfsd4op_func)nfsd4_seek, 1888 + .op_name = "OP_SEEK", 1926 1889 }, 1927 1890 }; 1928 1891

+170 -37

fs/nfsd/nfs4recover.c

··· 58 58 void (*create)(struct nfs4_client *); 59 59 void (*remove)(struct nfs4_client *); 60 60 int (*check)(struct nfs4_client *); 61 - void (*grace_done)(struct nfsd_net *, time_t); 61 + void (*grace_done)(struct nfsd_net *); 62 62 }; 63 63 64 64 /* Globals */ ··· 188 188 189 189 status = mnt_want_write_file(nn->rec_file); 190 190 if (status) 191 - return; 191 + goto out_creds; 192 192 193 193 dir = nn->rec_file->f_path.dentry; 194 194 /* lock the parent */ ··· 228 228 user_recovery_dirname); 229 229 } 230 230 mnt_drop_write_file(nn->rec_file); 231 + out_creds: 231 232 nfs4_reset_creds(original_cred); 232 233 } 233 234 ··· 393 392 } 394 393 395 394 static void 396 - nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) 395 + nfsd4_recdir_purge_old(struct nfsd_net *nn) 397 396 { 398 397 int status; 399 398 ··· 480 479 return status; 481 480 } 482 481 482 + static void 483 + nfsd4_shutdown_recdir(struct net *net) 484 + { 485 + struct nfsd_net *nn = net_generic(net, nfsd_net_id); 486 + 487 + if (!nn->rec_file) 488 + return; 489 + fput(nn->rec_file); 490 + nn->rec_file = NULL; 491 + } 483 492 484 493 static int 485 494 nfs4_legacy_state_init(struct net *net) ··· 523 512 int status; 524 513 525 514 status = nfsd4_init_recdir(net); 526 - if (!status) 527 - status = nfsd4_recdir_load(net); 528 515 if (status) 529 - printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); 516 + return status; 517 + 518 + status = nfsd4_recdir_load(net); 519 + if (status) 520 + nfsd4_shutdown_recdir(net); 521 + 530 522 return status; 531 523 } 532 524 ··· 560 546 } 561 547 562 548 static void 563 - nfsd4_shutdown_recdir(struct nfsd_net *nn) 564 - { 565 - if (!nn->rec_file) 566 - return; 567 - fput(nn->rec_file); 568 - nn->rec_file = NULL; 569 - } 570 - 571 - static void 572 549 nfsd4_legacy_tracking_exit(struct net *net) 573 550 { 574 551 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 575 552 576 553 nfs4_release_reclaim(nn); 577 - nfsd4_shutdown_recdir(nn); 554 + nfsd4_shutdown_recdir(net); 578 555 nfs4_legacy_state_shutdown(net); 579 556 } 580 557 ··· 1021 1016 } 1022 1017 1023 1018 static void 1024 - nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) 1019 + nfsd4_cld_grace_done(struct nfsd_net *nn) 1025 1020 { 1026 1021 int ret; 1027 1022 struct cld_upcall *cup; ··· 1034 1029 } 1035 1030 1036 1031 cup->cu_msg.cm_cmd = Cld_GraceDone; 1037 - cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; 1032 + cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; 1038 1033 ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); 1039 1034 if (!ret) 1040 1035 ret = cup->cu_msg.cm_status; ··· 1067 1062 1068 1063 #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" 1069 1064 #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" 1065 + #define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION=" 1066 + #define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START=" 1070 1067 1071 1068 static char * 1072 1069 nfsd4_cltrack_legacy_topdir(void) ··· 1133 1126 return result; 1134 1127 } 1135 1128 1136 - static int 1137 - nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) 1129 + static char * 1130 + nfsd4_cltrack_client_has_session(struct nfs4_client *clp) 1138 1131 { 1139 - char *envp[2]; 1132 + int copied; 1133 + size_t len; 1134 + char *result; 1135 + 1136 + /* prefix + Y/N character + terminating NULL */ 1137 + len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1; 1138 + 1139 + result = kmalloc(len, GFP_KERNEL); 1140 + if (!result) 1141 + return result; 1142 + 1143 + copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c", 1144 + clp->cl_minorversion ? 'Y' : 'N'); 1145 + if (copied >= len) { 1146 + /* just return nothing if output was truncated */ 1147 + kfree(result); 1148 + return NULL; 1149 + } 1150 + 1151 + return result; 1152 + } 1153 + 1154 + static char * 1155 + nfsd4_cltrack_grace_start(time_t grace_start) 1156 + { 1157 + int copied; 1158 + size_t len; 1159 + char *result; 1160 + 1161 + /* prefix + max width of int64_t string + terminating NULL */ 1162 + len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1; 1163 + 1164 + result = kmalloc(len, GFP_KERNEL); 1165 + if (!result) 1166 + return result; 1167 + 1168 + copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", 1169 + grace_start); 1170 + if (copied >= len) { 1171 + /* just return nothing if output was truncated */ 1172 + kfree(result); 1173 + return NULL; 1174 + } 1175 + 1176 + return result; 1177 + } 1178 + 1179 + static int 1180 + nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1) 1181 + { 1182 + char *envp[3]; 1140 1183 char *argv[4]; 1141 1184 int ret; 1142 1185 ··· 1197 1140 1198 1141 dprintk("%s: cmd: %s\n", __func__, cmd); 1199 1142 dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); 1200 - dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); 1143 + dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)"); 1144 + dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)"); 1201 1145 1202 - envp[0] = legacy; 1203 - envp[1] = NULL; 1146 + envp[0] = env0; 1147 + envp[1] = env1; 1148 + envp[2] = NULL; 1204 1149 1205 1150 argv[0] = (char *)cltrack_prog; 1206 1151 argv[1] = cmd; ··· 1246 1187 } 1247 1188 1248 1189 static int 1249 - nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) 1190 + nfsd4_umh_cltrack_init(struct net *net) 1250 1191 { 1192 + int ret; 1193 + struct nfsd_net *nn = net_generic(net, nfsd_net_id); 1194 + char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time); 1195 + 1251 1196 /* XXX: The usermode helper s not working in container yet. */ 1252 1197 if (net != &init_net) { 1253 1198 WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " 1254 1199 "tracking in a container!\n"); 1255 1200 return -EINVAL; 1256 1201 } 1257 - return nfsd4_umh_cltrack_upcall("init", NULL, NULL); 1202 + 1203 + ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); 1204 + kfree(grace_start); 1205 + return ret; 1206 + } 1207 + 1208 + static void 1209 + nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) 1210 + { 1211 + wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK, 1212 + TASK_UNINTERRUPTIBLE); 1213 + } 1214 + 1215 + static void 1216 + nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) 1217 + { 1218 + smp_mb__before_atomic(); 1219 + clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); 1220 + smp_mb__after_atomic(); 1221 + wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); 1258 1222 } 1259 1223 1260 1224 static void 1261 1225 nfsd4_umh_cltrack_create(struct nfs4_client *clp) 1262 1226 { 1263 - char *hexid; 1227 + char *hexid, *has_session, *grace_start; 1228 + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 1229 + 1230 + /* 1231 + * With v4.0 clients, there's little difference in outcome between a 1232 + * create and check operation, and we can end up calling into this 1233 + * function multiple times per client (once for each openowner). So, 1234 + * for v4.0 clients skip upcalling once the client has been recorded 1235 + * on stable storage. 1236 + * 1237 + * For v4.1+ clients, the outcome of the two operations is different, 1238 + * so we must ensure that we upcall for the create operation. v4.1+ 1239 + * clients call this on RECLAIM_COMPLETE though, so we should only end 1240 + * up doing a single create upcall per client. 1241 + */ 1242 + if (clp->cl_minorversion == 0 && 1243 + test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 1244 + return; 1264 1245 1265 1246 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1266 1247 if (!hexid) { 1267 1248 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1268 1249 return; 1269 1250 } 1270 - nfsd4_umh_cltrack_upcall("create", hexid, NULL); 1251 + 1252 + has_session = nfsd4_cltrack_client_has_session(clp); 1253 + grace_start = nfsd4_cltrack_grace_start(nn->boot_time); 1254 + 1255 + nfsd4_cltrack_upcall_lock(clp); 1256 + if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start)) 1257 + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); 1258 + nfsd4_cltrack_upcall_unlock(clp); 1259 + 1260 + kfree(has_session); 1261 + kfree(grace_start); 1271 1262 kfree(hexid); 1272 1263 } 1273 1264 ··· 1326 1217 { 1327 1218 char *hexid; 1328 1219 1220 + if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 1221 + return; 1222 + 1329 1223 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1330 1224 if (!hexid) { 1331 1225 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1332 1226 return; 1333 1227 } 1334 - nfsd4_umh_cltrack_upcall("remove", hexid, NULL); 1228 + 1229 + nfsd4_cltrack_upcall_lock(clp); 1230 + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) && 1231 + nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0) 1232 + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); 1233 + nfsd4_cltrack_upcall_unlock(clp); 1234 + 1335 1235 kfree(hexid); 1336 1236 } 1337 1237 ··· 1348 1230 nfsd4_umh_cltrack_check(struct nfs4_client *clp) 1349 1231 { 1350 1232 int ret; 1351 - char *hexid, *legacy; 1233 + char *hexid, *has_session, *legacy; 1234 + 1235 + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) 1236 + return 0; 1352 1237 1353 1238 hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); 1354 1239 if (!hexid) { 1355 1240 dprintk("%s: can't allocate memory for upcall!\n", __func__); 1356 1241 return -ENOMEM; 1357 1242 } 1243 + 1244 + has_session = nfsd4_cltrack_client_has_session(clp); 1358 1245 legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); 1359 - ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); 1246 + 1247 + nfsd4_cltrack_upcall_lock(clp); 1248 + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) { 1249 + ret = 0; 1250 + } else { 1251 + ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy); 1252 + if (ret == 0) 1253 + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); 1254 + } 1255 + nfsd4_cltrack_upcall_unlock(clp); 1256 + kfree(has_session); 1360 1257 kfree(legacy); 1361 1258 kfree(hexid); 1259 + 1362 1260 return ret; 1363 1261 } 1364 1262 1365 1263 static void 1366 - nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, 1367 - time_t boot_time) 1264 + nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) 1368 1265 { 1369 1266 char *legacy; 1370 1267 char timestr[22]; /* FIXME: better way to determine max size? */ 1371 1268 1372 - sprintf(timestr, "%ld", boot_time); 1269 + sprintf(timestr, "%ld", nn->boot_time); 1373 1270 legacy = nfsd4_cltrack_legacy_topdir(); 1374 - nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); 1271 + nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); 1375 1272 kfree(legacy); 1376 1273 } 1377 1274 ··· 1489 1356 } 1490 1357 1491 1358 void 1492 - nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) 1359 + nfsd4_record_grace_done(struct nfsd_net *nn) 1493 1360 { 1494 1361 if (nn->client_tracking_ops) 1495 - nn->client_tracking_ops->grace_done(nn, boot_time); 1362 + nn->client_tracking_ops->grace_done(nn); 1496 1363 } 1497 1364 1498 1365 static int

+90 -25

fs/nfsd/nfs4state.c

··· 96 96 97 97 static void free_session(struct nfsd4_session *); 98 98 99 + static struct nfsd4_callback_ops nfsd4_cb_recall_ops; 100 + 99 101 static bool is_session_dead(struct nfsd4_session *ses) 100 102 { 101 103 return ses->se_flags & NFS4_SESSION_DEAD; ··· 647 645 INIT_LIST_HEAD(&dp->dl_perclnt); 648 646 INIT_LIST_HEAD(&dp->dl_recall_lru); 649 647 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 650 - INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); 648 + dp->dl_retries = 1; 649 + nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, 650 + &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); 651 651 return dp; 652 652 out_dec: 653 653 atomic_long_dec(&num_delegations); ··· 677 673 678 674 static void nfs4_put_deleg_lease(struct nfs4_file *fp) 679 675 { 680 - lockdep_assert_held(&state_lock); 676 + struct file *filp = NULL; 677 + struct file_lock *fl; 681 678 682 - if (!fp->fi_lease) 683 - return; 684 - if (atomic_dec_and_test(&fp->fi_delegees)) { 685 - vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); 679 + spin_lock(&fp->fi_lock); 680 + if (fp->fi_lease && atomic_dec_and_test(&fp->fi_delegees)) { 681 + swap(filp, fp->fi_deleg_file); 682 + fl = fp->fi_lease; 686 683 fp->fi_lease = NULL; 687 - fput(fp->fi_deleg_file); 688 - fp->fi_deleg_file = NULL; 684 + } 685 + spin_unlock(&fp->fi_lock); 686 + 687 + if (filp) { 688 + vfs_setlease(filp, F_UNLCK, &fl); 689 + fput(filp); 689 690 } 690 691 } 691 692 ··· 726 717 list_del_init(&dp->dl_recall_lru); 727 718 list_del_init(&dp->dl_perfile); 728 719 spin_unlock(&fp->fi_lock); 729 - if (fp) 730 - nfs4_put_deleg_lease(fp); 731 720 } 732 721 733 722 static void destroy_delegation(struct nfs4_delegation *dp) ··· 733 726 spin_lock(&state_lock); 734 727 unhash_delegation_locked(dp); 735 728 spin_unlock(&state_lock); 729 + nfs4_put_deleg_lease(dp->dl_stid.sc_file); 736 730 nfs4_put_stid(&dp->dl_stid); 737 731 } 738 732 ··· 742 734 struct nfs4_client *clp = dp->dl_stid.sc_client; 743 735 744 736 WARN_ON(!list_empty(&dp->dl_recall_lru)); 737 + 738 + nfs4_put_deleg_lease(dp->dl_stid.sc_file); 745 739 746 740 if (clp->cl_minorversion == 0) 747 741 nfs4_put_stid(&dp->dl_stid); ··· 1645 1635 while (!list_empty(&reaplist)) { 1646 1636 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1647 1637 list_del_init(&dp->dl_recall_lru); 1638 + nfs4_put_deleg_lease(dp->dl_stid.sc_file); 1648 1639 nfs4_put_stid(&dp->dl_stid); 1649 1640 } 1650 1641 while (!list_empty(&clp->cl_revoked)) { ··· 1873 1862 free_client(clp); 1874 1863 return NULL; 1875 1864 } 1876 - INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); 1865 + nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); 1877 1866 clp->cl_time = get_seconds(); 1878 1867 clear_bit(0, &clp->cl_cb_slot_busy); 1879 1868 copy_verf(clp, verf); ··· 3360 3349 return ret; 3361 3350 } 3362 3351 3363 - void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) 3352 + static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) 3364 3353 { 3354 + struct nfs4_delegation *dp = cb_to_delegation(cb); 3365 3355 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, 3366 3356 nfsd_net_id); 3367 3357 ··· 3383 3371 spin_unlock(&state_lock); 3384 3372 } 3385 3373 3374 + static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, 3375 + struct rpc_task *task) 3376 + { 3377 + struct nfs4_delegation *dp = cb_to_delegation(cb); 3378 + 3379 + switch (task->tk_status) { 3380 + case 0: 3381 + return 1; 3382 + case -EBADHANDLE: 3383 + case -NFS4ERR_BAD_STATEID: 3384 + /* 3385 + * Race: client probably got cb_recall before open reply 3386 + * granting delegation. 3387 + */ 3388 + if (dp->dl_retries--) { 3389 + rpc_delay(task, 2 * HZ); 3390 + return 0; 3391 + } 3392 + /*FALLTHRU*/ 3393 + default: 3394 + return -1; 3395 + } 3396 + } 3397 + 3398 + static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) 3399 + { 3400 + struct nfs4_delegation *dp = cb_to_delegation(cb); 3401 + 3402 + nfs4_put_stid(&dp->dl_stid); 3403 + } 3404 + 3405 + static struct nfsd4_callback_ops nfsd4_cb_recall_ops = { 3406 + .prepare = nfsd4_cb_recall_prepare, 3407 + .done = nfsd4_cb_recall_done, 3408 + .release = nfsd4_cb_recall_release, 3409 + }; 3410 + 3386 3411 static void nfsd_break_one_deleg(struct nfs4_delegation *dp) 3387 3412 { 3388 3413 /* ··· 3430 3381 * it's safe to take a reference. 3431 3382 */ 3432 3383 atomic_inc(&dp->dl_stid.sc_count); 3433 - nfsd4_cb_recall(dp); 3384 + nfsd4_run_cb(&dp->dl_recall); 3434 3385 } 3435 3386 3436 3387 /* Called from break_lease() with i_lock held. */ ··· 3808 3759 fl = locks_alloc_lock(); 3809 3760 if (!fl) 3810 3761 return NULL; 3811 - locks_init_lock(fl); 3812 3762 fl->fl_lmops = &nfsd_lease_mng_ops; 3813 3763 fl->fl_flags = FL_DELEG; 3814 3764 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; ··· 4155 4107 return status; 4156 4108 } 4157 4109 4158 - static void 4110 + void 4159 4111 nfsd4_end_grace(struct nfsd_net *nn) 4160 4112 { 4161 4113 /* do nothing if grace period already ended */ ··· 4164 4116 4165 4117 dprintk("NFSD: end of grace period\n"); 4166 4118 nn->grace_ended = true; 4167 - nfsd4_record_grace_done(nn, nn->boot_time); 4119 + /* 4120 + * If the server goes down again right now, an NFSv4 4121 + * client will still be allowed to reclaim after it comes back up, 4122 + * even if it hasn't yet had a chance to reclaim state this time. 4123 + * 4124 + */ 4125 + nfsd4_record_grace_done(nn); 4126 + /* 4127 + * At this point, NFSv4 clients can still reclaim. But if the 4128 + * server crashes, any that have not yet reclaimed will be out 4129 + * of luck on the next boot. 4130 + * 4131 + * (NFSv4.1+ clients are considered to have reclaimed once they 4132 + * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to 4133 + * have reclaimed after their first OPEN.) 4134 + */ 4168 4135 locks_end_grace(&nn->nfsd4_manager); 4169 4136 /* 4170 - * Now that every NFSv4 client has had the chance to recover and 4171 - * to see the (possibly new, possibly shorter) lease time, we 4172 - * can safely set the next grace time to the current lease time: 4137 + * At this point, and once lockd and/or any other containers 4138 + * exit their grace period, further reclaims will fail and 4139 + * regular locking can resume. 4173 4140 */ 4174 - nn->nfsd4_grace = nn->nfsd4_lease; 4175 4141 } 4176 4142 4177 4143 static time_t ··· 5272 5210 } 5273 5211 5274 5212 fp = lock_stp->st_stid.sc_file; 5275 - locks_init_lock(file_lock); 5276 5213 switch (lock->lk_type) { 5277 5214 case NFS4_READ_LT: 5278 5215 case NFS4_READW_LT: ··· 5415 5354 status = nfserr_jukebox; 5416 5355 goto out; 5417 5356 } 5418 - locks_init_lock(file_lock); 5357 + 5419 5358 switch (lockt->lt_type) { 5420 5359 case NFS4_READ_LT: 5421 5360 case NFS4_READW_LT: ··· 5493 5432 status = nfserr_jukebox; 5494 5433 goto fput; 5495 5434 } 5496 - locks_init_lock(file_lock); 5435 + 5497 5436 file_lock->fl_type = F_UNLCK; 5498 5437 file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); 5499 5438 file_lock->fl_pid = current->tgid; ··· 5705 5644 status = lookup_clientid(clid, cstate, nn); 5706 5645 if (status) 5707 5646 return nfserr_reclaim_bad; 5647 + 5648 + if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags)) 5649 + return nfserr_no_grace; 5708 5650 5709 5651 if (nfsd4_client_record_check(cstate->clp)) 5710 5652 return nfserr_reclaim_bad; ··· 6406 6342 ret = nfs4_state_create_net(net); 6407 6343 if (ret) 6408 6344 return ret; 6409 - nfsd4_client_tracking_init(net); 6410 6345 nn->boot_time = get_seconds(); 6411 - locks_start_grace(net, &nn->nfsd4_manager); 6412 6346 nn->grace_ended = false; 6347 + locks_start_grace(net, &nn->nfsd4_manager); 6348 + nfsd4_client_tracking_init(net); 6413 6349 printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", 6414 6350 nn->nfsd4_grace, net); 6415 6351 queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); ··· 6466 6402 list_for_each_safe(pos, next, &reaplist) { 6467 6403 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6468 6404 list_del_init(&dp->dl_recall_lru); 6405 + nfs4_put_deleg_lease(dp->dl_stid.sc_file); 6469 6406 nfs4_put_stid(&dp->dl_stid); 6470 6407 } 6471 6408

+68 -7

fs/nfsd/nfs4xdr.c

··· 31 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 - * 35 - * TODO: Neil Brown made the following observation: We currently 36 - * initially reserve NFSD_BUFSIZE space on the transmit queue and 37 - * never release any of that until the request is complete. 38 - * It would be good to calculate a new maximum response size while 39 - * decoding the COMPOUND, and call svc_reserve with this number 40 - * at the end of nfs4svc_decode_compoundargs. 41 34 */ 42 35 43 36 #include <linux/slab.h> ··· 1514 1521 } 1515 1522 1516 1523 static __be32 1524 + nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) 1525 + { 1526 + DECODE_HEAD; 1527 + 1528 + status = nfsd4_decode_stateid(argp, &seek->seek_stateid); 1529 + if (status) 1530 + return status; 1531 + 1532 + READ_BUF(8 + 4); 1533 + p = xdr_decode_hyper(p, &seek->seek_offset); 1534 + seek->seek_whence = be32_to_cpup(p); 1535 + 1536 + DECODE_TAIL; 1537 + } 1538 + 1539 + static __be32 1517 1540 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) 1518 1541 { 1519 1542 return nfs_ok; ··· 1602 1593 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1603 1594 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, 1604 1595 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, 1596 + 1597 + /* new operations for NFSv4.2 */ 1598 + [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, 1599 + [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp, 1600 + [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, 1601 + [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, 1602 + [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, 1603 + [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, 1604 + [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, 1605 + [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp, 1606 + [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, 1607 + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, 1608 + [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, 1609 + [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, 1605 1610 }; 1606 1611 1607 1612 static inline bool ··· 1693 1670 readbytes += nfsd4_max_reply(argp->rqstp, op); 1694 1671 } else 1695 1672 max_reply += nfsd4_max_reply(argp->rqstp, op); 1673 + /* 1674 + * OP_LOCK may return a conflicting lock. (Special case 1675 + * because it will just skip encoding this if it runs 1676 + * out of xdr buffer space, and it is the only operation 1677 + * that behaves this way.) 1678 + */ 1679 + if (op->opnum == OP_LOCK) 1680 + max_reply += NFS4_OPAQUE_LIMIT; 1696 1681 1697 1682 if (op->status) { 1698 1683 argp->opcnt = i+1; ··· 3795 3764 } 3796 3765 3797 3766 static __be32 3767 + nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, 3768 + struct nfsd4_seek *seek) 3769 + { 3770 + __be32 *p; 3771 + 3772 + if (nfserr) 3773 + return nfserr; 3774 + 3775 + p = xdr_reserve_space(&resp->xdr, 4 + 8); 3776 + *p++ = cpu_to_be32(seek->seek_eof); 3777 + p = xdr_encode_hyper(p, seek->seek_pos); 3778 + 3779 + return nfserr; 3780 + } 3781 + 3782 + static __be32 3798 3783 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) 3799 3784 { 3800 3785 return nfserr; ··· 3882 3835 [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 3883 3836 [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, 3884 3837 [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, 3838 + 3839 + /* NFSv4.2 operations */ 3840 + [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, 3841 + [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop, 3842 + [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, 3843 + [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, 3844 + [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, 3845 + [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, 3846 + [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, 3847 + [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, 3848 + [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, 3849 + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, 3850 + [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, 3851 + [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, 3885 3852 }; 3886 3853 3887 3854 /*

+115 -103

fs/nfsd/nfscache.c

··· 27 27 */ 28 28 #define TARGET_BUCKET_SIZE 64 29 29 30 - static struct hlist_head * cache_hash; 31 - static struct list_head lru_head; 30 + struct nfsd_drc_bucket { 31 + struct list_head lru_head; 32 + spinlock_t cache_lock; 33 + }; 34 + 35 + static struct nfsd_drc_bucket *drc_hashtbl; 32 36 static struct kmem_cache *drc_slab; 33 37 34 38 /* max number of entries allowed in the cache */ ··· 40 36 41 37 /* number of significant bits in the hash value */ 42 38 static unsigned int maskbits; 39 + static unsigned int drc_hashsize; 43 40 44 41 /* 45 42 * Stats and other tracking of on the duplicate reply cache. All of these and ··· 48 43 */ 49 44 50 45 /* total number of entries */ 51 - static unsigned int num_drc_entries; 46 + static atomic_t num_drc_entries; 52 47 53 48 /* cache misses due only to checksum comparison failures */ 54 49 static unsigned int payload_misses; ··· 80 75 * A cache entry is "single use" if c_state == RC_INPROG 81 76 * Otherwise, it when accessing _prev or _next, the lock must be held. 82 77 */ 83 - static DEFINE_SPINLOCK(cache_lock); 84 78 static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); 85 79 86 80 /* ··· 120 116 return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); 121 117 } 122 118 119 + static u32 120 + nfsd_cache_hash(__be32 xid) 121 + { 122 + return hash_32(be32_to_cpu(xid), maskbits); 123 + } 124 + 123 125 static struct svc_cacherep * 124 126 nfsd_reply_cache_alloc(void) 125 127 { ··· 136 126 rp->c_state = RC_UNUSED; 137 127 rp->c_type = RC_NOCACHE; 138 128 INIT_LIST_HEAD(&rp->c_lru); 139 - INIT_HLIST_NODE(&rp->c_hash); 140 129 } 141 130 return rp; 142 131 } ··· 147 138 drc_mem_usage -= rp->c_replvec.iov_len; 148 139 kfree(rp->c_replvec.iov_base); 149 140 } 150 - if (!hlist_unhashed(&rp->c_hash)) 151 - hlist_del(&rp->c_hash); 152 141 list_del(&rp->c_lru); 153 - --num_drc_entries; 142 + atomic_dec(&num_drc_entries); 154 143 drc_mem_usage -= sizeof(*rp); 155 144 kmem_cache_free(drc_slab, rp); 156 145 } 157 146 158 147 static void 159 - nfsd_reply_cache_free(struct svc_cacherep *rp) 148 + nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) 160 149 { 161 - spin_lock(&cache_lock); 150 + spin_lock(&b->cache_lock); 162 151 nfsd_reply_cache_free_locked(rp); 163 - spin_unlock(&cache_lock); 152 + spin_unlock(&b->cache_lock); 164 153 } 165 154 166 155 int nfsd_reply_cache_init(void) 167 156 { 168 157 unsigned int hashsize; 158 + unsigned int i; 169 159 170 - INIT_LIST_HEAD(&lru_head); 171 160 max_drc_entries = nfsd_cache_size_limit(); 172 - num_drc_entries = 0; 161 + atomic_set(&num_drc_entries, 0); 173 162 hashsize = nfsd_hashsize(max_drc_entries); 174 163 maskbits = ilog2(hashsize); 175 164 ··· 177 170 if (!drc_slab) 178 171 goto out_nomem; 179 172 180 - cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); 181 - if (!cache_hash) 173 + drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); 174 + if (!drc_hashtbl) 182 175 goto out_nomem; 176 + for (i = 0; i < hashsize; i++) { 177 + INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); 178 + spin_lock_init(&drc_hashtbl[i].cache_lock); 179 + } 180 + drc_hashsize = hashsize; 183 181 184 182 return 0; 185 183 out_nomem: ··· 196 184 void nfsd_reply_cache_shutdown(void) 197 185 { 198 186 struct svc_cacherep *rp; 187 + unsigned int i; 199 188 200 189 unregister_shrinker(&nfsd_reply_cache_shrinker); 201 190 cancel_delayed_work_sync(&cache_cleaner); 202 191 203 - while (!list_empty(&lru_head)) { 204 - rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); 205 - nfsd_reply_cache_free_locked(rp); 192 + for (i = 0; i < drc_hashsize; i++) { 193 + struct list_head *head = &drc_hashtbl[i].lru_head; 194 + while (!list_empty(head)) { 195 + rp = list_first_entry(head, struct svc_cacherep, c_lru); 196 + nfsd_reply_cache_free_locked(rp); 197 + } 206 198 } 207 199 208 - kfree (cache_hash); 209 - cache_hash = NULL; 200 + kfree (drc_hashtbl); 201 + drc_hashtbl = NULL; 202 + drc_hashsize = 0; 210 203 211 204 if (drc_slab) { 212 205 kmem_cache_destroy(drc_slab); ··· 224 207 * not already scheduled. 225 208 */ 226 209 static void 227 - lru_put_end(struct svc_cacherep *rp) 210 + lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) 228 211 { 229 212 rp->c_timestamp = jiffies; 230 - list_move_tail(&rp->c_lru, &lru_head); 213 + list_move_tail(&rp->c_lru, &b->lru_head); 231 214 schedule_delayed_work(&cache_cleaner, RC_EXPIRE); 232 215 } 233 216 234 - /* 235 - * Move a cache entry from one hash list to another 236 - */ 237 - static void 238 - hash_refile(struct svc_cacherep *rp) 217 + static long 218 + prune_bucket(struct nfsd_drc_bucket *b) 239 219 { 240 - hlist_del_init(&rp->c_hash); 241 - /* 242 - * No point in byte swapping c_xid since we're just using it to pick 243 - * a hash bucket. 244 - */ 245 - hlist_add_head(&rp->c_hash, cache_hash + 246 - hash_32((__force u32)rp->c_xid, maskbits)); 220 + struct svc_cacherep *rp, *tmp; 221 + long freed = 0; 222 + 223 + list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { 224 + /* 225 + * Don't free entries attached to calls that are still 226 + * in-progress, but do keep scanning the list. 227 + */ 228 + if (rp->c_state == RC_INPROG) 229 + continue; 230 + if (atomic_read(&num_drc_entries) <= max_drc_entries && 231 + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) 232 + break; 233 + nfsd_reply_cache_free_locked(rp); 234 + freed++; 235 + } 236 + return freed; 247 237 } 248 238 249 239 /* ··· 260 236 static long 261 237 prune_cache_entries(void) 262 238 { 263 - struct svc_cacherep *rp, *tmp; 239 + unsigned int i; 264 240 long freed = 0; 241 + bool cancel = true; 265 242 266 - list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { 267 - /* 268 - * Don't free entries attached to calls that are still 269 - * in-progress, but do keep scanning the list. 270 - */ 271 - if (rp->c_state == RC_INPROG) 243 + for (i = 0; i < drc_hashsize; i++) { 244 + struct nfsd_drc_bucket *b = &drc_hashtbl[i]; 245 + 246 + if (list_empty(&b->lru_head)) 272 247 continue; 273 - if (num_drc_entries <= max_drc_entries && 274 - time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) 275 - break; 276 - nfsd_reply_cache_free_locked(rp); 277 - freed++; 248 + spin_lock(&b->cache_lock); 249 + freed += prune_bucket(b); 250 + if (!list_empty(&b->lru_head)) 251 + cancel = false; 252 + spin_unlock(&b->cache_lock); 278 253 } 279 254 280 255 /* 281 - * Conditionally rearm the job. If we cleaned out the list, then 282 - * cancel any pending run (since there won't be any work to do). 283 - * Otherwise, we rearm the job or modify the existing one to run in 284 - * RC_EXPIRE since we just ran the pruner. 256 + * Conditionally rearm the job to run in RC_EXPIRE since we just 257 + * ran the pruner. 285 258 */ 286 - if (list_empty(&lru_head)) 287 - cancel_delayed_work(&cache_cleaner); 288 - else 259 + if (!cancel) 289 260 mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); 290 261 return freed; 291 262 } ··· 288 269 static void 289 270 cache_cleaner_func(struct work_struct *unused) 290 271 { 291 - spin_lock(&cache_lock); 292 272 prune_cache_entries(); 293 - spin_unlock(&cache_lock); 294 273 } 295 274 296 275 static unsigned long 297 276 nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) 298 277 { 299 - unsigned long num; 300 - 301 - spin_lock(&cache_lock); 302 - num = num_drc_entries; 303 - spin_unlock(&cache_lock); 304 - 305 - return num; 278 + return atomic_read(&num_drc_entries); 306 279 } 307 280 308 281 static unsigned long 309 282 nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 310 283 { 311 - unsigned long freed; 312 - 313 - spin_lock(&cache_lock); 314 - freed = prune_cache_entries(); 315 - spin_unlock(&cache_lock); 316 - return freed; 284 + return prune_cache_entries(); 317 285 } 318 286 /* 319 287 * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes ··· 338 332 static bool 339 333 nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) 340 334 { 341 - /* Check RPC header info first */ 342 - if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || 343 - rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || 344 - rqstp->rq_arg.len != rp->c_len || 345 - !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || 346 - rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) 335 + /* Check RPC XID first */ 336 + if (rqstp->rq_xid != rp->c_xid) 347 337 return false; 348 - 349 338 /* compare checksum of NFS data */ 350 339 if (csum != rp->c_csum) { 351 340 ++payload_misses; 352 341 return false; 353 342 } 343 + 344 + /* Other discriminators */ 345 + if (rqstp->rq_proc != rp->c_proc || 346 + rqstp->rq_prot != rp->c_prot || 347 + rqstp->rq_vers != rp->c_vers || 348 + rqstp->rq_arg.len != rp->c_len || 349 + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || 350 + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) 351 + return false; 354 352 355 353 return true; 356 354 } ··· 365 355 * NULL on failure. 366 356 */ 367 357 static struct svc_cacherep * 368 - nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) 358 + nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, 359 + __wsum csum) 369 360 { 370 361 struct svc_cacherep *rp, *ret = NULL; 371 - struct hlist_head *rh; 362 + struct list_head *rh = &b->lru_head; 372 363 unsigned int entries = 0; 373 364 374 - /* 375 - * No point in byte swapping rq_xid since we're just using it to pick 376 - * a hash bucket. 377 - */ 378 - rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; 379 - hlist_for_each_entry(rp, rh, c_hash) { 365 + list_for_each_entry(rp, rh, c_lru) { 380 366 ++entries; 381 367 if (nfsd_cache_match(rqstp, csum, rp)) { 382 368 ret = rp; ··· 383 377 /* tally hash chain length stats */ 384 378 if (entries > longest_chain) { 385 379 longest_chain = entries; 386 - longest_chain_cachesize = num_drc_entries; 380 + longest_chain_cachesize = atomic_read(&num_drc_entries); 387 381 } else if (entries == longest_chain) { 388 382 /* prefer to keep the smallest cachesize possible here */ 389 - longest_chain_cachesize = min(longest_chain_cachesize, 390 - num_drc_entries); 383 + longest_chain_cachesize = min_t(unsigned int, 384 + longest_chain_cachesize, 385 + atomic_read(&num_drc_entries)); 391 386 } 392 387 393 388 return ret; ··· 410 403 vers = rqstp->rq_vers, 411 404 proc = rqstp->rq_proc; 412 405 __wsum csum; 406 + u32 hash = nfsd_cache_hash(xid); 407 + struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; 413 408 unsigned long age; 414 409 int type = rqstp->rq_cachetype; 415 410 int rtn = RC_DOIT; ··· 429 420 * preallocate an entry. 430 421 */ 431 422 rp = nfsd_reply_cache_alloc(); 432 - spin_lock(&cache_lock); 423 + spin_lock(&b->cache_lock); 433 424 if (likely(rp)) { 434 - ++num_drc_entries; 425 + atomic_inc(&num_drc_entries); 435 426 drc_mem_usage += sizeof(*rp); 436 427 } 437 428 438 429 /* go ahead and prune the cache */ 439 - prune_cache_entries(); 430 + prune_bucket(b); 440 431 441 - found = nfsd_cache_search(rqstp, csum); 432 + found = nfsd_cache_search(b, rqstp, csum); 442 433 if (found) { 443 434 if (likely(rp)) 444 435 nfsd_reply_cache_free_locked(rp); ··· 463 454 rp->c_len = rqstp->rq_arg.len; 464 455 rp->c_csum = csum; 465 456 466 - hash_refile(rp); 467 - lru_put_end(rp); 457 + lru_put_end(b, rp); 468 458 469 459 /* release any buffer */ 470 460 if (rp->c_type == RC_REPLBUFF) { ··· 473 465 } 474 466 rp->c_type = RC_NOCACHE; 475 467 out: 476 - spin_unlock(&cache_lock); 468 + spin_unlock(&b->cache_lock); 477 469 return rtn; 478 470 479 471 found_entry: 480 472 nfsdstats.rchits++; 481 473 /* We found a matching entry which is either in progress or done. */ 482 474 age = jiffies - rp->c_timestamp; 483 - lru_put_end(rp); 475 + lru_put_end(b, rp); 484 476 485 477 rtn = RC_DROPIT; 486 478 /* Request being processed or excessive rexmits */ ··· 535 527 { 536 528 struct svc_cacherep *rp = rqstp->rq_cacherep; 537 529 struct kvec *resv = &rqstp->rq_res.head[0], *cachv; 530 + u32 hash; 531 + struct nfsd_drc_bucket *b; 538 532 int len; 539 533 size_t bufsize = 0; 540 534 541 535 if (!rp) 542 536 return; 543 537 538 + hash = nfsd_cache_hash(rp->c_xid); 539 + b = &drc_hashtbl[hash]; 540 + 544 541 len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); 545 542 len >>= 2; 546 543 547 544 /* Don't cache excessive amounts of data and XDR failures */ 548 545 if (!statp || len > (256 >> 2)) { 549 - nfsd_reply_cache_free(rp); 546 + nfsd_reply_cache_free(b, rp); 550 547 return; 551 548 } 552 549 ··· 566 553 bufsize = len << 2; 567 554 cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); 568 555 if (!cachv->iov_base) { 569 - nfsd_reply_cache_free(rp); 556 + nfsd_reply_cache_free(b, rp); 570 557 return; 571 558 } 572 559 cachv->iov_len = bufsize; 573 560 memcpy(cachv->iov_base, statp, bufsize); 574 561 break; 575 562 case RC_NOCACHE: 576 - nfsd_reply_cache_free(rp); 563 + nfsd_reply_cache_free(b, rp); 577 564 return; 578 565 } 579 - spin_lock(&cache_lock); 566 + spin_lock(&b->cache_lock); 580 567 drc_mem_usage += bufsize; 581 - lru_put_end(rp); 568 + lru_put_end(b, rp); 582 569 rp->c_secure = rqstp->rq_secure; 583 570 rp->c_type = cachetype; 584 571 rp->c_state = RC_DONE; 585 - spin_unlock(&cache_lock); 572 + spin_unlock(&b->cache_lock); 586 573 return; 587 574 } 588 575 ··· 613 600 */ 614 601 static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) 615 602 { 616 - spin_lock(&cache_lock); 617 603 seq_printf(m, "max entries: %u\n", max_drc_entries); 618 - seq_printf(m, "num entries: %u\n", num_drc_entries); 604 + seq_printf(m, "num entries: %u\n", 605 + atomic_read(&num_drc_entries)); 619 606 seq_printf(m, "hash buckets: %u\n", 1 << maskbits); 620 607 seq_printf(m, "mem usage: %u\n", drc_mem_usage); 621 608 seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); ··· 624 611 seq_printf(m, "payload misses: %u\n", payload_misses); 625 612 seq_printf(m, "longest chain len: %u\n", longest_chain); 626 613 seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); 627 - spin_unlock(&cache_lock); 628 614 return 0; 629 615 } 630 616

+45

fs/nfsd/nfsctl.c

··· 49 49 NFSD_Leasetime, 50 50 NFSD_Gracetime, 51 51 NFSD_RecoveryDir, 52 + NFSD_V4EndGrace, 52 53 #endif 53 54 }; 54 55 ··· 69 68 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 70 69 static ssize_t write_gracetime(struct file *file, char *buf, size_t size); 71 70 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 71 + static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); 72 72 #endif 73 73 74 74 static ssize_t (*write_op[])(struct file *, char *, size_t) = { ··· 86 84 [NFSD_Leasetime] = write_leasetime, 87 85 [NFSD_Gracetime] = write_gracetime, 88 86 [NFSD_RecoveryDir] = write_recoverydir, 87 + [NFSD_V4EndGrace] = write_v4_end_grace, 89 88 #endif 90 89 }; 91 90 ··· 1080 1077 return rv; 1081 1078 } 1082 1079 1080 + /** 1081 + * write_v4_end_grace - release grace period for nfsd's v4.x lock manager 1082 + * 1083 + * Input: 1084 + * buf: ignored 1085 + * size: zero 1086 + * OR 1087 + * 1088 + * Input: 1089 + * buf: any value 1090 + * size: non-zero length of C string in @buf 1091 + * Output: 1092 + * passed-in buffer filled with "Y" or "N" with a newline 1093 + * and NULL-terminated C string. This indicates whether 1094 + * the grace period has ended in the current net 1095 + * namespace. Return code is the size in bytes of the 1096 + * string. Writing a string that starts with 'Y', 'y', or 1097 + * '1' to the file will end the grace period for nfsd's v4 1098 + * lock manager. 1099 + */ 1100 + static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) 1101 + { 1102 + struct net *net = file->f_dentry->d_sb->s_fs_info; 1103 + struct nfsd_net *nn = net_generic(net, nfsd_net_id); 1104 + 1105 + if (size > 0) { 1106 + switch(buf[0]) { 1107 + case 'Y': 1108 + case 'y': 1109 + case '1': 1110 + nfsd4_end_grace(nn); 1111 + break; 1112 + default: 1113 + return -EINVAL; 1114 + } 1115 + } 1116 + 1117 + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", 1118 + nn->grace_ended ? 'Y' : 'N'); 1119 + } 1120 + 1083 1121 #endif 1084 1122 1085 1123 /*----------------------------------------------------------------------------*/ ··· 1154 1110 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1155 1111 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1156 1112 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 1113 + [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, 1157 1114 #endif 1158 1115 /* last one */ {""} 1159 1116 };

+1 -1

fs/nfsd/nfsd.h

··· 251 251 #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) 252 252 #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) 253 253 #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) 254 - #define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) 254 + #define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) 255 255 #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) 256 256 #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) 257 257 #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)

+4 -2

fs/nfsd/nfsfh.c

··· 209 209 * fix that case easily. 210 210 */ 211 211 struct cred *new = prepare_creds(); 212 - if (!new) 213 - return nfserrno(-ENOMEM); 212 + if (!new) { 213 + error = nfserrno(-ENOMEM); 214 + goto out; 215 + } 214 216 new->cap_effective = 215 217 cap_raise_nfsd_set(new->cap_effective, 216 218 new->cap_permitted);

+25 -6

fs/nfsd/state.h

··· 62 62 (s)->si_generation 63 63 64 64 struct nfsd4_callback { 65 - void *cb_op; 66 65 struct nfs4_client *cb_clp; 67 66 struct list_head cb_per_client; 68 67 u32 cb_minorversion; 69 68 struct rpc_message cb_msg; 70 - const struct rpc_call_ops *cb_ops; 69 + struct nfsd4_callback_ops *cb_ops; 71 70 struct work_struct cb_work; 72 71 bool cb_done; 72 + }; 73 + 74 + struct nfsd4_callback_ops { 75 + void (*prepare)(struct nfsd4_callback *); 76 + int (*done)(struct nfsd4_callback *, struct rpc_task *); 77 + void (*release)(struct nfsd4_callback *); 73 78 }; 74 79 75 80 /* ··· 131 126 int dl_retries; 132 127 struct nfsd4_callback dl_recall; 133 128 }; 129 + 130 + #define cb_to_delegation(cb) \ 131 + container_of(cb, struct nfs4_delegation, dl_recall) 134 132 135 133 /* client delegation callback info */ 136 134 struct nfs4_cb_conn { ··· 314 306 #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ 315 307 #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ 316 308 #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ 309 + #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ 317 310 #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 318 311 1 << NFSD4_CLIENT_CB_KILL) 319 312 unsigned long cl_flags; ··· 526 517 #define RD_STATE 0x00000010 527 518 #define WR_STATE 0x00000020 528 519 520 + enum nfsd4_cb_op { 521 + NFSPROC4_CLNT_CB_NULL = 0, 522 + NFSPROC4_CLNT_CB_RECALL, 523 + NFSPROC4_CLNT_CB_SEQUENCE, 524 + }; 525 + 526 + 529 527 struct nfsd4_compound_state; 530 528 struct nfsd_net; 531 529 ··· 547 531 extern __be32 nfs4_check_open_reclaim(clientid_t *clid, 548 532 struct nfsd4_compound_state *cstate, struct nfsd_net *nn); 549 533 extern int set_callback_cred(void); 550 - void nfsd4_run_cb_null(struct work_struct *w); 551 - void nfsd4_run_cb_recall(struct work_struct *w); 552 534 extern void nfsd4_probe_callback(struct nfs4_client *clp); 553 535 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); 554 536 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 555 - extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 537 + extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, 538 + struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); 539 + extern void nfsd4_run_cb(struct nfsd4_callback *cb); 556 540 extern int nfsd4_create_callback_queue(void); 557 541 extern void nfsd4_destroy_callback_queue(void); 558 542 extern void nfsd4_shutdown_callback(struct nfs4_client *); ··· 561 545 struct nfsd_net *nn); 562 546 extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); 563 547 548 + /* grace period management */ 549 + void nfsd4_end_grace(struct nfsd_net *nn); 550 + 564 551 /* nfs4recover operations */ 565 552 extern int nfsd4_client_tracking_init(struct net *net); 566 553 extern void nfsd4_client_tracking_exit(struct net *net); 567 554 extern void nfsd4_client_record_create(struct nfs4_client *clp); 568 555 extern void nfsd4_client_record_remove(struct nfs4_client *clp); 569 556 extern int nfsd4_client_record_check(struct nfs4_client *clp); 570 - extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); 557 + extern void nfsd4_record_grace_done(struct nfsd_net *nn); 571 558 572 559 /* nfs fault injection functions */ 573 560 #ifdef CONFIG_NFSD_FAULT_INJECTION

+27 -10

fs/nfsd/vfs.c

··· 445 445 if (err) 446 446 goto out; 447 447 size_change = 1; 448 + 449 + /* 450 + * RFC5661, Section 18.30.4: 451 + * Changing the size of a file with SETATTR indirectly 452 + * changes the time_modify and change attributes. 453 + * 454 + * (and similar for the older RFCs) 455 + */ 456 + if (iap->ia_size != i_size_read(inode)) 457 + iap->ia_valid |= ATTR_MTIME; 448 458 } 449 459 450 460 iap->ia_valid |= ATTR_CTIME; ··· 659 649 { 660 650 struct path path; 661 651 struct inode *inode; 652 + struct file *file; 662 653 int flags = O_RDONLY|O_LARGEFILE; 663 654 __be32 err; 664 655 int host_err = 0; ··· 714 703 else 715 704 flags = O_WRONLY|O_LARGEFILE; 716 705 } 717 - *filp = dentry_open(&path, flags, current_cred()); 718 - if (IS_ERR(*filp)) { 719 - host_err = PTR_ERR(*filp); 720 - *filp = NULL; 721 - } else { 722 - host_err = ima_file_check(*filp, may_flags); 723 706 724 - if (may_flags & NFSD_MAY_64BIT_COOKIE) 725 - (*filp)->f_mode |= FMODE_64BITHASH; 726 - else 727 - (*filp)->f_mode |= FMODE_32BITHASH; 707 + file = dentry_open(&path, flags, current_cred()); 708 + if (IS_ERR(file)) { 709 + host_err = PTR_ERR(file); 710 + goto out_nfserr; 728 711 } 729 712 713 + host_err = ima_file_check(file, may_flags); 714 + if (host_err) { 715 + nfsd_close(file); 716 + goto out_nfserr; 717 + } 718 + 719 + if (may_flags & NFSD_MAY_64BIT_COOKIE) 720 + file->f_mode |= FMODE_64BITHASH; 721 + else 722 + file->f_mode |= FMODE_32BITHASH; 723 + 724 + *filp = file; 730 725 out_nfserr: 731 726 err = nfserrno(host_err); 732 727 out:

+14

fs/nfsd/xdr4.h

··· 428 428 u32 rca_one_fs; 429 429 }; 430 430 431 + struct nfsd4_seek { 432 + /* request */ 433 + stateid_t seek_stateid; 434 + loff_t seek_offset; 435 + u32 seek_whence; 436 + 437 + /* response */ 438 + u32 seek_eof; 439 + loff_t seek_pos; 440 + }; 441 + 431 442 struct nfsd4_op { 432 443 int opnum; 433 444 __be32 status; ··· 484 473 struct nfsd4_reclaim_complete reclaim_complete; 485 474 struct nfsd4_test_stateid test_stateid; 486 475 struct nfsd4_free_stateid free_stateid; 476 + 477 + /* NFSv4.2 */ 478 + struct nfsd4_seek seek; 487 479 } u; 488 480 struct nfs4_replay * replay; 489 481 };

+23 -3

include/linux/nfs4.h

··· 110 110 OP_DESTROY_CLIENTID = 57, 111 111 OP_RECLAIM_COMPLETE = 58, 112 112 113 + /* nfs42 */ 114 + OP_ALLOCATE = 59, 115 + OP_COPY = 60, 116 + OP_COPY_NOTIFY = 61, 117 + OP_DEALLOCATE = 62, 118 + OP_IO_ADVISE = 63, 119 + OP_LAYOUTERROR = 64, 120 + OP_LAYOUTSTATS = 65, 121 + OP_OFFLOAD_CANCEL = 66, 122 + OP_OFFLOAD_STATUS = 67, 123 + OP_READ_PLUS = 68, 124 + OP_SEEK = 69, 125 + OP_WRITE_SAME = 70, 126 + 113 127 OP_ILLEGAL = 10044, 114 128 }; 115 129 ··· 131 117 Needs to be updated if more operations are defined in future.*/ 132 118 133 119 #define FIRST_NFS4_OP OP_ACCESS 134 - #define LAST_NFS4_OP OP_RECLAIM_COMPLETE 120 + #define LAST_NFS4_OP OP_WRITE_SAME 135 121 #define LAST_NFS40_OP OP_RELEASE_LOCKOWNER 136 122 #define LAST_NFS41_OP OP_RECLAIM_COMPLETE 137 - #define LAST_NFS42_OP OP_RECLAIM_COMPLETE 123 + #define LAST_NFS42_OP OP_WRITE_SAME 138 124 139 125 enum nfsstat4 { 140 126 NFS4_OK = 0, ··· 249 235 /* nfs42 */ 250 236 NFS4ERR_PARTNER_NOTSUPP = 10088, 251 237 NFS4ERR_PARTNER_NO_AUTH = 10089, 252 - NFS4ERR_METADATA_NOTSUPP = 10090, 238 + NFS4ERR_UNION_NOTSUPP = 10090, 253 239 NFS4ERR_OFFLOAD_DENIED = 10091, 254 240 NFS4ERR_WRONG_LFS = 10092, 255 241 NFS4ERR_BADLABEL = 10093, 242 + NFS4ERR_OFFLOAD_NO_REQS = 10094, 256 243 }; 257 244 258 245 static inline bool seqid_mutating_err(u32 err) ··· 548 533 549 534 struct nfs4_deviceid { 550 535 char data[NFS4_DEVICEID4_SIZE]; 536 + }; 537 + 538 + enum data_content4 { 539 + NFS4_CONTENT_DATA = 0, 540 + NFS4_CONTENT_HOLE = 1, 551 541 }; 552 542 553 543 #endif

+2

include/linux/proc_fs.h

··· 74 74 75 75 #endif /* CONFIG_PROC_FS */ 76 76 77 + struct net; 78 + 77 79 static inline struct proc_dir_entry *proc_net_mkdir( 78 80 struct net *net, const char *name, struct proc_dir_entry *parent) 79 81 {

-1

include/linux/sunrpc/svc.h

··· 280 280 bool rq_splice_ok; /* turned off in gss privacy 281 281 * to prevent encrypting page 282 282 * cache pages */ 283 - wait_queue_head_t rq_wait; /* synchronization */ 284 283 struct task_struct *rq_task; /* service thread */ 285 284 }; 286 285

+3 -2

include/uapi/linux/nfsd/export.h

··· 28 28 #define NFSEXP_ALLSQUASH 0x0008 29 29 #define NFSEXP_ASYNC 0x0010 30 30 #define NFSEXP_GATHERED_WRITES 0x0020 31 - /* 40 80 100 currently unused */ 31 + #define NFSEXP_NOREADDIRPLUS 0x0040 32 + /* 80 100 currently unused */ 32 33 #define NFSEXP_NOHIDE 0x0200 33 34 #define NFSEXP_NOSUBTREECHECK 0x0400 34 35 #define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */ ··· 48 47 */ 49 48 #define NFSEXP_V4ROOT 0x10000 50 49 /* All flags that we claim to support. (Note we don't support NOACL.) */ 51 - #define NFSEXP_ALLFLAGS 0x17E3F 50 + #define NFSEXP_ALLFLAGS 0x1FE7F 52 51 53 52 /* The flags that may vary depending on security flavor: */ 54 53 #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \

-2

net/sunrpc/svc.c

··· 612 612 if (!rqstp) 613 613 goto out_enomem; 614 614 615 - init_waitqueue_head(&rqstp->rq_wait); 616 - 617 615 serv->sv_nrthreads++; 618 616 spin_lock_bh(&pool->sp_lock); 619 617 pool->sp_nrthreads++;

+33 -48

net/sunrpc/svc_xprt.c

··· 346 346 if (!svc_xprt_has_something_to_do(xprt)) 347 347 return; 348 348 349 - cpu = get_cpu(); 350 - pool = svc_pool_for_cpu(xprt->xpt_server, cpu); 351 - put_cpu(); 352 - 353 - spin_lock_bh(&pool->sp_lock); 354 - 355 - if (!list_empty(&pool->sp_threads) && 356 - !list_empty(&pool->sp_sockets)) 357 - printk(KERN_ERR 358 - "svc_xprt_enqueue: " 359 - "threads and transports both waiting??\n"); 360 - 361 - pool->sp_stats.packets++; 362 - 363 349 /* Mark transport as busy. It will remain in this state until 364 350 * the provider calls svc_xprt_received. We update XPT_BUSY 365 351 * atomically because it also guards against trying to enqueue ··· 354 368 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { 355 369 /* Don't enqueue transport while already enqueued */ 356 370 dprintk("svc: transport %p busy, not enqueued\n", xprt); 357 - goto out_unlock; 371 + return; 358 372 } 373 + 374 + cpu = get_cpu(); 375 + pool = svc_pool_for_cpu(xprt->xpt_server, cpu); 376 + spin_lock_bh(&pool->sp_lock); 377 + 378 + pool->sp_stats.packets++; 359 379 360 380 if (!list_empty(&pool->sp_threads)) { 361 381 rqstp = list_entry(pool->sp_threads.next, ··· 374 382 printk(KERN_ERR 375 383 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", 376 384 rqstp, rqstp->rq_xprt); 377 - rqstp->rq_xprt = xprt; 385 + /* Note the order of the following 3 lines: 386 + * We want to assign xprt to rqstp->rq_xprt only _after_ 387 + * we've woken up the process, so that we don't race with 388 + * the lockless check in svc_get_next_xprt(). 389 + */ 378 390 svc_xprt_get(xprt); 391 + wake_up_process(rqstp->rq_task); 392 + rqstp->rq_xprt = xprt; 379 393 pool->sp_stats.threads_woken++; 380 - wake_up(&rqstp->rq_wait); 381 394 } else { 382 395 dprintk("svc: transport %p put into queue\n", xprt); 383 396 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 384 397 pool->sp_stats.sockets_queued++; 385 398 } 386 399 387 - out_unlock: 388 400 spin_unlock_bh(&pool->sp_lock); 401 + put_cpu(); 389 402 } 390 403 391 404 /* ··· 506 509 svc_thread_dequeue(pool, rqstp); 507 510 rqstp->rq_xprt = NULL; 508 511 */ 509 - wake_up(&rqstp->rq_wait); 512 + wake_up_process(rqstp->rq_task); 510 513 } else 511 514 pool->sp_task_pending = 1; 512 515 spin_unlock_bh(&pool->sp_lock); ··· 625 628 { 626 629 struct svc_xprt *xprt; 627 630 struct svc_pool *pool = rqstp->rq_pool; 628 - DECLARE_WAITQUEUE(wait, current); 629 - long time_left; 631 + long time_left = 0; 630 632 631 633 /* Normally we will wait up to 5 seconds for any required 632 634 * cache information to be provided. ··· 647 651 } else { 648 652 if (pool->sp_task_pending) { 649 653 pool->sp_task_pending = 0; 650 - spin_unlock_bh(&pool->sp_lock); 651 - return ERR_PTR(-EAGAIN); 654 + xprt = ERR_PTR(-EAGAIN); 655 + goto out; 652 656 } 653 - /* No data pending. Go to sleep */ 654 - svc_thread_enqueue(pool, rqstp); 655 - 656 657 /* 657 658 * We have to be able to interrupt this wait 658 659 * to bring down the daemons ... 659 660 */ 660 661 set_current_state(TASK_INTERRUPTIBLE); 661 662 662 - /* 663 - * checking kthread_should_stop() here allows us to avoid 664 - * locking and signalling when stopping kthreads that call 665 - * svc_recv. If the thread has already been woken up, then 666 - * we can exit here without sleeping. If not, then it 667 - * it'll be woken up quickly during the schedule_timeout 668 - */ 669 - if (kthread_should_stop()) { 670 - set_current_state(TASK_RUNNING); 671 - spin_unlock_bh(&pool->sp_lock); 672 - return ERR_PTR(-EINTR); 673 - } 674 - 675 - add_wait_queue(&rqstp->rq_wait, &wait); 663 + /* No data pending. Go to sleep */ 664 + svc_thread_enqueue(pool, rqstp); 676 665 spin_unlock_bh(&pool->sp_lock); 677 666 678 - time_left = schedule_timeout(timeout); 667 + if (!(signalled() || kthread_should_stop())) { 668 + time_left = schedule_timeout(timeout); 669 + __set_current_state(TASK_RUNNING); 679 670 680 - try_to_freeze(); 671 + try_to_freeze(); 672 + 673 + xprt = rqstp->rq_xprt; 674 + if (xprt != NULL) 675 + return xprt; 676 + } else 677 + __set_current_state(TASK_RUNNING); 681 678 682 679 spin_lock_bh(&pool->sp_lock); 683 - remove_wait_queue(&rqstp->rq_wait, &wait); 684 680 if (!time_left) 685 681 pool->sp_stats.threads_timedout++; 686 682 ··· 687 699 return ERR_PTR(-EAGAIN); 688 700 } 689 701 } 702 + out: 690 703 spin_unlock_bh(&pool->sp_lock); 691 704 return xprt; 692 705 } ··· 733 744 svc_add_new_temp_xprt(serv, newxpt); 734 745 else 735 746 module_put(xprt->xpt_class->xcl_owner); 736 - } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { 747 + } else { 737 748 /* XPT_DATA|XPT_DEFERRED case: */ 738 749 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 739 750 rqstp, rqstp->rq_pool->sp_id, xprt, ··· 769 780 if (rqstp->rq_xprt) 770 781 printk(KERN_ERR 771 782 "svc_recv: service %p, transport not NULL!\n", 772 - rqstp); 773 - if (waitqueue_active(&rqstp->rq_wait)) 774 - printk(KERN_ERR 775 - "svc_recv: service %p, wait queue active!\n", 776 783 rqstp); 777 784 778 785 err = svc_alloc_arg(rqstp);

+7 -18

net/sunrpc/svcsock.c

··· 312 312 } 313 313 314 314 /* 315 - * Check input queue length 316 - */ 317 - static int svc_recv_available(struct svc_sock *svsk) 318 - { 319 - struct socket *sock = svsk->sk_sock; 320 - int avail, err; 321 - 322 - err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); 323 - 324 - return (err >= 0)? avail : err; 325 - } 326 - 327 - /* 328 315 * Generic recvfrom routine. 329 316 */ 330 317 static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, ··· 326 339 327 340 rqstp->rq_xprt_hlen = 0; 328 341 342 + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 329 343 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 330 344 msg.msg_flags); 345 + /* If we read a full record, then assume there may be more 346 + * data to read (stream based sockets only!) 347 + */ 348 + if (len == buflen) 349 + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 331 350 332 351 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 333 352 svsk, iov[0].iov_base, iov[0].iov_len, len); ··· 973 980 unsigned int want; 974 981 int len; 975 982 976 - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 977 - 978 983 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 979 984 struct kvec iov; 980 985 ··· 1027 1036 "%s: Got unrecognized reply: " 1028 1037 "calldir 0x%x xpt_bc_xprt %p xid %08x\n", 1029 1038 __func__, ntohl(calldir), 1030 - bc_xprt, xid); 1039 + bc_xprt, ntohl(xid)); 1031 1040 return -EAGAIN; 1032 1041 } 1033 1042 ··· 1064 1073 static void svc_tcp_fragment_received(struct svc_sock *svsk) 1065 1074 { 1066 1075 /* If we have more data, signal svc_xprt_enqueue() to try again */ 1067 - if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) 1068 - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1069 1076 dprintk("svc: TCP %s record (%d bytes)\n", 1070 1077 svc_sock_final_rec(svsk) ? "final" : "nonfinal", 1071 1078 svc_sock_reclen(svsk));

+1 -1

net/sunrpc/xprtrdma/svc_rdma_transport.c

··· 91 91 .xcl_name = "rdma", 92 92 .xcl_owner = THIS_MODULE, 93 93 .xcl_ops = &svc_rdma_ops, 94 - .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 94 + .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, 95 95 .xcl_ident = XPRT_TRANSPORT_RDMA, 96 96 }; 97 97

+7

net/sunrpc/xprtrdma/xprt_rdma.h

··· 51 51 #include <linux/sunrpc/clnt.h> /* rpc_xprt */ 52 52 #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ 53 53 #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ 54 + #include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */ 54 55 55 56 #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ 56 57 #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ ··· 392 391 extern struct kmem_cache *svc_rdma_ctxt_cachep; 393 392 /* Workqueue created in svc_rdma.c */ 394 393 extern struct workqueue_struct *svc_rdma_wq; 394 + 395 + #if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT) 396 + #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 397 + #else 398 + #define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT) 399 + #endif 395 400 396 401 #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */