Merge tag 'nfsd-4.3' of git://linux-nfs.org/~bfields/linux

+7 -9

Documentation/filesystems/nfs/nfs-rdma.txt

··· 138 138 - Build, install, reboot 139 139 140 140 The NFS/RDMA code will be enabled automatically if NFS and RDMA 141 - are turned on. The NFS/RDMA client and server are configured via the 142 - SUNRPC_XPRT_RDMA_CLIENT and SUNRPC_XPRT_RDMA_SERVER config options that both 143 - depend on SUNRPC and INFINIBAND. The default value of both options will be: 141 + are turned on. The NFS/RDMA client and server are configured via the hidden 142 + SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The 143 + value of SUNRPC_XPRT_RDMA will be: 144 144 145 145 - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client 146 146 and server will not be built ··· 238 238 239 239 - Start the NFS server 240 240 241 - If the NFS/RDMA server was built as a module 242 - (CONFIG_SUNRPC_XPRT_RDMA_SERVER=m in kernel config), load the RDMA 243 - transport module: 241 + If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in 242 + kernel config), load the RDMA transport module: 244 243 245 244 $ modprobe svcrdma 246 245 ··· 258 259 259 260 - On the client system 260 261 261 - If the NFS/RDMA client was built as a module 262 - (CONFIG_SUNRPC_XPRT_RDMA_CLIENT=m in kernel config), load the RDMA client 263 - module: 262 + If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in 263 + kernel config), load the RDMA client module: 264 264 265 265 $ modprobe xprtrdma.ko 266 266

+7 -1

fs/lockd/svc.c

··· 322 322 return error; 323 323 } 324 324 325 + static struct svc_serv_ops lockd_sv_ops = { 326 + .svo_shutdown = svc_rpcb_cleanup, 327 + .svo_enqueue_xprt = svc_xprt_do_enqueue, 328 + }; 329 + 325 330 static struct svc_serv *lockd_create_svc(void) 326 331 { 327 332 struct svc_serv *serv; ··· 355 350 nlm_timeout = LOCKD_DFLT_TIMEO; 356 351 nlmsvc_timeout = nlm_timeout * HZ; 357 352 358 - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup); 353 + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); 359 354 if (!serv) { 360 355 printk(KERN_WARNING "lockd_up: create service failed\n"); 361 356 return ERR_PTR(-ENOMEM); ··· 591 586 592 587 INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); 593 588 INIT_LIST_HEAD(&ln->lockd_manager.list); 589 + ln->lockd_manager.block_opens = false; 594 590 spin_lock_init(&ln->nsm_clnt_lock); 595 591 return 0; 596 592 }

+1

fs/locks.c

··· 1568 1568 * desired lease. 1569 1569 * @dentry: dentry to check 1570 1570 * @arg: type of lease that we're trying to acquire 1571 + * @flags: current lock flags 1571 1572 * 1572 1573 * Check to see if there's an existing open fd on this file that would 1573 1574 * conflict with the lease we're trying to set.

+5 -1

fs/nfs/callback.c

··· 308 308 return ret; 309 309 } 310 310 311 + static struct svc_serv_ops nfs_cb_sv_ops = { 312 + .svo_enqueue_xprt = svc_xprt_do_enqueue, 313 + }; 314 + 311 315 static struct svc_serv *nfs_callback_create_svc(int minorversion) 312 316 { 313 317 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; ··· 337 333 printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", 338 334 cb_info->users); 339 335 340 - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); 336 + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops); 341 337 if (!serv) { 342 338 printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); 343 339 return ERR_PTR(-ENOMEM);

+21 -2

fs/nfs_common/grace.c

··· 63 63 * lock reclaims. 64 64 */ 65 65 int 66 - locks_in_grace(struct net *net) 66 + __state_in_grace(struct net *net, bool open) 67 67 { 68 68 struct list_head *grace_list = net_generic(net, grace_net_id); 69 + struct lock_manager *lm; 69 70 70 - return !list_empty(grace_list); 71 + if (!open) 72 + return !list_empty(grace_list); 73 + 74 + list_for_each_entry(lm, grace_list, list) { 75 + if (lm->block_opens) 76 + return true; 77 + } 78 + return false; 79 + } 80 + 81 + int locks_in_grace(struct net *net) 82 + { 83 + return __state_in_grace(net, 0); 71 84 } 72 85 EXPORT_SYMBOL_GPL(locks_in_grace); 86 + 87 + int opens_in_grace(struct net *net) 88 + { 89 + return __state_in_grace(net, 1); 90 + } 91 + EXPORT_SYMBOL_GPL(opens_in_grace); 73 92 74 93 static int __net_init 75 94 grace_init_net(struct net *net)

+3 -70

fs/nfsd/export.c

··· 1075 1075 return rv; 1076 1076 } 1077 1077 1078 - /* Iterator */ 1079 - 1080 - static void *e_start(struct seq_file *m, loff_t *pos) 1081 - __acquires(((struct cache_detail *)m->private)->hash_lock) 1082 - { 1083 - loff_t n = *pos; 1084 - unsigned hash, export; 1085 - struct cache_head *ch; 1086 - struct cache_detail *cd = m->private; 1087 - struct cache_head **export_table = cd->hash_table; 1088 - 1089 - read_lock(&cd->hash_lock); 1090 - if (!n--) 1091 - return SEQ_START_TOKEN; 1092 - hash = n >> 32; 1093 - export = n & ((1LL<<32) - 1); 1094 - 1095 - 1096 - for (ch=export_table[hash]; ch; ch=ch->next) 1097 - if (!export--) 1098 - return ch; 1099 - n &= ~((1LL<<32) - 1); 1100 - do { 1101 - hash++; 1102 - n += 1LL<<32; 1103 - } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL); 1104 - if (hash >= EXPORT_HASHMAX) 1105 - return NULL; 1106 - *pos = n+1; 1107 - return export_table[hash]; 1108 - } 1109 - 1110 - static void *e_next(struct seq_file *m, void *p, loff_t *pos) 1111 - { 1112 - struct cache_head *ch = p; 1113 - int hash = (*pos >> 32); 1114 - struct cache_detail *cd = m->private; 1115 - struct cache_head **export_table = cd->hash_table; 1116 - 1117 - if (p == SEQ_START_TOKEN) 1118 - hash = 0; 1119 - else if (ch->next == NULL) { 1120 - hash++; 1121 - *pos += 1LL<<32; 1122 - } else { 1123 - ++*pos; 1124 - return ch->next; 1125 - } 1126 - *pos &= ~((1LL<<32) - 1); 1127 - while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) { 1128 - hash++; 1129 - *pos += 1LL<<32; 1130 - } 1131 - if (hash >= EXPORT_HASHMAX) 1132 - return NULL; 1133 - ++*pos; 1134 - return export_table[hash]; 1135 - } 1136 - 1137 - static void e_stop(struct seq_file *m, void *p) 1138 - __releases(((struct cache_detail *)m->private)->hash_lock) 1139 - { 1140 - struct cache_detail *cd = m->private; 1141 - 1142 - read_unlock(&cd->hash_lock); 1143 - } 1144 - 1145 1078 static struct flags { 1146 1079 int flag; 1147 1080 char *name[2]; ··· 1203 1270 } 1204 1271 1205 1272 const struct seq_operations nfs_exports_op = { 1206 - .start = e_start, 1207 - .next = e_next, 1208 - .stop = e_stop, 1273 + .start = cache_seq_start, 1274 + .next = cache_seq_next, 1275 + .stop = cache_seq_stop, 1209 1276 .show = e_show, 1210 1277 }; 1211 1278

+1

fs/nfsd/export.h

··· 6 6 7 7 #include <linux/sunrpc/cache.h> 8 8 #include <uapi/linux/nfsd/export.h> 9 + #include <linux/nfs4.h> 9 10 10 11 struct knfsd_fh; 11 12 struct svc_fh;

+1 -3

fs/nfsd/idmap.h

··· 37 37 38 38 #include <linux/in.h> 39 39 #include <linux/sunrpc/svc.h> 40 - 41 - /* XXX from linux/nfs_idmap.h */ 42 - #define IDMAP_NAMESZ 128 40 + #include <linux/nfs_idmap.h> 43 41 44 42 #ifdef CONFIG_NFSD_V4 45 43 int nfsd_idmap_init(struct net *);

+1

fs/nfsd/netns.h

··· 110 110 unsigned int max_connections; 111 111 112 112 u32 clientid_counter; 113 + u32 clverifier_counter; 113 114 114 115 struct svc_serv *nfsd_serv; 115 116 };

+4 -6

fs/nfsd/nfs2acl.c

··· 44 44 45 45 inode = d_inode(fh->fh_dentry); 46 46 47 - if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 47 + if (argp->mask & ~NFS_ACL_MASK) 48 48 RETURN_STATUS(nfserr_inval); 49 49 resp->mask = argp->mask; 50 50 51 51 nfserr = fh_getattr(fh, &resp->stat); 52 52 if (nfserr) 53 - goto fail; 53 + RETURN_STATUS(nfserr); 54 54 55 55 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 56 56 acl = get_acl(inode, ACL_TYPE_ACCESS); ··· 202 202 if (!p) 203 203 return 0; 204 204 argp->mask = ntohl(*p++); 205 - if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || 205 + if (argp->mask & ~NFS_ACL_MASK || 206 206 !xdr_argsize_check(rqstp, p)) 207 207 return 0; 208 208 ··· 293 293 resp->acl_default, 294 294 resp->mask & NFS_DFACL, 295 295 NFS_ACL_DEFAULT); 296 - if (n <= 0) 297 - return 0; 298 - return 1; 296 + return (n > 0); 299 297 } 300 298 301 299 static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,

+2 -2

fs/nfsd/nfs3acl.c

··· 41 41 42 42 inode = d_inode(fh->fh_dentry); 43 43 44 - if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 44 + if (argp->mask & ~NFS_ACL_MASK) 45 45 RETURN_STATUS(nfserr_inval); 46 46 resp->mask = argp->mask; 47 47 ··· 148 148 if (!p) 149 149 return 0; 150 150 args->mask = ntohl(*p++); 151 - if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || 151 + if (args->mask & ~NFS_ACL_MASK || 152 152 !xdr_argsize_check(rqstp, p)) 153 153 return 0; 154 154

+5 -3

fs/nfsd/nfs4acl.c

··· 34 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 35 */ 36 36 37 + #include <linux/fs.h> 37 38 #include <linux/slab.h> 38 - #include <linux/nfs_fs.h> 39 + #include <linux/posix_acl.h> 40 + 39 41 #include "nfsfh.h" 40 42 #include "nfsd.h" 41 43 #include "acl.h" ··· 102 100 /* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the 103 101 * side of being more restrictive, so the mode bit mapping below is 104 102 * pessimistic. An optimistic version would be needed to handle DENY's, 105 - * but we espect to coalesce all ALLOWs and DENYs before mapping to mode 103 + * but we expect to coalesce all ALLOWs and DENYs before mapping to mode 106 104 * bits. */ 107 105 108 106 static void ··· 460 458 state->empty = 1; 461 459 /* 462 460 * In the worst case, each individual acl could be for a distinct 463 - * named user or group, but we don't no which, so we allocate 461 + * named user or group, but we don't know which, so we allocate 464 462 * enough space for either: 465 463 */ 466 464 alloc = sizeof(struct posix_ace_state_array)

+92 -40

fs/nfsd/nfs4callback.c

··· 435 435 */ 436 436 status = 0; 437 437 out: 438 - if (status) 439 - nfsd4_mark_cb_fault(cb->cb_clp, status); 438 + cb->cb_seq_status = status; 440 439 return status; 441 440 out_overflow: 442 441 print_overflow_msg(__func__, xdr); 443 - return -EIO; 442 + status = -EIO; 443 + goto out; 444 444 } 445 445 446 446 static int decode_cb_sequence4res(struct xdr_stream *xdr, ··· 451 451 if (cb->cb_minorversion == 0) 452 452 return 0; 453 453 454 - status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status); 455 - if (unlikely(status || cb->cb_status)) 454 + status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status); 455 + if (unlikely(status || cb->cb_seq_status)) 456 456 return status; 457 457 458 - cb->cb_update_seq_nr = true; 459 458 return decode_cb_sequence4resok(xdr, cb); 460 459 } 461 460 ··· 526 527 527 528 if (cb != NULL) { 528 529 status = decode_cb_sequence4res(xdr, cb); 529 - if (unlikely(status || cb->cb_status)) 530 + if (unlikely(status || cb->cb_seq_status)) 530 531 return status; 531 532 } 532 533 ··· 616 617 617 618 if (cb) { 618 619 status = decode_cb_sequence4res(xdr, cb); 619 - if (unlikely(status || cb->cb_status)) 620 + if (unlikely(status || cb->cb_seq_status)) 620 621 return status; 621 622 } 622 623 return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status); ··· 875 876 u32 minorversion = clp->cl_minorversion; 876 877 877 878 cb->cb_minorversion = minorversion; 878 - cb->cb_update_seq_nr = false; 879 + /* 880 + * cb_seq_status is only set in decode_cb_sequence4res, 881 + * and so will remain 1 if an rpc level failure occurs. 882 + */ 883 + cb->cb_seq_status = 1; 879 884 cb->cb_status = 0; 880 885 if (minorversion) { 881 886 if (!nfsd41_cb_get_slot(clp, task)) 882 887 return; 883 888 } 884 889 rpc_call_start(task); 890 + } 891 + 892 + static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb) 893 + { 894 + struct nfs4_client *clp = cb->cb_clp; 895 + struct nfsd4_session *session = clp->cl_cb_session; 896 + bool ret = true; 897 + 898 + if (!clp->cl_minorversion) { 899 + /* 900 + * If the backchannel connection was shut down while this 901 + * task was queued, we need to resubmit it after setting up 902 + * a new backchannel connection. 903 + * 904 + * Note that if we lost our callback connection permanently 905 + * the submission code will error out, so we don't need to 906 + * handle that case here. 907 + */ 908 + if (task->tk_flags & RPC_TASK_KILLED) 909 + goto need_restart; 910 + 911 + return true; 912 + } 913 + 914 + switch (cb->cb_seq_status) { 915 + case 0: 916 + /* 917 + * No need for lock, access serialized in nfsd4_cb_prepare 918 + * 919 + * RFC5661 20.9.3 920 + * If CB_SEQUENCE returns an error, then the state of the slot 921 + * (sequence ID, cached reply) MUST NOT change. 922 + */ 923 + ++session->se_cb_seq_nr; 924 + break; 925 + case -ESERVERFAULT: 926 + ++session->se_cb_seq_nr; 927 + case 1: 928 + case -NFS4ERR_BADSESSION: 929 + nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); 930 + ret = false; 931 + break; 932 + case -NFS4ERR_DELAY: 933 + if (!rpc_restart_call(task)) 934 + goto out; 935 + 936 + rpc_delay(task, 2 * HZ); 937 + return false; 938 + case -NFS4ERR_BADSLOT: 939 + goto retry_nowait; 940 + case -NFS4ERR_SEQ_MISORDERED: 941 + if (session->se_cb_seq_nr != 1) { 942 + session->se_cb_seq_nr = 1; 943 + goto retry_nowait; 944 + } 945 + break; 946 + default: 947 + dprintk("%s: unprocessed error %d\n", __func__, 948 + cb->cb_seq_status); 949 + } 950 + 951 + clear_bit(0, &clp->cl_cb_slot_busy); 952 + rpc_wake_up_next(&clp->cl_cb_waitq); 953 + dprintk("%s: freed slot, new seqid=%d\n", __func__, 954 + clp->cl_cb_session->se_cb_seq_nr); 955 + 956 + if (task->tk_flags & RPC_TASK_KILLED) 957 + goto need_restart; 958 + out: 959 + return ret; 960 + retry_nowait: 961 + if (rpc_restart_call_prepare(task)) 962 + ret = false; 963 + goto out; 964 + need_restart: 965 + task->tk_status = 0; 966 + cb->cb_need_restart = true; 967 + return false; 885 968 } 886 969 887 970 static void nfsd4_cb_done(struct rpc_task *task, void *calldata) ··· 974 893 dprintk("%s: minorversion=%d\n", __func__, 975 894 clp->cl_minorversion); 976 895 977 - if (clp->cl_minorversion) { 978 - /* 979 - * No need for lock, access serialized in nfsd4_cb_prepare 980 - * 981 - * RFC5661 20.9.3 982 - * If CB_SEQUENCE returns an error, then the state of the slot 983 - * (sequence ID, cached reply) MUST NOT change. 984 - */ 985 - if (cb->cb_update_seq_nr) 986 - ++clp->cl_cb_session->se_cb_seq_nr; 987 - 988 - clear_bit(0, &clp->cl_cb_slot_busy); 989 - rpc_wake_up_next(&clp->cl_cb_waitq); 990 - dprintk("%s: freed slot, new seqid=%d\n", __func__, 991 - clp->cl_cb_session->se_cb_seq_nr); 992 - } 993 - 994 - /* 995 - * If the backchannel connection was shut down while this 996 - * task was queued, we need to resubmit it after setting up 997 - * a new backchannel connection. 998 - * 999 - * Note that if we lost our callback connection permanently 1000 - * the submission code will error out, so we don't need to 1001 - * handle that case here. 1002 - */ 1003 - if (task->tk_flags & RPC_TASK_KILLED) { 1004 - task->tk_status = 0; 1005 - cb->cb_need_restart = true; 896 + if (!nfsd4_cb_sequence_done(task, cb)) 1006 897 return; 1007 - } 1008 898 1009 899 if (cb->cb_status) { 1010 900 WARN_ON_ONCE(task->tk_status); ··· 1151 1099 cb->cb_msg.rpc_resp = cb; 1152 1100 cb->cb_ops = ops; 1153 1101 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); 1102 + cb->cb_seq_status = 1; 1154 1103 cb->cb_status = 0; 1155 - cb->cb_update_seq_nr = false; 1156 1104 cb->cb_need_restart = false; 1157 1105 } 1158 1106

-3

fs/nfsd/nfs4idmap.c

··· 59 59 * that. 60 60 */ 61 61 62 - #define IDMAP_TYPE_USER 0 63 - #define IDMAP_TYPE_GROUP 1 64 - 65 62 struct ent { 66 63 struct cache_head h; 67 64 int type; /* User / Group */

+12 -18

fs/nfsd/nfs4proc.c

··· 276 276 nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); 277 277 278 278 /* 279 - * Following rfc 3530 14.2.16, use the returned bitmask 280 - * to indicate which attributes we used to store the 281 - * verifier: 279 + * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 280 + * use the returned bitmask to indicate which attributes 281 + * we used to store the verifier: 282 282 */ 283 - if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) 284 - open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | 285 - FATTR4_WORD1_TIME_MODIFY); 283 + if (nfsd_create_is_exclusive(open->op_createmode) && status == 0) 284 + open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | 285 + FATTR4_WORD1_TIME_MODIFY); 286 286 } else 287 287 /* 288 288 * Note this may exit with the parent still locked. ··· 362 362 { 363 363 __be32 status; 364 364 struct svc_fh *resfh = NULL; 365 - struct nfsd4_compoundres *resp; 366 365 struct net *net = SVC_NET(rqstp); 367 366 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 368 367 ··· 388 389 copy_clientid(&open->op_clientid, cstate->session); 389 390 390 391 /* check seqid for replay. set nfs4_owner */ 391 - resp = rqstp->rq_resp; 392 - status = nfsd4_process_open1(&resp->cstate, open, nn); 392 + status = nfsd4_process_open1(cstate, open, nn); 393 393 if (status == nfserr_replay_me) { 394 394 struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; 395 395 fh_put(&cstate->current_fh); ··· 415 417 /* Openowner is now set, so sequence id will get bumped. Now we need 416 418 * these checks before we do any creates: */ 417 419 status = nfserr_grace; 418 - if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) 420 + if (opens_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) 419 421 goto out; 420 422 status = nfserr_no_grace; 421 - if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) 423 + if (!opens_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) 422 424 goto out; 423 425 424 426 switch (open->op_claim_type) { ··· 827 829 { 828 830 __be32 status; 829 831 830 - if (locks_in_grace(SVC_NET(rqstp))) 832 + if (opens_in_grace(SVC_NET(rqstp))) 831 833 return nfserr_grace; 832 834 status = nfsd_unlink(rqstp, &cstate->current_fh, 0, 833 835 remove->rm_name, remove->rm_namelen); ··· 846 848 847 849 if (!cstate->save_fh.fh_dentry) 848 850 return status; 849 - if (locks_in_grace(SVC_NET(rqstp)) && 851 + if (opens_in_grace(SVC_NET(rqstp)) && 850 852 !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) 851 853 return nfserr_grace; 852 854 status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, ··· 1362 1364 goto out; 1363 1365 } 1364 1366 1365 - nfserr = ops->proc_layoutcommit(inode, lcp); 1366 - if (nfserr) 1367 - goto out_put_stid; 1368 - 1369 1367 if (new_size > i_size_read(inode)) { 1370 1368 lcp->lc_size_chg = 1; 1371 1369 lcp->lc_newsize = new_size; ··· 1369 1375 lcp->lc_size_chg = 0; 1370 1376 } 1371 1377 1372 - out_put_stid: 1378 + nfserr = ops->proc_layoutcommit(inode, lcp); 1373 1379 nfs4_put_stid(&ls->ls_stid); 1374 1380 out: 1375 1381 return nfserr;

+11 -7

fs/nfsd/nfs4recover.c

··· 272 272 .ctx.actor = nfsd4_build_namelist, 273 273 .names = LIST_HEAD_INIT(ctx.names) 274 274 }; 275 + struct name_list *entry, *tmp; 275 276 int status; 276 277 277 278 status = nfs4_save_creds(&original_cred); ··· 287 286 288 287 status = iterate_dir(nn->rec_file, &ctx.ctx); 289 288 mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); 290 - while (!list_empty(&ctx.names)) { 291 - struct name_list *entry; 292 - entry = list_entry(ctx.names.next, struct name_list, list); 289 + 290 + list_for_each_entry_safe(entry, tmp, &ctx.names, list) { 293 291 if (!status) { 294 292 struct dentry *dentry; 295 293 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); ··· 304 304 } 305 305 mutex_unlock(&d_inode(dir)->i_mutex); 306 306 nfs4_reset_creds(original_cred); 307 + 308 + list_for_each_entry_safe(entry, tmp, &ctx.names, list) { 309 + dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name); 310 + list_del(&entry->list); 311 + kfree(entry); 312 + } 307 313 return status; 308 314 } 309 315 ··· 547 541 548 542 /* XXX: The legacy code won't work in a container */ 549 543 if (net != &init_net) { 550 - WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " 551 - "tracking in a container!\n"); 544 + pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n"); 552 545 return -EINVAL; 553 546 } 554 547 ··· 1259 1254 1260 1255 /* XXX: The usermode helper s not working in container yet. */ 1261 1256 if (net != &init_net) { 1262 - WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " 1263 - "tracking in a container!\n"); 1257 + pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n"); 1264 1258 return -EINVAL; 1265 1259 } 1266 1260

+101 -79

fs/nfsd/nfs4state.c

··· 777 777 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); 778 778 } 779 779 780 - static void 780 + static bool 781 781 unhash_delegation_locked(struct nfs4_delegation *dp) 782 782 { 783 783 struct nfs4_file *fp = dp->dl_stid.sc_file; 784 784 785 785 lockdep_assert_held(&state_lock); 786 + 787 + if (list_empty(&dp->dl_perfile)) 788 + return false; 786 789 787 790 dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; 788 791 /* Ensure that deleg break won't try to requeue it */ ··· 795 792 list_del_init(&dp->dl_recall_lru); 796 793 list_del_init(&dp->dl_perfile); 797 794 spin_unlock(&fp->fi_lock); 795 + return true; 798 796 } 799 797 800 798 static void destroy_delegation(struct nfs4_delegation *dp) 801 799 { 800 + bool unhashed; 801 + 802 802 spin_lock(&state_lock); 803 - unhash_delegation_locked(dp); 803 + unhashed = unhash_delegation_locked(dp); 804 804 spin_unlock(&state_lock); 805 - put_clnt_odstate(dp->dl_clnt_odstate); 806 - nfs4_put_deleg_lease(dp->dl_stid.sc_file); 807 - nfs4_put_stid(&dp->dl_stid); 805 + if (unhashed) { 806 + put_clnt_odstate(dp->dl_clnt_odstate); 807 + nfs4_put_deleg_lease(dp->dl_stid.sc_file); 808 + nfs4_put_stid(&dp->dl_stid); 809 + } 808 810 } 809 811 810 812 static void revoke_delegation(struct nfs4_delegation *dp) ··· 998 990 } 999 991 } 1000 992 993 + static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop) 994 + { 995 + kfree(sop->so_owner.data); 996 + sop->so_ops->so_free(sop); 997 + } 998 + 1001 999 static void nfs4_put_stateowner(struct nfs4_stateowner *sop) 1002 1000 { 1003 1001 struct nfs4_client *clp = sop->so_client; ··· 1014 1000 return; 1015 1001 sop->so_ops->so_unhash(sop); 1016 1002 spin_unlock(&clp->cl_lock); 1017 - kfree(sop->so_owner.data); 1018 - sop->so_ops->so_free(sop); 1003 + nfs4_free_stateowner(sop); 1019 1004 } 1020 1005 1021 - static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) 1006 + static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) 1022 1007 { 1023 1008 struct nfs4_file *fp = stp->st_stid.sc_file; 1024 1009 1025 1010 lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); 1026 1011 1012 + if (list_empty(&stp->st_perfile)) 1013 + return false; 1014 + 1027 1015 spin_lock(&fp->fi_lock); 1028 - list_del(&stp->st_perfile); 1016 + list_del_init(&stp->st_perfile); 1029 1017 spin_unlock(&fp->fi_lock); 1030 1018 list_del(&stp->st_perstateowner); 1019 + return true; 1031 1020 } 1032 1021 1033 1022 static void nfs4_free_ol_stateid(struct nfs4_stid *stid) ··· 1080 1063 list_add(&stp->st_locks, reaplist); 1081 1064 } 1082 1065 1083 - static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) 1066 + static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) 1084 1067 { 1085 1068 struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); 1086 1069 1087 1070 lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); 1088 1071 1089 1072 list_del_init(&stp->st_locks); 1090 - unhash_ol_stateid(stp); 1091 1073 nfs4_unhash_stid(&stp->st_stid); 1074 + return unhash_ol_stateid(stp); 1092 1075 } 1093 1076 1094 1077 static void release_lock_stateid(struct nfs4_ol_stateid *stp) 1095 1078 { 1096 1079 struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); 1080 + bool unhashed; 1097 1081 1098 1082 spin_lock(&oo->oo_owner.so_client->cl_lock); 1099 - unhash_lock_stateid(stp); 1083 + unhashed = unhash_lock_stateid(stp); 1100 1084 spin_unlock(&oo->oo_owner.so_client->cl_lock); 1101 - nfs4_put_stid(&stp->st_stid); 1085 + if (unhashed) 1086 + nfs4_put_stid(&stp->st_stid); 1102 1087 } 1103 1088 1104 1089 static void unhash_lockowner_locked(struct nfs4_lockowner *lo) ··· 1148 1129 while (!list_empty(&lo->lo_owner.so_stateids)) { 1149 1130 stp = list_first_entry(&lo->lo_owner.so_stateids, 1150 1131 struct nfs4_ol_stateid, st_perstateowner); 1151 - unhash_lock_stateid(stp); 1132 + WARN_ON(!unhash_lock_stateid(stp)); 1152 1133 put_ol_stateid_locked(stp, &reaplist); 1153 1134 } 1154 1135 spin_unlock(&clp->cl_lock); ··· 1161 1142 { 1162 1143 struct nfs4_ol_stateid *stp; 1163 1144 1145 + lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock); 1146 + 1164 1147 while (!list_empty(&open_stp->st_locks)) { 1165 1148 stp = list_entry(open_stp->st_locks.next, 1166 1149 struct nfs4_ol_stateid, st_locks); 1167 - unhash_lock_stateid(stp); 1150 + WARN_ON(!unhash_lock_stateid(stp)); 1168 1151 put_ol_stateid_locked(stp, reaplist); 1169 1152 } 1170 1153 } 1171 1154 1172 - static void unhash_open_stateid(struct nfs4_ol_stateid *stp, 1155 + static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, 1173 1156 struct list_head *reaplist) 1174 1157 { 1158 + bool unhashed; 1159 + 1175 1160 lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); 1176 1161 1177 - unhash_ol_stateid(stp); 1162 + unhashed = unhash_ol_stateid(stp); 1178 1163 release_open_stateid_locks(stp, reaplist); 1164 + return unhashed; 1179 1165 } 1180 1166 1181 1167 static void release_open_stateid(struct nfs4_ol_stateid *stp) ··· 1188 1164 LIST_HEAD(reaplist); 1189 1165 1190 1166 spin_lock(&stp->st_stid.sc_client->cl_lock); 1191 - unhash_open_stateid(stp, &reaplist); 1192 - put_ol_stateid_locked(stp, &reaplist); 1167 + if (unhash_open_stateid(stp, &reaplist)) 1168 + put_ol_stateid_locked(stp, &reaplist); 1193 1169 spin_unlock(&stp->st_stid.sc_client->cl_lock); 1194 1170 free_ol_stateid_reaplist(&reaplist); 1195 1171 } ··· 1234 1210 while (!list_empty(&oo->oo_owner.so_stateids)) { 1235 1211 stp = list_first_entry(&oo->oo_owner.so_stateids, 1236 1212 struct nfs4_ol_stateid, st_perstateowner); 1237 - unhash_open_stateid(stp, &reaplist); 1238 - put_ol_stateid_locked(stp, &reaplist); 1213 + if (unhash_open_stateid(stp, &reaplist)) 1214 + put_ol_stateid_locked(stp, &reaplist); 1239 1215 } 1240 1216 spin_unlock(&clp->cl_lock); 1241 1217 free_ol_stateid_reaplist(&reaplist); ··· 1738 1714 spin_lock(&state_lock); 1739 1715 while (!list_empty(&clp->cl_delegations)) { 1740 1716 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 1741 - unhash_delegation_locked(dp); 1717 + WARN_ON(!unhash_delegation_locked(dp)); 1742 1718 list_add(&dp->dl_recall_lru, &reaplist); 1743 1719 } 1744 1720 spin_unlock(&state_lock); ··· 1918 1894 * __force to keep sparse happy 1919 1895 */ 1920 1896 verf[0] = (__force __be32)get_seconds(); 1921 - verf[1] = (__force __be32)nn->clientid_counter; 1897 + verf[1] = (__force __be32)nn->clverifier_counter++; 1922 1898 memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); 1923 1899 } 1924 1900 ··· 2265 2241 * Also note we should probably be using this in 4.0 case too. 2266 2242 */ 2267 2243 return !list_empty(&clp->cl_openowners) 2244 + #ifdef CONFIG_NFSD_PNFS 2245 + || !list_empty(&clp->cl_lo_states) 2246 + #endif 2268 2247 || !list_empty(&clp->cl_delegations) 2269 2248 || !list_empty(&clp->cl_sessions); 2270 2249 } ··· 2574 2547 goto out_free_conn; 2575 2548 cs_slot = &conf->cl_cs_slot; 2576 2549 status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); 2577 - if (status == nfserr_replay_cache) { 2578 - status = nfsd4_replay_create_session(cr_ses, cs_slot); 2579 - goto out_free_conn; 2580 - } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { 2581 - status = nfserr_seq_misordered; 2550 + if (status) { 2551 + if (status == nfserr_replay_cache) 2552 + status = nfsd4_replay_create_session(cr_ses, cs_slot); 2582 2553 goto out_free_conn; 2583 2554 } 2584 2555 } else if (unconf) { ··· 3066 3041 unconf = find_unconfirmed_client_by_name(&clname, nn); 3067 3042 if (unconf) 3068 3043 unhash_client_locked(unconf); 3069 - if (conf && same_verf(&conf->cl_verifier, &clverifier)) 3044 + if (conf && same_verf(&conf->cl_verifier, &clverifier)) { 3070 3045 /* case 1: probable callback update */ 3071 3046 copy_clid(new, conf); 3072 - else /* case 4 (new client) or cases 2, 3 (client reboot): */ 3047 + gen_confirm(new, nn); 3048 + } else /* case 4 (new client) or cases 2, 3 (client reboot): */ 3073 3049 gen_clid(new, nn); 3074 3050 new->cl_minorversion = 0; 3075 3051 gen_callback(new, setclid, rqstp); ··· 3111 3085 /* 3112 3086 * We try hard to give out unique clientid's, so if we get an 3113 3087 * attempt to confirm the same clientid with a different cred, 3114 - * there's a bug somewhere. Let's charitably assume it's our 3115 - * bug. 3088 + * the client may be buggy; this should never happen. 3089 + * 3090 + * Nevertheless, RFC 7530 recommends INUSE for this case: 3116 3091 */ 3117 - status = nfserr_serverfault; 3092 + status = nfserr_clid_inuse; 3118 3093 if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) 3119 3094 goto out; 3120 3095 if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) ··· 3342 3315 hash_openowner(oo, clp, strhashval); 3343 3316 ret = oo; 3344 3317 } else 3345 - nfs4_free_openowner(&oo->oo_owner); 3318 + nfs4_free_stateowner(&oo->oo_owner); 3319 + 3346 3320 spin_unlock(&clp->cl_lock); 3347 3321 return ret; 3348 3322 } ··· 3509 3481 struct rpc_task *task) 3510 3482 { 3511 3483 struct nfs4_delegation *dp = cb_to_delegation(cb); 3484 + 3485 + if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID) 3486 + return 1; 3512 3487 3513 3488 switch (task->tk_status) { 3514 3489 case 0: ··· 3916 3885 return status; 3917 3886 } 3918 3887 3919 - static void 3920 - nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) 3921 - { 3922 - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; 3923 - } 3924 - 3925 3888 /* Should we give out recallable state?: */ 3926 3889 static bool nfsd4_cb_channel_good(struct nfs4_client *clp) 3927 3890 { ··· 3948 3923 static int nfs4_setlease(struct nfs4_delegation *dp) 3949 3924 { 3950 3925 struct nfs4_file *fp = dp->dl_stid.sc_file; 3951 - struct file_lock *fl, *ret; 3926 + struct file_lock *fl; 3952 3927 struct file *filp; 3953 3928 int status = 0; 3954 3929 ··· 3959 3934 if (!filp) { 3960 3935 /* We should always have a readable file here */ 3961 3936 WARN_ON_ONCE(1); 3937 + locks_free_lock(fl); 3962 3938 return -EBADF; 3963 3939 } 3964 3940 fl->fl_file = filp; 3965 - ret = fl; 3966 3941 status = vfs_setlease(filp, fl->fl_type, &fl, NULL); 3967 3942 if (fl) 3968 3943 locks_free_lock(fl); ··· 4088 4063 case NFS4_OPEN_CLAIM_FH: 4089 4064 /* 4090 4065 * Let's not give out any delegations till everyone's 4091 - * had the chance to reclaim theirs.... 4066 + * had the chance to reclaim theirs, *and* until 4067 + * NLM locks have all been reclaimed: 4092 4068 */ 4093 4069 if (locks_in_grace(clp->net)) 4094 4070 goto out_no_deleg; ··· 4235 4209 if (fp) 4236 4210 put_nfs4_file(fp); 4237 4211 if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) 4238 - nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate)); 4212 + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; 4239 4213 /* 4240 4214 * To finish the open response, we just need to set the rflags. 4241 4215 */ ··· 4364 4338 spin_lock(&state_lock); 4365 4339 list_for_each_safe(pos, next, &nn->del_recall_lru) { 4366 4340 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 4367 - if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) 4368 - continue; 4369 4341 if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { 4370 4342 t = dp->dl_time - cutoff; 4371 4343 new_timeo = min(new_timeo, t); 4372 4344 break; 4373 4345 } 4374 - unhash_delegation_locked(dp); 4346 + WARN_ON(!unhash_delegation_locked(dp)); 4375 4347 list_add(&dp->dl_recall_lru, &reaplist); 4376 4348 } 4377 4349 spin_unlock(&state_lock); ··· 4464 4440 { 4465 4441 if (ONE_STATEID(stateid) && (flags & RD_STATE)) 4466 4442 return nfs_ok; 4467 - else if (locks_in_grace(net)) { 4443 + else if (opens_in_grace(net)) { 4468 4444 /* Answer in remaining cases depends on existence of 4469 4445 * conflicting state; so we must wait out the grace period. */ 4470 4446 return nfserr_grace; ··· 4483 4459 static inline int 4484 4460 grace_disallows_io(struct net *net, struct inode *inode) 4485 4461 { 4486 - return locks_in_grace(net) && mandatory_lock(inode); 4462 + return opens_in_grace(net) && mandatory_lock(inode); 4487 4463 } 4488 4464 4489 4465 /* Returns true iff a is later than b: */ ··· 4775 4751 if (check_for_locks(stp->st_stid.sc_file, 4776 4752 lockowner(stp->st_stateowner))) 4777 4753 break; 4778 - unhash_lock_stateid(stp); 4754 + WARN_ON(!unhash_lock_stateid(stp)); 4779 4755 spin_unlock(&cl->cl_lock); 4780 4756 nfs4_put_stid(s); 4781 4757 ret = nfs_ok; ··· 4991 4967 static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) 4992 4968 { 4993 4969 struct nfs4_client *clp = s->st_stid.sc_client; 4970 + bool unhashed; 4994 4971 LIST_HEAD(reaplist); 4995 4972 4996 4973 s->st_stid.sc_type = NFS4_CLOSED_STID; 4997 4974 spin_lock(&clp->cl_lock); 4998 - unhash_open_stateid(s, &reaplist); 4975 + unhashed = unhash_open_stateid(s, &reaplist); 4999 4976 5000 4977 if (clp->cl_minorversion) { 5001 - put_ol_stateid_locked(s, &reaplist); 4978 + if (unhashed) 4979 + put_ol_stateid_locked(s, &reaplist); 5002 4980 spin_unlock(&clp->cl_lock); 5003 4981 free_ol_stateid_reaplist(&reaplist); 5004 4982 } else { 5005 4983 spin_unlock(&clp->cl_lock); 5006 4984 free_ol_stateid_reaplist(&reaplist); 5007 - move_to_close_lru(s, clp->net); 4985 + if (unhashed) 4986 + move_to_close_lru(s, clp->net); 5008 4987 } 5009 4988 } 5010 4989 ··· 5071 5044 out: 5072 5045 return status; 5073 5046 } 5074 - 5075 - 5076 - #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) 5077 5047 5078 5048 static inline u64 5079 5049 end_offset(u64 start, u64 len) ··· 5163 5139 } 5164 5140 5165 5141 static struct nfs4_lockowner * 5166 - find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, 5167 - struct nfs4_client *clp) 5142 + find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner) 5168 5143 { 5169 5144 unsigned int strhashval = ownerstr_hashval(owner); 5170 5145 struct nfs4_stateowner *so; ··· 5181 5158 } 5182 5159 5183 5160 static struct nfs4_lockowner * 5184 - find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, 5185 - struct nfs4_client *clp) 5161 + find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner) 5186 5162 { 5187 5163 struct nfs4_lockowner *lo; 5188 5164 5189 5165 spin_lock(&clp->cl_lock); 5190 - lo = find_lockowner_str_locked(clid, owner, clp); 5166 + lo = find_lockowner_str_locked(clp, owner); 5191 5167 spin_unlock(&clp->cl_lock); 5192 5168 return lo; 5193 5169 } ··· 5230 5208 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; 5231 5209 lo->lo_owner.so_ops = &lockowner_ops; 5232 5210 spin_lock(&clp->cl_lock); 5233 - ret = find_lockowner_str_locked(&clp->cl_clientid, 5234 - &lock->lk_new_owner, clp); 5211 + ret = find_lockowner_str_locked(clp, &lock->lk_new_owner); 5235 5212 if (ret == NULL) { 5236 5213 list_add(&lo->lo_owner.so_strhash, 5237 5214 &clp->cl_ownerstr_hashtbl[strhashval]); 5238 5215 ret = lo; 5239 5216 } else 5240 - nfs4_free_lockowner(&lo->lo_owner); 5217 + nfs4_free_stateowner(&lo->lo_owner); 5218 + 5241 5219 spin_unlock(&clp->cl_lock); 5242 5220 return ret; 5243 5221 } ··· 5320 5298 static int 5321 5299 check_lock_length(u64 offset, u64 length) 5322 5300 { 5323 - return ((length == 0) || ((length != NFS4_MAX_UINT64) && 5324 - LOFF_OVERFLOW(offset, length))); 5301 + return ((length == 0) || ((length != NFS4_MAX_UINT64) && 5302 + (length > ~offset))); 5325 5303 } 5326 5304 5327 5305 static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) ··· 5350 5328 struct nfs4_lockowner *lo; 5351 5329 unsigned int strhashval; 5352 5330 5353 - lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl); 5331 + lo = find_lockowner_str(cl, &lock->lk_new_owner); 5354 5332 if (!lo) { 5355 - strhashval = ownerstr_hashval(&lock->v.new.owner); 5333 + strhashval = ownerstr_hashval(&lock->lk_new_owner); 5356 5334 lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); 5357 5335 if (lo == NULL) 5358 5336 return nfserr_jukebox; ··· 5413 5391 if (lock->lk_is_new) { 5414 5392 if (nfsd4_has_session(cstate)) 5415 5393 /* See rfc 5661 18.10.3: given clientid is ignored: */ 5416 - memcpy(&lock->v.new.clientid, 5394 + memcpy(&lock->lk_new_clientid, 5417 5395 &cstate->session->se_client->cl_clientid, 5418 5396 sizeof(clientid_t)); 5419 5397 ··· 5431 5409 open_sop = openowner(open_stp->st_stateowner); 5432 5410 status = nfserr_bad_stateid; 5433 5411 if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, 5434 - &lock->v.new.clientid)) 5412 + &lock->lk_new_clientid)) 5435 5413 goto out; 5436 5414 status = lookup_or_create_lock_state(cstate, open_stp, lock, 5437 5415 &lock_stp, &new); ··· 5625 5603 goto out; 5626 5604 } 5627 5605 5628 - lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, 5629 - cstate->clp); 5606 + lo = find_lockowner_str(cstate->clp, &lockt->lt_owner); 5630 5607 if (lo) 5631 5608 file_lock->fl_owner = (fl_owner_t)lo; 5632 5609 file_lock->fl_pid = current->tgid; ··· 6040 6019 6041 6020 static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, 6042 6021 struct list_head *collect, 6043 - void (*func)(struct nfs4_ol_stateid *)) 6022 + bool (*func)(struct nfs4_ol_stateid *)) 6044 6023 { 6045 6024 struct nfs4_openowner *oop; 6046 6025 struct nfs4_ol_stateid *stp, *st_next; ··· 6054 6033 list_for_each_entry_safe(lst, lst_next, 6055 6034 &stp->st_locks, st_locks) { 6056 6035 if (func) { 6057 - func(lst); 6058 - nfsd_inject_add_lock_to_list(lst, 6059 - collect); 6036 + if (func(lst)) 6037 + nfsd_inject_add_lock_to_list(lst, 6038 + collect); 6060 6039 } 6061 6040 ++count; 6062 6041 /* ··· 6326 6305 continue; 6327 6306 6328 6307 atomic_inc(&clp->cl_refcount); 6329 - unhash_delegation_locked(dp); 6308 + WARN_ON(!unhash_delegation_locked(dp)); 6330 6309 list_add(&dp->dl_recall_lru, victims); 6331 6310 } 6332 6311 ++count; ··· 6605 6584 return ret; 6606 6585 nn->boot_time = get_seconds(); 6607 6586 nn->grace_ended = false; 6587 + nn->nfsd4_manager.block_opens = true; 6608 6588 locks_start_grace(net, &nn->nfsd4_manager); 6609 6589 nfsd4_client_tracking_init(net); 6610 6590 printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", ··· 6624 6602 ret = set_callback_cred(); 6625 6603 if (ret) 6626 6604 return -ENOMEM; 6627 - laundry_wq = create_singlethread_workqueue("nfsd4"); 6605 + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); 6628 6606 if (laundry_wq == NULL) { 6629 6607 ret = -ENOMEM; 6630 6608 goto out_recovery; ··· 6657 6635 spin_lock(&state_lock); 6658 6636 list_for_each_safe(pos, next, &nn->del_recall_lru) { 6659 6637 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6660 - unhash_delegation_locked(dp); 6638 + WARN_ON(!unhash_delegation_locked(dp)); 6661 6639 list_add(&dp->dl_recall_lru, &reaplist); 6662 6640 } 6663 6641 spin_unlock(&state_lock);

+100 -58

fs/nfsd/nfs4xdr.c

··· 2140 2140 return nfsd4_encode_user(xdr, rqstp, ace->who_uid); 2141 2141 } 2142 2142 2143 + static inline __be32 2144 + nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type) 2145 + { 2146 + __be32 *p; 2147 + 2148 + if (layout_type) { 2149 + p = xdr_reserve_space(xdr, 8); 2150 + if (!p) 2151 + return nfserr_resource; 2152 + *p++ = cpu_to_be32(1); 2153 + *p++ = cpu_to_be32(layout_type); 2154 + } else { 2155 + p = xdr_reserve_space(xdr, 4); 2156 + if (!p) 2157 + return nfserr_resource; 2158 + *p++ = cpu_to_be32(0); 2159 + } 2160 + 2161 + return 0; 2162 + } 2163 + 2143 2164 #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ 2144 2165 FATTR4_WORD0_RDATTR_ERROR) 2145 2166 #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID ··· 2224 2203 err = vfs_getattr(&path, stat); 2225 2204 path_put(&path); 2226 2205 return err; 2206 + } 2207 + 2208 + static __be32 2209 + nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) 2210 + { 2211 + __be32 *p; 2212 + 2213 + if (bmval2) { 2214 + p = xdr_reserve_space(xdr, 16); 2215 + if (!p) 2216 + goto out_resource; 2217 + *p++ = cpu_to_be32(3); 2218 + *p++ = cpu_to_be32(bmval0); 2219 + *p++ = cpu_to_be32(bmval1); 2220 + *p++ = cpu_to_be32(bmval2); 2221 + } else if (bmval1) { 2222 + p = xdr_reserve_space(xdr, 12); 2223 + if (!p) 2224 + goto out_resource; 2225 + *p++ = cpu_to_be32(2); 2226 + *p++ = cpu_to_be32(bmval0); 2227 + *p++ = cpu_to_be32(bmval1); 2228 + } else { 2229 + p = xdr_reserve_space(xdr, 8); 2230 + if (!p) 2231 + goto out_resource; 2232 + *p++ = cpu_to_be32(1); 2233 + *p++ = cpu_to_be32(bmval0); 2234 + } 2235 + 2236 + return 0; 2237 + out_resource: 2238 + return nfserr_resource; 2227 2239 } 2228 2240 2229 2241 /* ··· 2355 2301 } 2356 2302 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ 2357 2303 2358 - if (bmval2) { 2359 - p = xdr_reserve_space(xdr, 16); 2360 - if (!p) 2361 - goto out_resource; 2362 - *p++ = cpu_to_be32(3); 2363 - *p++ = cpu_to_be32(bmval0); 2364 - *p++ = cpu_to_be32(bmval1); 2365 - *p++ = cpu_to_be32(bmval2); 2366 - } else if (bmval1) { 2367 - p = xdr_reserve_space(xdr, 12); 2368 - if (!p) 2369 - goto out_resource; 2370 - *p++ = cpu_to_be32(2); 2371 - *p++ = cpu_to_be32(bmval0); 2372 - *p++ = cpu_to_be32(bmval1); 2373 - } else { 2374 - p = xdr_reserve_space(xdr, 8); 2375 - if (!p) 2376 - goto out_resource; 2377 - *p++ = cpu_to_be32(1); 2378 - *p++ = cpu_to_be32(bmval0); 2379 - } 2304 + status = nfsd4_encode_bitmap(xdr, bmval0, bmval1, bmval2); 2305 + if (status) 2306 + goto out; 2380 2307 2381 2308 attrlen_offset = xdr->buf->len; 2382 2309 p = xdr_reserve_space(xdr, 4); ··· 2710 2675 *p++ = cpu_to_be32(stat.mtime.tv_nsec); 2711 2676 } 2712 2677 if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { 2678 + struct kstat parent_stat; 2679 + u64 ino = stat.ino; 2680 + 2713 2681 p = xdr_reserve_space(xdr, 8); 2714 2682 if (!p) 2715 2683 goto out_resource; ··· 2721 2683 * and this is the root of a cross-mounted filesystem. 2722 2684 */ 2723 2685 if (ignore_crossmnt == 0 && 2724 - dentry == exp->ex_path.mnt->mnt_root) 2725 - get_parent_attributes(exp, &stat); 2726 - p = xdr_encode_hyper(p, stat.ino); 2686 + dentry == exp->ex_path.mnt->mnt_root) { 2687 + err = get_parent_attributes(exp, &parent_stat); 2688 + if (err) 2689 + goto out_nfserr; 2690 + ino = parent_stat.ino; 2691 + } 2692 + p = xdr_encode_hyper(p, ino); 2727 2693 } 2728 2694 #ifdef CONFIG_NFSD_PNFS 2729 - if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || 2730 - (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { 2731 - if (exp->ex_layout_type) { 2732 - p = xdr_reserve_space(xdr, 8); 2733 - if (!p) 2734 - goto out_resource; 2735 - *p++ = cpu_to_be32(1); 2736 - *p++ = cpu_to_be32(exp->ex_layout_type); 2737 - } else { 2738 - p = xdr_reserve_space(xdr, 4); 2739 - if (!p) 2740 - goto out_resource; 2741 - *p++ = cpu_to_be32(0); 2742 - } 2695 + if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { 2696 + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); 2697 + if (status) 2698 + goto out; 2699 + } 2700 + 2701 + if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) { 2702 + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); 2703 + if (status) 2704 + goto out; 2743 2705 } 2744 2706 2745 2707 if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { ··· 2749 2711 *p++ = cpu_to_be32(stat.blksize); 2750 2712 } 2751 2713 #endif /* CONFIG_NFSD_PNFS */ 2714 + if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { 2715 + status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0, 2716 + NFSD_SUPPATTR_EXCLCREAT_WORD1, 2717 + NFSD_SUPPATTR_EXCLCREAT_WORD2); 2718 + if (status) 2719 + goto out; 2720 + } 2721 + 2752 2722 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { 2753 2723 status = nfsd4_encode_security_label(xdr, rqstp, context, 2754 2724 contextlen); 2755 2725 if (status) 2756 2726 goto out; 2757 - } 2758 - if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { 2759 - p = xdr_reserve_space(xdr, 16); 2760 - if (!p) 2761 - goto out_resource; 2762 - *p++ = cpu_to_be32(3); 2763 - *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); 2764 - *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); 2765 - *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); 2766 2727 } 2767 2728 2768 2729 attrlen = htonl(xdr->buf->len - attrlen_offset - 4); ··· 3081 3044 __be32 *p; 3082 3045 3083 3046 if (!nfserr) { 3084 - p = xdr_reserve_space(xdr, 32); 3047 + p = xdr_reserve_space(xdr, 20); 3085 3048 if (!p) 3086 3049 return nfserr_resource; 3087 - p = encode_cinfo(p, &create->cr_cinfo); 3088 - *p++ = cpu_to_be32(2); 3089 - *p++ = cpu_to_be32(create->cr_bmval[0]); 3090 - *p++ = cpu_to_be32(create->cr_bmval[1]); 3050 + encode_cinfo(p, &create->cr_cinfo); 3051 + nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], 3052 + create->cr_bmval[1], create->cr_bmval[2]); 3091 3053 } 3092 3054 return nfserr; 3093 3055 } ··· 3226 3190 nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); 3227 3191 if (nfserr) 3228 3192 goto out; 3229 - p = xdr_reserve_space(xdr, 40); 3193 + p = xdr_reserve_space(xdr, 24); 3230 3194 if (!p) 3231 3195 return nfserr_resource; 3232 3196 p = encode_cinfo(p, &open->op_cinfo); 3233 3197 *p++ = cpu_to_be32(open->op_rflags); 3234 - *p++ = cpu_to_be32(2); 3235 - *p++ = cpu_to_be32(open->op_bmval[0]); 3236 - *p++ = cpu_to_be32(open->op_bmval[1]); 3237 - *p++ = cpu_to_be32(open->op_delegate_type); 3238 3198 3199 + nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], 3200 + open->op_bmval[2]); 3201 + if (nfserr) 3202 + goto out; 3203 + 3204 + p = xdr_reserve_space(xdr, 4); 3205 + if (!p) 3206 + return nfserr_resource; 3207 + 3208 + *p++ = cpu_to_be32(open->op_delegate_type); 3239 3209 switch (open->op_delegate_type) { 3240 3210 case NFS4_OPEN_DELEGATE_NONE: 3241 3211 break;

+13 -4

fs/nfsd/nfssvc.c

··· 391 391 return ret; 392 392 } 393 393 394 + static struct svc_serv_ops nfsd_thread_sv_ops = { 395 + .svo_shutdown = nfsd_last_thread, 396 + .svo_function = nfsd, 397 + .svo_enqueue_xprt = svc_xprt_do_enqueue, 398 + .svo_setup = svc_set_num_threads, 399 + .svo_module = THIS_MODULE, 400 + }; 401 + 394 402 int nfsd_create_serv(struct net *net) 395 403 { 396 404 int error; ··· 413 405 nfsd_max_blksize = nfsd_get_default_max_blksize(); 414 406 nfsd_reset_versions(); 415 407 nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 416 - nfsd_last_thread, nfsd, THIS_MODULE); 408 + &nfsd_thread_sv_ops); 417 409 if (nn->nfsd_serv == NULL) 418 410 return -ENOMEM; 419 411 ··· 508 500 /* apply the new numbers */ 509 501 svc_get(nn->nfsd_serv); 510 502 for (i = 0; i < n; i++) { 511 - err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], 512 - nthreads[i]); 503 + err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, 504 + &nn->nfsd_serv->sv_pools[i], nthreads[i]); 513 505 if (err) 514 506 break; 515 507 } ··· 548 540 error = nfsd_startup_net(nrservs, net); 549 541 if (error) 550 542 goto out_destroy; 551 - error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); 543 + error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, 544 + NULL, nrservs); 552 545 if (error) 553 546 goto out_shutdown; 554 547 /* We are holding a reference to nn->nfsd_serv which

+1 -1

fs/nfsd/state.h

··· 67 67 struct rpc_message cb_msg; 68 68 struct nfsd4_callback_ops *cb_ops; 69 69 struct work_struct cb_work; 70 + int cb_seq_status; 70 71 int cb_status; 71 - bool cb_update_seq_nr; 72 72 bool cb_need_restart; 73 73 }; 74 74

-6

fs/nfsd/vfs.c

··· 1249 1249 1250 1250 #ifdef CONFIG_NFSD_V3 1251 1251 1252 - static inline int nfsd_create_is_exclusive(int createmode) 1253 - { 1254 - return createmode == NFS3_CREATE_EXCLUSIVE 1255 - || createmode == NFS4_CREATE_EXCLUSIVE4_1; 1256 - } 1257 - 1258 1252 /* 1259 1253 * NFSv3 and NFSv4 version of nfsd_create 1260 1254 */

+6

fs/nfsd/vfs.h

··· 131 131 return nfserrno(vfs_getattr(&p, stat)); 132 132 } 133 133 134 + static inline int nfsd_create_is_exclusive(int createmode) 135 + { 136 + return createmode == NFS3_CREATE_EXCLUSIVE 137 + || createmode == NFS4_CREATE_EXCLUSIVE4_1; 138 + } 139 + 134 140 #endif /* LINUX_NFSD_VFS_H */

+6

include/linux/fs.h

··· 943 943 944 944 struct lock_manager { 945 945 struct list_head list; 946 + /* 947 + * NFSv4 and up also want opens blocked during the grace period; 948 + * NLM doesn't care: 949 + */ 950 + bool block_opens; 946 951 }; 947 952 948 953 struct net; 949 954 void locks_start_grace(struct net *, struct lock_manager *); 950 955 void locks_end_grace(struct lock_manager *); 951 956 int locks_in_grace(struct net *); 957 + int opens_in_grace(struct net *); 952 958 953 959 /* that will die - we need it for nfs_lock_info */ 954 960 #include <linux/nfs_fs_i.h>

+7 -2

include/linux/sunrpc/cache.h

··· 46 46 * 47 47 */ 48 48 struct cache_head { 49 - struct cache_head * next; 49 + struct hlist_node cache_list; 50 50 time_t expiry_time; /* After time time, don't use the data */ 51 51 time_t last_refresh; /* If CACHE_PENDING, this is when upcall 52 52 * was sent, else this is when update was received ··· 73 73 struct cache_detail { 74 74 struct module * owner; 75 75 int hash_size; 76 - struct cache_head ** hash_table; 76 + struct hlist_head * hash_table; 77 77 rwlock_t hash_lock; 78 78 79 79 atomic_t inuse; /* active user-space update or lookup */ ··· 223 223 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, 224 224 umode_t, struct cache_detail *); 225 225 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); 226 + 227 + /* Must store cache_detail in seq_file->private if using next three functions */ 228 + extern void *cache_seq_start(struct seq_file *file, loff_t *pos); 229 + extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); 230 + extern void cache_seq_stop(struct seq_file *file, void *p); 226 231 227 232 extern void qword_add(char **bpp, int *lp, char *str); 228 233 extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);

+50 -18

include/linux/sunrpc/svc.h

··· 19 19 #include <linux/wait.h> 20 20 #include <linux/mm.h> 21 21 22 - /* 23 - * This is the RPC server thread function prototype 24 - */ 25 - typedef int (*svc_thread_fn)(void *); 26 - 27 22 /* statistics for svc_pool structures */ 28 23 struct svc_pool_stats { 29 24 atomic_long_t packets; ··· 48 53 * xprt is queued. */ 49 54 unsigned long sp_flags; 50 55 } ____cacheline_aligned_in_smp; 56 + 57 + struct svc_serv; 58 + 59 + struct svc_serv_ops { 60 + /* Callback to use when last thread exits. */ 61 + void (*svo_shutdown)(struct svc_serv *, struct net *); 62 + 63 + /* function for service threads to run */ 64 + int (*svo_function)(void *); 65 + 66 + /* queue up a transport for servicing */ 67 + void (*svo_enqueue_xprt)(struct svc_xprt *); 68 + 69 + /* set up thread (or whatever) execution context */ 70 + int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); 71 + 72 + /* optional module to count when adding threads (pooled svcs only) */ 73 + struct module *svo_module; 74 + }; 51 75 52 76 /* 53 77 * RPC service. ··· 99 85 100 86 unsigned int sv_nrpools; /* number of thread pools */ 101 87 struct svc_pool * sv_pools; /* array of thread pools */ 102 - 103 - void (*sv_shutdown)(struct svc_serv *serv, 104 - struct net *net); 105 - /* Callback to use when last thread 106 - * exits. 107 - */ 108 - 109 - struct module * sv_module; /* optional module to count when 110 - * adding threads */ 111 - svc_thread_fn sv_function; /* main function for threads */ 88 + struct svc_serv_ops *sv_ops; /* server operations */ 112 89 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 113 90 struct list_head sv_cb_list; /* queue for callback requests 114 91 * that arrive over the same ··· 428 423 }; 429 424 430 425 /* 426 + * Mode for mapping cpus to pools. 427 + */ 428 + enum { 429 + SVC_POOL_AUTO = -1, /* choose one of the others */ 430 + SVC_POOL_GLOBAL, /* no mapping, just a single global pool 431 + * (legacy & UP mode) */ 432 + SVC_POOL_PERCPU, /* one pool per cpu */ 433 + SVC_POOL_PERNODE /* one pool per numa node */ 434 + }; 435 + 436 + struct svc_pool_map { 437 + int count; /* How many svc_servs use us */ 438 + int mode; /* Note: int not enum to avoid 439 + * warnings about "enumeration value 440 + * not handled in switch" */ 441 + unsigned int npools; 442 + unsigned int *pool_to; /* maps pool id to cpu or node */ 443 + unsigned int *to_pool; /* maps cpu or node to pool id */ 444 + }; 445 + 446 + extern struct svc_pool_map svc_pool_map; 447 + 448 + /* 431 449 * Function prototypes. 432 450 */ 433 451 int svc_rpcb_setup(struct svc_serv *serv, struct net *net); 434 452 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); 435 453 int svc_bind(struct svc_serv *serv, struct net *net); 436 454 struct svc_serv *svc_create(struct svc_program *, unsigned int, 437 - void (*shutdown)(struct svc_serv *, struct net *net)); 455 + struct svc_serv_ops *); 456 + struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, 457 + struct svc_pool *pool, int node); 438 458 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, 439 459 struct svc_pool *pool, int node); 460 + void svc_rqst_free(struct svc_rqst *); 440 461 void svc_exit_thread(struct svc_rqst *); 462 + unsigned int svc_pool_map_get(void); 463 + void svc_pool_map_put(void); 441 464 struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, 442 - void (*shutdown)(struct svc_serv *, struct net *net), 443 - svc_thread_fn, struct module *); 465 + struct svc_serv_ops *); 444 466 int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); 445 467 int svc_pool_stats_open(struct svc_serv *serv, struct file *file); 446 468 void svc_destroy(struct svc_serv *);

+4 -87

include/linux/sunrpc/svc_rdma.h

··· 172 172 #define RDMAXPRT_SQ_PENDING 2 173 173 #define RDMAXPRT_CONN_PENDING 3 174 174 175 - #define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ 176 - #if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) 177 - #define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD 178 - #else 179 - #define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) 180 - #endif 181 - 182 175 #define RPCRDMA_LISTEN_BACKLOG 10 183 176 /* The default ORD value is based on two outstanding full-size writes with a 184 177 * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ ··· 179 186 #define RPCRDMA_SQ_DEPTH_MULT 8 180 187 #define RPCRDMA_MAX_REQUESTS 32 181 188 #define RPCRDMA_MAX_REQ_SIZE 4096 189 + 190 + #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 182 191 183 192 /* svc_rdma_marshal.c */ 184 193 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); ··· 208 213 209 214 /* svc_rdma_sendto.c */ 210 215 extern int svc_rdma_sendto(struct svc_rqst *); 216 + extern struct rpcrdma_read_chunk * 217 + svc_rdma_get_read_chunk(struct rpcrdma_msg *); 211 218 212 219 /* svc_rdma_transport.c */ 213 220 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); ··· 222 225 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); 223 226 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); 224 227 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); 225 - extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); 226 228 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); 227 229 extern void svc_rdma_put_frmr(struct svcxprt_rdma *, 228 230 struct svc_rdma_fastreg_mr *); ··· 234 238 extern int svc_rdma_init(void); 235 239 extern void svc_rdma_cleanup(void); 236 240 237 - /* 238 - * Returns the address of the first read chunk or <nul> if no read chunk is 239 - * present 240 - */ 241 - static inline struct rpcrdma_read_chunk * 242 - svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) 243 - { 244 - struct rpcrdma_read_chunk *ch = 245 - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; 246 - 247 - if (ch->rc_discrim == 0) 248 - return NULL; 249 - 250 - return ch; 251 - } 252 - 253 - /* 254 - * Returns the address of the first read write array element or <nul> if no 255 - * write array list is present 256 - */ 257 - static inline struct rpcrdma_write_array * 258 - svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) 259 - { 260 - if (rmsgp->rm_body.rm_chunks[0] != 0 261 - || rmsgp->rm_body.rm_chunks[1] == 0) 262 - return NULL; 263 - 264 - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; 265 - } 266 - 267 - /* 268 - * Returns the address of the first reply array element or <nul> if no 269 - * reply array is present 270 - */ 271 - static inline struct rpcrdma_write_array * 272 - svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) 273 - { 274 - struct rpcrdma_read_chunk *rch; 275 - struct rpcrdma_write_array *wr_ary; 276 - struct rpcrdma_write_array *rp_ary; 277 - 278 - /* XXX: Need to fix when reply list may occur with read-list and/or 279 - * write list */ 280 - if (rmsgp->rm_body.rm_chunks[0] != 0 || 281 - rmsgp->rm_body.rm_chunks[1] != 0) 282 - return NULL; 283 - 284 - rch = svc_rdma_get_read_chunk(rmsgp); 285 - if (rch) { 286 - while (rch->rc_discrim) 287 - rch++; 288 - 289 - /* The reply list follows an empty write array located 290 - * at 'rc_position' here. The reply array is at rc_target. 291 - */ 292 - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; 293 - 294 - goto found_it; 295 - } 296 - 297 - wr_ary = svc_rdma_get_write_array(rmsgp); 298 - if (wr_ary) { 299 - rp_ary = (struct rpcrdma_write_array *) 300 - &wr_ary-> 301 - wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; 302 - 303 - goto found_it; 304 - } 305 - 306 - /* No read list, no write list */ 307 - rp_ary = (struct rpcrdma_write_array *) 308 - &rmsgp->rm_body.rm_chunks[2]; 309 - 310 - found_it: 311 - if (rp_ary->wc_discrim == 0) 312 - return NULL; 313 - 314 - return rp_ary; 315 - } 316 241 #endif

+1

include/linux/sunrpc/svc_xprt.h

··· 116 116 struct svc_serv *); 117 117 int svc_create_xprt(struct svc_serv *, const char *, struct net *, 118 118 const int, const unsigned short, int); 119 + void svc_xprt_do_enqueue(struct svc_xprt *xprt); 119 120 void svc_xprt_enqueue(struct svc_xprt *xprt); 120 121 void svc_xprt_put(struct svc_xprt *xprt); 121 122 void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);

+14 -7

include/trace/events/sunrpc.h

··· 529 529 530 530 TP_STRUCT__entry( 531 531 __field(struct svc_xprt *, xprt) 532 - __field(struct svc_rqst *, rqst) 532 + __field_struct(struct sockaddr_storage, ss) 533 + __field(int, pid) 534 + __field(unsigned long, flags) 533 535 ), 534 536 535 537 TP_fast_assign( 536 538 __entry->xprt = xprt; 537 - __entry->rqst = rqst; 539 + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); 540 + __entry->pid = rqst? rqst->rq_task->pid : 0; 541 + __entry->flags = xprt ? xprt->xpt_flags : 0; 538 542 ), 539 543 540 544 TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, 541 - (struct sockaddr *)&__entry->xprt->xpt_remote, 542 - __entry->rqst ? __entry->rqst->rq_task->pid : 0, 543 - show_svc_xprt_flags(__entry->xprt->xpt_flags)) 545 + (struct sockaddr *)&__entry->ss, 546 + __entry->pid, show_svc_xprt_flags(__entry->flags)) 544 547 ); 545 548 546 549 TRACE_EVENT(svc_xprt_dequeue, ··· 592 589 TP_STRUCT__entry( 593 590 __field(struct svc_xprt *, xprt) 594 591 __field(int, len) 592 + __field_struct(struct sockaddr_storage, ss) 593 + __field(unsigned long, flags) 595 594 ), 596 595 597 596 TP_fast_assign( 598 597 __entry->xprt = xprt; 598 + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); 599 599 __entry->len = len; 600 + __entry->flags = xprt ? xprt->xpt_flags : 0; 600 601 ), 601 602 602 603 TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, 603 - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len, 604 - show_svc_xprt_flags(__entry->xprt->xpt_flags)) 604 + (struct sockaddr *)&__entry->ss, 605 + __entry->len, show_svc_xprt_flags(__entry->flags)) 605 606 ); 606 607 #endif /* _TRACE_SUNRPC_H */ 607 608

+1

include/uapi/linux/nfsacl.h

··· 22 22 #define NFS_ACLCNT 0x0002 23 23 #define NFS_DFACL 0x0004 24 24 #define NFS_DFACLCNT 0x0008 25 + #define NFS_ACL_MASK 0x000f 25 26 26 27 /* Flag for Default ACL entries */ 27 28 #define NFS_ACL_DEFAULT 0x1000

+53 -50

net/sunrpc/cache.c

··· 44 44 static void cache_init(struct cache_head *h) 45 45 { 46 46 time_t now = seconds_since_boot(); 47 - h->next = NULL; 47 + INIT_HLIST_NODE(&h->cache_list); 48 48 h->flags = 0; 49 49 kref_init(&h->ref); 50 50 h->expiry_time = now + CACHE_NEW_EXPIRY; ··· 54 54 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, 55 55 struct cache_head *key, int hash) 56 56 { 57 - struct cache_head **head, **hp; 58 - struct cache_head *new = NULL, *freeme = NULL; 57 + struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL; 58 + struct hlist_head *head; 59 59 60 60 head = &detail->hash_table[hash]; 61 61 62 62 read_lock(&detail->hash_lock); 63 63 64 - for (hp=head; *hp != NULL ; hp = &(*hp)->next) { 65 - struct cache_head *tmp = *hp; 64 + hlist_for_each_entry(tmp, head, cache_list) { 66 65 if (detail->match(tmp, key)) { 67 66 if (cache_is_expired(detail, tmp)) 68 67 /* This entry is expired, we will discard it. */ ··· 87 88 write_lock(&detail->hash_lock); 88 89 89 90 /* check if entry appeared while we slept */ 90 - for (hp=head; *hp != NULL ; hp = &(*hp)->next) { 91 - struct cache_head *tmp = *hp; 91 + hlist_for_each_entry(tmp, head, cache_list) { 92 92 if (detail->match(tmp, key)) { 93 93 if (cache_is_expired(detail, tmp)) { 94 - *hp = tmp->next; 95 - tmp->next = NULL; 94 + hlist_del_init(&tmp->cache_list); 96 95 detail->entries --; 97 96 freeme = tmp; 98 97 break; ··· 101 104 return tmp; 102 105 } 103 106 } 104 - new->next = *head; 105 - *head = new; 107 + 108 + hlist_add_head(&new->cache_list, head); 106 109 detail->entries++; 107 110 cache_get(new); 108 111 write_unlock(&detail->hash_lock); ··· 140 143 * If 'old' is not VALID, we update it directly, 141 144 * otherwise we need to replace it 142 145 */ 143 - struct cache_head **head; 144 146 struct cache_head *tmp; 145 147 146 148 if (!test_bit(CACHE_VALID, &old->flags)) { ··· 164 168 } 165 169 cache_init(tmp); 166 170 detail->init(tmp, old); 167 - head = &detail->hash_table[hash]; 168 171 169 172 write_lock(&detail->hash_lock); 170 173 if (test_bit(CACHE_NEGATIVE, &new->flags)) 171 174 set_bit(CACHE_NEGATIVE, &tmp->flags); 172 175 else 173 176 detail->update(tmp, new); 174 - tmp->next = *head; 175 - *head = tmp; 177 + hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); 176 178 detail->entries++; 177 179 cache_get(tmp); 178 180 cache_fresh_locked(tmp, new->expiry_time); ··· 410 416 /* find a non-empty bucket in the table */ 411 417 while (current_detail && 412 418 current_index < current_detail->hash_size && 413 - current_detail->hash_table[current_index] == NULL) 419 + hlist_empty(&current_detail->hash_table[current_index])) 414 420 current_index++; 415 421 416 422 /* find a cleanable entry in the bucket and clean it, or set to next bucket */ 417 423 418 424 if (current_detail && current_index < current_detail->hash_size) { 419 - struct cache_head *ch, **cp; 425 + struct cache_head *ch = NULL; 420 426 struct cache_detail *d; 427 + struct hlist_head *head; 428 + struct hlist_node *tmp; 421 429 422 430 write_lock(&current_detail->hash_lock); 423 431 424 432 /* Ok, now to clean this strand */ 425 433 426 - cp = & current_detail->hash_table[current_index]; 427 - for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { 434 + head = &current_detail->hash_table[current_index]; 435 + hlist_for_each_entry_safe(ch, tmp, head, cache_list) { 428 436 if (current_detail->nextcheck > ch->expiry_time) 429 437 current_detail->nextcheck = ch->expiry_time+1; 430 438 if (!cache_is_expired(current_detail, ch)) 431 439 continue; 432 440 433 - *cp = ch->next; 434 - ch->next = NULL; 441 + hlist_del_init(&ch->cache_list); 435 442 current_detail->entries--; 436 443 rv = 1; 437 444 break; ··· 1265 1270 * get a header, then pass each real item in the cache 1266 1271 */ 1267 1272 1268 - struct handle { 1269 - struct cache_detail *cd; 1270 - }; 1271 - 1272 - static void *c_start(struct seq_file *m, loff_t *pos) 1273 + void *cache_seq_start(struct seq_file *m, loff_t *pos) 1273 1274 __acquires(cd->hash_lock) 1274 1275 { 1275 1276 loff_t n = *pos; 1276 1277 unsigned int hash, entry; 1277 1278 struct cache_head *ch; 1278 - struct cache_detail *cd = ((struct handle*)m->private)->cd; 1279 - 1279 + struct cache_detail *cd = m->private; 1280 1280 1281 1281 read_lock(&cd->hash_lock); 1282 1282 if (!n--) ··· 1279 1289 hash = n >> 32; 1280 1290 entry = n & ((1LL<<32) - 1); 1281 1291 1282 - for (ch=cd->hash_table[hash]; ch; ch=ch->next) 1292 + hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list) 1283 1293 if (!entry--) 1284 1294 return ch; 1285 1295 n &= ~((1LL<<32) - 1); ··· 1287 1297 hash++; 1288 1298 n += 1LL<<32; 1289 1299 } while(hash < cd->hash_size && 1290 - cd->hash_table[hash]==NULL); 1300 + hlist_empty(&cd->hash_table[hash])); 1291 1301 if (hash >= cd->hash_size) 1292 1302 return NULL; 1293 1303 *pos = n+1; 1294 - return cd->hash_table[hash]; 1304 + return hlist_entry_safe(cd->hash_table[hash].first, 1305 + struct cache_head, cache_list); 1295 1306 } 1307 + EXPORT_SYMBOL_GPL(cache_seq_start); 1296 1308 1297 - static void *c_next(struct seq_file *m, void *p, loff_t *pos) 1309 + void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) 1298 1310 { 1299 1311 struct cache_head *ch = p; 1300 1312 int hash = (*pos >> 32); 1301 - struct cache_detail *cd = ((struct handle*)m->private)->cd; 1313 + struct cache_detail *cd = m->private; 1302 1314 1303 1315 if (p == SEQ_START_TOKEN) 1304 1316 hash = 0; 1305 - else if (ch->next == NULL) { 1317 + else if (ch->cache_list.next == NULL) { 1306 1318 hash++; 1307 1319 *pos += 1LL<<32; 1308 1320 } else { 1309 1321 ++*pos; 1310 - return ch->next; 1322 + return hlist_entry_safe(ch->cache_list.next, 1323 + struct cache_head, cache_list); 1311 1324 } 1312 1325 *pos &= ~((1LL<<32) - 1); 1313 1326 while (hash < cd->hash_size && 1314 - cd->hash_table[hash] == NULL) { 1327 + hlist_empty(&cd->hash_table[hash])) { 1315 1328 hash++; 1316 1329 *pos += 1LL<<32; 1317 1330 } 1318 1331 if (hash >= cd->hash_size) 1319 1332 return NULL; 1320 1333 ++*pos; 1321 - return cd->hash_table[hash]; 1334 + return hlist_entry_safe(cd->hash_table[hash].first, 1335 + struct cache_head, cache_list); 1322 1336 } 1337 + EXPORT_SYMBOL_GPL(cache_seq_next); 1323 1338 1324 - static void c_stop(struct seq_file *m, void *p) 1339 + void cache_seq_stop(struct seq_file *m, void *p) 1325 1340 __releases(cd->hash_lock) 1326 1341 { 1327 - struct cache_detail *cd = ((struct handle*)m->private)->cd; 1342 + struct cache_detail *cd = m->private; 1328 1343 read_unlock(&cd->hash_lock); 1329 1344 } 1345 + EXPORT_SYMBOL_GPL(cache_seq_stop); 1330 1346 1331 1347 static int c_show(struct seq_file *m, void *p) 1332 1348 { 1333 1349 struct cache_head *cp = p; 1334 - struct cache_detail *cd = ((struct handle*)m->private)->cd; 1350 + struct cache_detail *cd = m->private; 1335 1351 1336 1352 if (p == SEQ_START_TOKEN) 1337 1353 return cd->cache_show(m, cd, NULL); ··· 1360 1364 } 1361 1365 1362 1366 static const struct seq_operations cache_content_op = { 1363 - .start = c_start, 1364 - .next = c_next, 1365 - .stop = c_stop, 1367 + .start = cache_seq_start, 1368 + .next = cache_seq_next, 1369 + .stop = cache_seq_stop, 1366 1370 .show = c_show, 1367 1371 }; 1368 1372 1369 1373 static int content_open(struct inode *inode, struct file *file, 1370 1374 struct cache_detail *cd) 1371 1375 { 1372 - struct handle *han; 1376 + struct seq_file *seq; 1377 + int err; 1373 1378 1374 1379 if (!cd || !try_module_get(cd->owner)) 1375 1380 return -EACCES; 1376 - han = __seq_open_private(file, &cache_content_op, sizeof(*han)); 1377 - if (han == NULL) { 1381 + 1382 + err = seq_open(file, &cache_content_op); 1383 + if (err) { 1378 1384 module_put(cd->owner); 1379 - return -ENOMEM; 1385 + return err; 1380 1386 } 1381 1387 1382 - han->cd = cd; 1388 + seq = file->private_data; 1389 + seq->private = cd; 1383 1390 return 0; 1384 1391 } 1385 1392 1386 1393 static int content_release(struct inode *inode, struct file *file, 1387 1394 struct cache_detail *cd) 1388 1395 { 1389 - int ret = seq_release_private(inode, file); 1396 + int ret = seq_release(inode, file); 1390 1397 module_put(cd->owner); 1391 1398 return ret; 1392 1399 } ··· 1664 1665 struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) 1665 1666 { 1666 1667 struct cache_detail *cd; 1668 + int i; 1667 1669 1668 1670 cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); 1669 1671 if (cd == NULL) 1670 1672 return ERR_PTR(-ENOMEM); 1671 1673 1672 - cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), 1674 + cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head), 1673 1675 GFP_KERNEL); 1674 1676 if (cd->hash_table == NULL) { 1675 1677 kfree(cd); 1676 1678 return ERR_PTR(-ENOMEM); 1677 1679 } 1680 + 1681 + for (i = 0; i < cd->hash_size; i++) 1682 + INIT_HLIST_HEAD(&cd->hash_table[i]); 1678 1683 cd->net = net; 1679 1684 return cd; 1680 1685 }

+61 -64

net/sunrpc/svc.c

··· 34 34 35 35 static void svc_unregister(const struct svc_serv *serv, struct net *net); 36 36 37 - #define svc_serv_is_pooled(serv) ((serv)->sv_function) 37 + #define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) 38 38 39 - /* 40 - * Mode for mapping cpus to pools. 41 - */ 42 - enum { 43 - SVC_POOL_AUTO = -1, /* choose one of the others */ 44 - SVC_POOL_GLOBAL, /* no mapping, just a single global pool 45 - * (legacy & UP mode) */ 46 - SVC_POOL_PERCPU, /* one pool per cpu */ 47 - SVC_POOL_PERNODE /* one pool per numa node */ 48 - }; 49 39 #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL 50 40 51 41 /* 52 42 * Structure for mapping cpus to pools and vice versa. 53 43 * Setup once during sunrpc initialisation. 54 44 */ 55 - static struct svc_pool_map { 56 - int count; /* How many svc_servs use us */ 57 - int mode; /* Note: int not enum to avoid 58 - * warnings about "enumeration value 59 - * not handled in switch" */ 60 - unsigned int npools; 61 - unsigned int *pool_to; /* maps pool id to cpu or node */ 62 - unsigned int *to_pool; /* maps cpu or node to pool id */ 63 - } svc_pool_map = { 64 - .count = 0, 45 + struct svc_pool_map svc_pool_map = { 65 46 .mode = SVC_POOL_DEFAULT 66 47 }; 48 + EXPORT_SYMBOL_GPL(svc_pool_map); 49 + 67 50 static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ 68 51 69 52 static int ··· 219 236 * vice versa). Initialise the map if we're the first user. 220 237 * Returns the number of pools. 221 238 */ 222 - static unsigned int 239 + unsigned int 223 240 svc_pool_map_get(void) 224 241 { 225 242 struct svc_pool_map *m = &svc_pool_map; ··· 254 271 mutex_unlock(&svc_pool_map_mutex); 255 272 return m->npools; 256 273 } 257 - 274 + EXPORT_SYMBOL_GPL(svc_pool_map_get); 258 275 259 276 /* 260 277 * Drop a reference to the global map of cpus to pools. ··· 263 280 * mode using the pool_mode module option without 264 281 * rebooting or re-loading sunrpc.ko. 265 282 */ 266 - static void 283 + void 267 284 svc_pool_map_put(void) 268 285 { 269 286 struct svc_pool_map *m = &svc_pool_map; ··· 280 297 281 298 mutex_unlock(&svc_pool_map_mutex); 282 299 } 283 - 300 + EXPORT_SYMBOL_GPL(svc_pool_map_put); 284 301 285 302 static int svc_pool_map_get_node(unsigned int pidx) 286 303 { ··· 406 423 */ 407 424 static struct svc_serv * 408 425 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 409 - void (*shutdown)(struct svc_serv *serv, struct net *net)) 426 + struct svc_serv_ops *ops) 410 427 { 411 428 struct svc_serv *serv; 412 429 unsigned int vers; ··· 423 440 bufsize = RPCSVC_MAXPAYLOAD; 424 441 serv->sv_max_payload = bufsize? bufsize : 4096; 425 442 serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); 426 - serv->sv_shutdown = shutdown; 443 + serv->sv_ops = ops; 427 444 xdrsize = 0; 428 445 while (prog) { 429 446 prog->pg_lovers = prog->pg_nvers-1; ··· 469 486 470 487 struct svc_serv * 471 488 svc_create(struct svc_program *prog, unsigned int bufsize, 472 - void (*shutdown)(struct svc_serv *serv, struct net *net)) 489 + struct svc_serv_ops *ops) 473 490 { 474 - return __svc_create(prog, bufsize, /*npools*/1, shutdown); 491 + return __svc_create(prog, bufsize, /*npools*/1, ops); 475 492 } 476 493 EXPORT_SYMBOL_GPL(svc_create); 477 494 478 495 struct svc_serv * 479 496 svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 480 - void (*shutdown)(struct svc_serv *serv, struct net *net), 481 - svc_thread_fn func, struct module *mod) 497 + struct svc_serv_ops *ops) 482 498 { 483 499 struct svc_serv *serv; 484 500 unsigned int npools = svc_pool_map_get(); 485 501 486 - serv = __svc_create(prog, bufsize, npools, shutdown); 502 + serv = __svc_create(prog, bufsize, npools, ops); 487 503 if (!serv) 488 504 goto out_err; 489 - 490 - serv->sv_function = func; 491 - serv->sv_module = mod; 492 505 return serv; 493 506 out_err: 494 507 svc_pool_map_put(); ··· 496 517 { 497 518 svc_close_net(serv, net); 498 519 499 - if (serv->sv_shutdown) 500 - serv->sv_shutdown(serv, net); 520 + if (serv->sv_ops->svo_shutdown) 521 + serv->sv_ops->svo_shutdown(serv, net); 501 522 } 502 523 EXPORT_SYMBOL_GPL(svc_shutdown_net); 503 524 ··· 583 604 } 584 605 585 606 struct svc_rqst * 586 - svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) 607 + svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) 587 608 { 588 609 struct svc_rqst *rqstp; 589 610 590 611 rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); 591 612 if (!rqstp) 592 - goto out_enomem; 613 + return rqstp; 593 614 594 - serv->sv_nrthreads++; 595 615 __set_bit(RQ_BUSY, &rqstp->rq_flags); 596 616 spin_lock_init(&rqstp->rq_lock); 597 617 rqstp->rq_server = serv; 598 618 rqstp->rq_pool = pool; 619 + 620 + rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 621 + if (!rqstp->rq_argp) 622 + goto out_enomem; 623 + 624 + rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 625 + if (!rqstp->rq_resp) 626 + goto out_enomem; 627 + 628 + if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) 629 + goto out_enomem; 630 + 631 + return rqstp; 632 + out_enomem: 633 + svc_rqst_free(rqstp); 634 + return NULL; 635 + } 636 + EXPORT_SYMBOL_GPL(svc_rqst_alloc); 637 + 638 + struct svc_rqst * 639 + svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) 640 + { 641 + struct svc_rqst *rqstp; 642 + 643 + rqstp = svc_rqst_alloc(serv, pool, node); 644 + if (!rqstp) 645 + return ERR_PTR(-ENOMEM); 646 + 647 + serv->sv_nrthreads++; 599 648 spin_lock_bh(&pool->sp_lock); 600 649 pool->sp_nrthreads++; 601 650 list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); 602 651 spin_unlock_bh(&pool->sp_lock); 603 - 604 - rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 605 - if (!rqstp->rq_argp) 606 - goto out_thread; 607 - 608 - rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); 609 - if (!rqstp->rq_resp) 610 - goto out_thread; 611 - 612 - if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) 613 - goto out_thread; 614 - 615 652 return rqstp; 616 - out_thread: 617 - svc_exit_thread(rqstp); 618 - out_enomem: 619 - return ERR_PTR(-ENOMEM); 620 653 } 621 654 EXPORT_SYMBOL_GPL(svc_prepare_thread); 622 655 ··· 730 739 break; 731 740 } 732 741 733 - __module_get(serv->sv_module); 734 - task = kthread_create_on_node(serv->sv_function, rqstp, 742 + __module_get(serv->sv_ops->svo_module); 743 + task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, 735 744 node, "%s", serv->sv_name); 736 745 if (IS_ERR(task)) { 737 746 error = PTR_ERR(task); 738 - module_put(serv->sv_module); 747 + module_put(serv->sv_ops->svo_module); 739 748 svc_exit_thread(rqstp); 740 749 break; 741 750 } ··· 763 772 * mutex" for the service. 764 773 */ 765 774 void 766 - svc_exit_thread(struct svc_rqst *rqstp) 775 + svc_rqst_free(struct svc_rqst *rqstp) 767 776 { 768 - struct svc_serv *serv = rqstp->rq_server; 769 - struct svc_pool *pool = rqstp->rq_pool; 770 - 771 777 svc_release_buffer(rqstp); 772 778 kfree(rqstp->rq_resp); 773 779 kfree(rqstp->rq_argp); 774 780 kfree(rqstp->rq_auth_data); 781 + kfree_rcu(rqstp, rq_rcu_head); 782 + } 783 + EXPORT_SYMBOL_GPL(svc_rqst_free); 784 + 785 + void 786 + svc_exit_thread(struct svc_rqst *rqstp) 787 + { 788 + struct svc_serv *serv = rqstp->rq_server; 789 + struct svc_pool *pool = rqstp->rq_pool; 775 790 776 791 spin_lock_bh(&pool->sp_lock); 777 792 pool->sp_nrthreads--; ··· 785 788 list_del_rcu(&rqstp->rq_all); 786 789 spin_unlock_bh(&pool->sp_lock); 787 790 788 - kfree_rcu(rqstp, rq_rcu_head); 791 + svc_rqst_free(rqstp); 789 792 790 793 /* Release the server */ 791 794 if (serv)

+5 -5

net/sunrpc/svc_xprt.c

··· 24 24 static struct cache_deferred_req *svc_defer(struct cache_req *req); 25 25 static void svc_age_temp_xprts(unsigned long closure); 26 26 static void svc_delete_xprt(struct svc_xprt *xprt); 27 - static void svc_xprt_do_enqueue(struct svc_xprt *xprt); 28 27 29 28 /* apparently the "standard" is that clients close 30 29 * idle connections after 5 minutes, servers after ··· 224 225 } 225 226 226 227 /* As soon as we clear busy, the xprt could be closed and 227 - * 'put', so we need a reference to call svc_xprt_do_enqueue with: 228 + * 'put', so we need a reference to call svc_enqueue_xprt with: 228 229 */ 229 230 svc_xprt_get(xprt); 230 231 smp_mb__before_atomic(); 231 232 clear_bit(XPT_BUSY, &xprt->xpt_flags); 232 - svc_xprt_do_enqueue(xprt); 233 + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); 233 234 svc_xprt_put(xprt); 234 235 } 235 236 ··· 319 320 return false; 320 321 } 321 322 322 - static void svc_xprt_do_enqueue(struct svc_xprt *xprt) 323 + void svc_xprt_do_enqueue(struct svc_xprt *xprt) 323 324 { 324 325 struct svc_pool *pool; 325 326 struct svc_rqst *rqstp = NULL; ··· 401 402 out: 402 403 trace_svc_xprt_do_enqueue(xprt, rqstp); 403 404 } 405 + EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); 404 406 405 407 /* 406 408 * Queue up a transport with data pending. If there are idle nfsd ··· 412 412 { 413 413 if (test_bit(XPT_BUSY, &xprt->xpt_flags)) 414 414 return; 415 - svc_xprt_do_enqueue(xprt); 415 + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); 416 416 } 417 417 EXPORT_SYMBOL_GPL(svc_xprt_enqueue); 418 418

+82 -1

net/sunrpc/xprtrdma/svc_rdma_sendto.c

··· 136 136 return dma_addr; 137 137 } 138 138 139 + /* Returns the address of the first read chunk or <nul> if no read chunk 140 + * is present 141 + */ 142 + struct rpcrdma_read_chunk * 143 + svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) 144 + { 145 + struct rpcrdma_read_chunk *ch = 146 + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; 147 + 148 + if (ch->rc_discrim == xdr_zero) 149 + return NULL; 150 + return ch; 151 + } 152 + 153 + /* Returns the address of the first read write array element or <nul> 154 + * if no write array list is present 155 + */ 156 + static struct rpcrdma_write_array * 157 + svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) 158 + { 159 + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || 160 + rmsgp->rm_body.rm_chunks[1] == xdr_zero) 161 + return NULL; 162 + return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; 163 + } 164 + 165 + /* Returns the address of the first reply array element or <nul> if no 166 + * reply array is present 167 + */ 168 + static struct rpcrdma_write_array * 169 + svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) 170 + { 171 + struct rpcrdma_read_chunk *rch; 172 + struct rpcrdma_write_array *wr_ary; 173 + struct rpcrdma_write_array *rp_ary; 174 + 175 + /* XXX: Need to fix when reply chunk may occur with read list 176 + * and/or write list. 177 + */ 178 + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || 179 + rmsgp->rm_body.rm_chunks[1] != xdr_zero) 180 + return NULL; 181 + 182 + rch = svc_rdma_get_read_chunk(rmsgp); 183 + if (rch) { 184 + while (rch->rc_discrim != xdr_zero) 185 + rch++; 186 + 187 + /* The reply chunk follows an empty write array located 188 + * at 'rc_position' here. The reply array is at rc_target. 189 + */ 190 + rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; 191 + goto found_it; 192 + } 193 + 194 + wr_ary = svc_rdma_get_write_array(rmsgp); 195 + if (wr_ary) { 196 + int chunk = be32_to_cpu(wr_ary->wc_nchunks); 197 + 198 + rp_ary = (struct rpcrdma_write_array *) 199 + &wr_ary->wc_array[chunk].wc_target.rs_length; 200 + goto found_it; 201 + } 202 + 203 + /* No read list, no write list */ 204 + rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; 205 + 206 + found_it: 207 + if (rp_ary->wc_discrim == xdr_zero) 208 + return NULL; 209 + return rp_ary; 210 + } 211 + 139 212 /* Assumptions: 140 213 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 141 214 */ ··· 457 384 int byte_count) 458 385 { 459 386 struct ib_send_wr send_wr; 387 + u32 xdr_off; 460 388 int sge_no; 461 389 int sge_bytes; 462 390 int page_no; ··· 492 418 ctxt->direction = DMA_TO_DEVICE; 493 419 494 420 /* Map the payload indicated by 'byte_count' */ 421 + xdr_off = 0; 495 422 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 496 - int xdr_off = 0; 497 423 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 498 424 byte_count -= sge_bytes; 499 425 ctxt->sge[sge_no].addr = ··· 530 456 ctxt->sge[page_no+1].length = 0; 531 457 } 532 458 rqstp->rq_next_page = rqstp->rq_respages + 1; 459 + 460 + /* The loop above bumps sc_dma_used for each sge. The 461 + * xdr_buf.tail gets a separate sge, but resides in the 462 + * same page as xdr_buf.head. Don't count it twice. 463 + */ 464 + if (sge_no > ctxt->count) 465 + atomic_dec(&rdma->sc_dma_used); 533 466 534 467 if (sge_no > rdma->sc_max_sge) { 535 468 pr_err("svcrdma: Too many sges (%d)\n", sge_no);

+2 -35

net/sunrpc/xprtrdma/svc_rdma_transport.c

··· 91 91 .xcl_name = "rdma", 92 92 .xcl_owner = THIS_MODULE, 93 93 .xcl_ops = &svc_rdma_ops, 94 - .xcl_max_payload = RPCRDMA_MAXPAYLOAD, 94 + .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, 95 95 .xcl_ident = XPRT_TRANSPORT_RDMA, 96 96 }; 97 97 ··· 659 659 if (xprt) { 660 660 set_bit(XPT_CLOSE, &xprt->xpt_flags); 661 661 svc_xprt_enqueue(xprt); 662 + svc_xprt_put(xprt); 662 663 } 663 664 break; 664 665 default: ··· 1200 1199 static int svc_rdma_secure_port(struct svc_rqst *rqstp) 1201 1200 { 1202 1201 return 1; 1203 - } 1204 - 1205 - /* 1206 - * Attempt to register the kvec representing the RPC memory with the 1207 - * device. 1208 - * 1209 - * Returns: 1210 - * NULL : The device does not support fastreg or there were no more 1211 - * fastreg mr. 1212 - * frmr : The kvec register request was successfully posted. 1213 - * <0 : An error was encountered attempting to register the kvec. 1214 - */ 1215 - int svc_rdma_fastreg(struct svcxprt_rdma *xprt, 1216 - struct svc_rdma_fastreg_mr *frmr) 1217 - { 1218 - struct ib_send_wr fastreg_wr; 1219 - u8 key; 1220 - 1221 - /* Bump the key */ 1222 - key = (u8)(frmr->mr->lkey & 0x000000FF); 1223 - ib_update_fast_reg_key(frmr->mr, ++key); 1224 - 1225 - /* Prepare FASTREG WR */ 1226 - memset(&fastreg_wr, 0, sizeof fastreg_wr); 1227 - fastreg_wr.opcode = IB_WR_FAST_REG_MR; 1228 - fastreg_wr.send_flags = IB_SEND_SIGNALED; 1229 - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; 1230 - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; 1231 - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; 1232 - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1233 - fastreg_wr.wr.fast_reg.length = frmr->map_len; 1234 - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; 1235 - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; 1236 - return svc_rdma_send(xprt, &fastreg_wr); 1237 1202 } 1238 1203 1239 1204 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)

-1

net/sunrpc/xprtrdma/xprt_rdma.h

··· 51 51 #include <linux/sunrpc/clnt.h> /* rpc_xprt */ 52 52 #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ 53 53 #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ 54 - #include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */ 55 54 56 55 #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ 57 56 #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */