Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-2.6.32' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.32' of git://linux-nfs.org/~bfields/linux: (68 commits)
nfsd4: nfsv4 clients should cross mountpoints
nfsd: revise 4.1 status documentation
sunrpc/cache: avoid variable over-loading in cache_defer_req
sunrpc/cache: use list_del_init for the list_head entries in cache_deferred_req
nfsd: return success for non-NFS4 nfs4_state_start
nfsd41: Refactor create_client()
nfsd41: modify nfsd4.1 backchannel to use new xprt class
nfsd41: Backchannel: Implement cb_recall over NFSv4.1
nfsd41: Backchannel: cb_sequence callback
nfsd41: Backchannel: Setup sequence information
nfsd41: Backchannel: Server backchannel RPC wait queue
nfsd41: Backchannel: Add sequence arguments to callback RPC arguments
nfsd41: Backchannel: callback infrastructure
nfsd4: use common rpc_cred for all callbacks
nfsd4: allow nfs4 state startup to fail
SUNRPC: Defer the auth_gss upcall when the RPC call is asynchronous
nfsd4: fix null dereference creating nfsv4 callback client
nfsd4: fix whitespace in NFSPROC4_CLNT_CB_NULL definition
nfsd41: sunrpc: add new xprt class for nfsv4.1 backchannel
sunrpc/cache: simplify cache_fresh_locked and cache_fresh_unlocked.
...

+1640 -897
+54
Documentation/filesystems/nfs41-server.txt
··· 11 11 control file, the nfsd service must be taken down. Use your user-mode 12 12 nfs-utils to set this up; see rpc.nfsd(8) 13 13 14 + (Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and 15 + "-4", respectively. Therefore, code meant to work on both new and old 16 + kernels must turn 4.1 on or off *before* turning support for version 4 17 + on or off; rpc.nfsd does this correctly.) 18 + 14 19 The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based 15 20 on the latest NFSv4.1 Internet Draft: 16 21 http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29 ··· 29 24 are still under development out of tree. 30 25 See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design 31 26 for more information. 27 + 28 + The current implementation is intended for developers only: while it 29 + does support ordinary file operations on clients we have tested against 30 + (including the linux client), it is incomplete in ways which may limit 31 + features unexpectedly, cause known bugs in rare cases, or cause 32 + interoperability problems with future clients. Known issues: 33 + 34 + - gss support is questionable: currently mounts with kerberos 35 + from a linux client are possible, but we aren't really 36 + conformant with the spec (for example, we don't use kerberos 37 + on the backchannel correctly). 38 + - no trunking support: no clients currently take advantage of 39 + trunking, but this is a mandatory feature, and its use is 40 + recommended to clients in a number of places. (E.g. to ensure 41 + timely renewal in case an existing connection's retry timeouts 42 + have gotten too long; see section 8.3 of the draft.) 43 + Therefore, lack of this feature may cause future clients to 44 + fail. 
45 + - Incomplete backchannel support: incomplete backchannel gss 46 + support and no support for BACKCHANNEL_CTL mean that 47 + callbacks (hence delegations and layouts) may not be 48 + available and clients confused by the incomplete 49 + implementation may fail. 50 + - Server reboot recovery is unsupported; if the server reboots, 51 + clients may fail. 52 + - We do not support SSV, which provides security for shared 53 + client-server state (thus preventing unauthorized tampering 54 + with locks and opens, for example). It is mandatory for 55 + servers to support this, though no clients use it yet. 56 + - Mandatory operations which we do not support, such as 57 + DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and 58 + TEST_STATEID, are not currently used by clients, but will be 59 + (and the spec recommends their uses in common cases), and 60 + clients should not be expected to know how to recover from the 61 + case where they are not supported. This will eventually cause 62 + interoperability failures. 63 + 64 + In addition, some limitations are inherited from the current NFSv4 65 + implementation: 66 + 67 + - Incomplete delegation enforcement: if a file is renamed or 68 + unlinked, a client holding a delegation may continue to 69 + indefinitely allow opens of the file under the old name. 32 70 33 71 The table below, taken from the NFSv4.1 document, lists 34 72 the operations that are mandatory to implement (REQ), optional ··· 189 141 +-------------------------+-----------+-------------+---------------+ 190 142 191 143 Implementation notes: 144 + 145 + DELEGPURGE: 146 + * mandatory only for servers that support CLAIM_DELEGATE_PREV and/or 147 + CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that 148 + persist across client reboots). Thus we need not implement this for 149 + now. 192 150 193 151 EXCHANGE_ID: 194 152 * only SP4_NONE state protection supported
+1 -1
fs/lockd/clntlock.c
··· 166 166 */ 167 167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 168 168 continue; 169 - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) 169 + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) 170 170 continue; 171 171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 172 172 continue;
+2 -2
fs/lockd/host.c
··· 111 111 */ 112 112 chain = &nlm_hosts[nlm_hash_address(ni->sap)]; 113 113 hlist_for_each_entry(host, pos, chain, h_hash) { 114 - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) 114 + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) 115 115 continue; 116 116 117 117 /* See if we have an NSM handle for this client */ ··· 125 125 if (host->h_server != ni->server) 126 126 continue; 127 127 if (ni->server && 128 - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 128 + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 129 129 continue; 130 130 131 131 /* Move to head of hash chain. */
+1 -1
fs/lockd/mon.c
··· 209 209 struct nsm_handle *nsm; 210 210 211 211 list_for_each_entry(nsm, &nsm_handles, sm_link) 212 - if (nlm_cmp_addr(nsm_addr(nsm), sap)) 212 + if (rpc_cmp_addr(nsm_addr(nsm), sap)) 213 213 return nsm; 214 214 return NULL; 215 215 }
+1 -1
fs/lockd/svcsubs.c
··· 417 417 static int 418 418 nlmsvc_match_ip(void *datap, struct nlm_host *host) 419 419 { 420 - return nlm_cmp_addr(nlm_srcaddr(host), datap); 420 + return rpc_cmp_addr(nlm_srcaddr(host), datap); 421 421 } 422 422 423 423 /**
+2
fs/nfsd/export.c
··· 1341 1341 if (rv) 1342 1342 goto out; 1343 1343 rv = check_nfsd_access(exp, rqstp); 1344 + if (rv) 1345 + fh_put(fhp); 1344 1346 out: 1345 1347 exp_put(exp); 1346 1348 return rv;
+36 -39
fs/nfsd/nfs3xdr.c
··· 814 814 return p; 815 815 } 816 816 817 - static __be32 * 818 - encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, 819 - struct svc_fh *fhp) 820 - { 821 - p = encode_post_op_attr(cd->rqstp, p, fhp); 822 - *p++ = xdr_one; /* yes, a file handle follows */ 823 - p = encode_fh(p, fhp); 824 - fh_put(fhp); 825 - return p; 826 - } 827 - 828 817 static int 829 818 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 830 819 const char *name, int namlen) ··· 825 836 dparent = cd->fh.fh_dentry; 826 837 exp = cd->fh.fh_export; 827 838 828 - fh_init(fhp, NFS3_FHSIZE); 829 839 if (isdotent(name, namlen)) { 830 840 if (namlen == 2) { 831 841 dchild = dget_parent(dparent); 832 842 if (dchild == dparent) { 833 843 /* filesystem root - cannot return filehandle for ".." */ 834 844 dput(dchild); 835 - return 1; 845 + return -ENOENT; 836 846 } 837 847 } else 838 848 dchild = dget(dparent); 839 849 } else 840 850 dchild = lookup_one_len(name, dparent, namlen); 841 851 if (IS_ERR(dchild)) 842 - return 1; 843 - if (d_mountpoint(dchild) || 844 - fh_compose(fhp, exp, dchild, &cd->fh) != 0 || 845 - !dchild->d_inode) 846 - rv = 1; 852 + return -ENOENT; 853 + rv = -ENOENT; 854 + if (d_mountpoint(dchild)) 855 + goto out; 856 + rv = fh_compose(fhp, exp, dchild, &cd->fh); 857 + if (rv) 858 + goto out; 859 + if (!dchild->d_inode) 860 + goto out; 861 + rv = 0; 862 + out: 847 863 dput(dchild); 848 864 return rv; 865 + } 866 + 867 + __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 868 + { 869 + struct svc_fh fh; 870 + int err; 871 + 872 + fh_init(&fh, NFS3_FHSIZE); 873 + err = compose_entry_fh(cd, &fh, name, namlen); 874 + if (err) { 875 + *p++ = 0; 876 + *p++ = 0; 877 + goto out; 878 + } 879 + p = encode_post_op_attr(cd->rqstp, p, &fh); 880 + *p++ = xdr_one; /* yes, a file handle follows */ 881 + p = encode_fh(p, &fh); 882 + out: 883 + fh_put(&fh); 884 + return p; 849 885 } 850 886 851 887 /* ··· 943 929 944 930 p 
= encode_entry_baggage(cd, p, name, namlen, ino); 945 931 946 - /* throw in readdirplus baggage */ 947 - if (plus) { 948 - struct svc_fh fh; 949 - 950 - if (compose_entry_fh(cd, &fh, name, namlen) > 0) { 951 - *p++ = 0; 952 - *p++ = 0; 953 - } else 954 - p = encode_entryplus_baggage(cd, p, &fh); 955 - } 932 + if (plus) 933 + p = encode_entryplus_baggage(cd, p, name, namlen); 956 934 num_entry_words = p - cd->buffer; 957 935 } else if (cd->rqstp->rq_respages[pn+1] != NULL) { 958 936 /* temporarily encode entry into next page, then move back to ··· 957 951 958 952 p1 = encode_entry_baggage(cd, p1, name, namlen, ino); 959 953 960 - /* throw in readdirplus baggage */ 961 - if (plus) { 962 - struct svc_fh fh; 963 - 964 - if (compose_entry_fh(cd, &fh, name, namlen) > 0) { 965 - /* zero out the filehandle */ 966 - *p1++ = 0; 967 - *p1++ = 0; 968 - } else 969 - p1 = encode_entryplus_baggage(cd, p1, &fh); 970 - } 954 + if (plus) 955 + p = encode_entryplus_baggage(cd, p1, name, namlen); 971 956 972 957 /* determine entry word length and lengths to go in pages */ 973 958 num_entry_words = p1 - tmp;
+2 -2
fs/nfsd/nfs4acl.c
··· 321 321 deny = ~pas.group & pas.other; 322 322 if (deny) { 323 323 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 324 - ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 324 + ace->flag = eflag; 325 325 ace->access_mask = deny_mask_from_posix(deny, flags); 326 326 ace->whotype = NFS4_ACL_WHO_GROUP; 327 327 ace++; ··· 335 335 if (deny) { 336 336 ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; 337 337 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 338 - ace->access_mask = mask_from_posix(deny, flags); 338 + ace->access_mask = deny_mask_from_posix(deny, flags); 339 339 ace->whotype = NFS4_ACL_WHO_NAMED; 340 340 ace->who = pa->e_id; 341 341 ace++;
+221 -42
fs/nfsd/nfs4callback.c
··· 43 43 #include <linux/sunrpc/xdr.h> 44 44 #include <linux/sunrpc/svc.h> 45 45 #include <linux/sunrpc/clnt.h> 46 + #include <linux/sunrpc/svcsock.h> 46 47 #include <linux/nfsd/nfsd.h> 47 48 #include <linux/nfsd/state.h> 48 49 #include <linux/sunrpc/sched.h> 49 50 #include <linux/nfs4.h> 51 + #include <linux/sunrpc/xprtsock.h> 50 52 51 53 #define NFSDDBG_FACILITY NFSDDBG_PROC 52 54 53 55 #define NFSPROC4_CB_NULL 0 54 56 #define NFSPROC4_CB_COMPOUND 1 57 + #define NFS4_STATEID_SIZE 16 55 58 56 59 /* Index of predefined Linux callback client operations */ 57 60 58 61 enum { 59 - NFSPROC4_CLNT_CB_NULL = 0, 62 + NFSPROC4_CLNT_CB_NULL = 0, 60 63 NFSPROC4_CLNT_CB_RECALL, 64 + NFSPROC4_CLNT_CB_SEQUENCE, 61 65 }; 62 66 63 67 enum nfs_cb_opnum4 { 64 68 OP_CB_RECALL = 4, 69 + OP_CB_SEQUENCE = 11, 65 70 }; 66 71 67 72 #define NFS4_MAXTAGLEN 20 ··· 75 70 #define NFS4_dec_cb_null_sz 0 76 71 #define cb_compound_enc_hdr_sz 4 77 72 #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) 73 + #define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) 74 + #define cb_sequence_enc_sz (sessionid_sz + 4 + \ 75 + 1 /* no referring calls list yet */) 76 + #define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) 77 + 78 78 #define op_enc_sz 1 79 79 #define op_dec_sz 2 80 80 #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) 81 81 #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) 82 82 #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ 83 + cb_sequence_enc_sz + \ 83 84 1 + enc_stateid_sz + \ 84 85 enc_nfs4_fh_sz) 85 86 86 87 #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ 88 + cb_sequence_dec_sz + \ 87 89 op_dec_sz) 90 + 91 + struct nfs4_rpc_args { 92 + void *args_op; 93 + struct nfsd4_cb_sequence args_seq; 94 + }; 88 95 89 96 /* 90 97 * Generic encode routines from fs/nfs/nfs4xdr.c ··· 154 137 } while (0) 155 138 156 139 struct nfs4_cb_compound_hdr { 157 - int status; 158 - u32 ident; 140 + /* args */ 141 + u32 ident; /* minorversion 0 only */ 159 142 u32 nops; 160 143 __be32 
*nops_p; 161 144 u32 minorversion; 145 + /* res */ 146 + int status; 162 147 u32 taglen; 163 148 char *tag; 164 149 }; ··· 257 238 hdr->nops++; 258 239 } 259 240 241 + static void 242 + encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, 243 + struct nfs4_cb_compound_hdr *hdr) 244 + { 245 + __be32 *p; 246 + 247 + if (hdr->minorversion == 0) 248 + return; 249 + 250 + RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 251 + 252 + WRITE32(OP_CB_SEQUENCE); 253 + WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); 254 + WRITE32(args->cbs_clp->cl_cb_seq_nr); 255 + WRITE32(0); /* slotid, always 0 */ 256 + WRITE32(0); /* highest slotid always 0 */ 257 + WRITE32(0); /* cachethis always 0 */ 258 + WRITE32(0); /* FIXME: support referring_call_lists */ 259 + hdr->nops++; 260 + } 261 + 260 262 static int 261 263 nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) 262 264 { ··· 289 249 } 290 250 291 251 static int 292 - nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) 252 + nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 253 + struct nfs4_rpc_args *rpc_args) 293 254 { 294 255 struct xdr_stream xdr; 256 + struct nfs4_delegation *args = rpc_args->args_op; 295 257 struct nfs4_cb_compound_hdr hdr = { 296 258 .ident = args->dl_ident, 259 + .minorversion = rpc_args->args_seq.cbs_minorversion, 297 260 }; 298 261 299 262 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 300 263 encode_cb_compound_hdr(&xdr, &hdr); 264 + encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); 301 265 encode_cb_recall(&xdr, args, &hdr); 302 266 encode_cb_nops(&hdr); 303 267 return 0; ··· 343 299 return 0; 344 300 } 345 301 302 + /* 303 + * Our current back channel implmentation supports a single backchannel 304 + * with a single slot. 
305 + */ 306 + static int 307 + decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, 308 + struct rpc_rqst *rqstp) 309 + { 310 + struct nfs4_sessionid id; 311 + int status; 312 + u32 dummy; 313 + __be32 *p; 314 + 315 + if (res->cbs_minorversion == 0) 316 + return 0; 317 + 318 + status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); 319 + if (status) 320 + return status; 321 + 322 + /* 323 + * If the server returns different values for sessionID, slotID or 324 + * sequence number, the server is looney tunes. 325 + */ 326 + status = -ESERVERFAULT; 327 + 328 + READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 329 + memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 330 + p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 331 + if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, 332 + NFS4_MAX_SESSIONID_LEN)) { 333 + dprintk("%s Invalid session id\n", __func__); 334 + goto out; 335 + } 336 + READ32(dummy); 337 + if (dummy != res->cbs_clp->cl_cb_seq_nr) { 338 + dprintk("%s Invalid sequence number\n", __func__); 339 + goto out; 340 + } 341 + READ32(dummy); /* slotid must be 0 */ 342 + if (dummy != 0) { 343 + dprintk("%s Invalid slotid\n", __func__); 344 + goto out; 345 + } 346 + /* FIXME: process highest slotid and target highest slotid */ 347 + status = 0; 348 + out: 349 + return status; 350 + } 351 + 352 + 346 353 static int 347 354 nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) 348 355 { ··· 401 306 } 402 307 403 308 static int 404 - nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) 309 + nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 310 + struct nfsd4_cb_sequence *seq) 405 311 { 406 312 struct xdr_stream xdr; 407 313 struct nfs4_cb_compound_hdr hdr; ··· 412 316 status = decode_cb_compound_hdr(&xdr, &hdr); 413 317 if (status) 414 318 goto out; 319 + if (seq) { 320 + status = decode_cb_sequence(&xdr, seq, rqstp); 321 + if (status) 322 + goto out; 323 + } 415 324 status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); 416 325 out: 417 326 return status; ··· 478 
377 479 378 int setup_callback_client(struct nfs4_client *clp) 480 379 { 481 - struct sockaddr_in addr; 482 380 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 483 381 struct rpc_timeout timeparms = { 484 382 .to_initval = max_cb_time(), 485 383 .to_retries = 0, 486 384 }; 487 385 struct rpc_create_args args = { 488 - .protocol = IPPROTO_TCP, 489 - .address = (struct sockaddr *)&addr, 490 - .addrsize = sizeof(addr), 386 + .protocol = XPRT_TRANSPORT_TCP, 387 + .address = (struct sockaddr *) &cb->cb_addr, 388 + .addrsize = cb->cb_addrlen, 491 389 .timeout = &timeparms, 492 390 .program = &cb_program, 493 391 .prognumber = cb->cb_prog, ··· 499 399 500 400 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 501 401 return -EINVAL; 502 - 503 - /* Initialize address */ 504 - memset(&addr, 0, sizeof(addr)); 505 - addr.sin_family = AF_INET; 506 - addr.sin_port = htons(cb->cb_port); 507 - addr.sin_addr.s_addr = htonl(cb->cb_addr); 508 - 402 + if (cb->cb_minorversion) { 403 + args.bc_xprt = clp->cl_cb_xprt; 404 + args.protocol = XPRT_TRANSPORT_BC_TCP; 405 + } 509 406 /* Create RPC client */ 510 407 client = rpc_create(&args); 511 408 if (IS_ERR(client)) { ··· 536 439 .rpc_call_done = nfsd4_cb_probe_done, 537 440 }; 538 441 539 - static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) 540 - { 541 - struct auth_cred acred = { 542 - .machine_cred = 1 543 - }; 442 + static struct rpc_cred *callback_cred; 544 443 545 - /* 546 - * Note in the gss case this doesn't actually have to wait for a 547 - * gss upcall (or any calls to the client); this just creates a 548 - * non-uptodate cred which the rpc state machine will fill in with 549 - * a refresh_upcall later. 
550 - */ 551 - return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred, 552 - RPCAUTH_LOOKUP_NEW); 444 + int set_callback_cred(void) 445 + { 446 + callback_cred = rpc_lookup_machine_cred(); 447 + if (!callback_cred) 448 + return -ENOMEM; 449 + return 0; 553 450 } 451 + 554 452 555 453 void do_probe_callback(struct nfs4_client *clp) 556 454 { ··· 553 461 struct rpc_message msg = { 554 462 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 555 463 .rpc_argp = clp, 464 + .rpc_cred = callback_cred 556 465 }; 557 - struct rpc_cred *cred; 558 466 int status; 559 467 560 - cred = lookup_cb_cred(cb); 561 - if (IS_ERR(cred)) { 562 - status = PTR_ERR(cred); 563 - goto out; 564 - } 565 - cb->cb_cred = cred; 566 - msg.rpc_cred = cb->cb_cred; 567 468 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, 568 469 &nfsd4_cb_probe_ops, (void *)clp); 569 - out: 570 470 if (status) { 571 471 warn_no_callback_path(clp, status); 572 472 put_nfs4_client(clp); ··· 587 503 do_probe_callback(clp); 588 504 } 589 505 506 + /* 507 + * There's currently a single callback channel slot. 508 + * If the slot is available, then mark it busy. Otherwise, set the 509 + * thread for sleeping on the callback RPC wait queue. 
510 + */ 511 + static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, 512 + struct rpc_task *task) 513 + { 514 + struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 515 + u32 *ptr = (u32 *)clp->cl_sessionid.data; 516 + int status = 0; 517 + 518 + dprintk("%s: %u:%u:%u:%u\n", __func__, 519 + ptr[0], ptr[1], ptr[2], ptr[3]); 520 + 521 + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { 522 + rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); 523 + dprintk("%s slot is busy\n", __func__); 524 + status = -EAGAIN; 525 + goto out; 526 + } 527 + 528 + /* 529 + * We'll need the clp during XDR encoding and decoding, 530 + * and the sequence during decoding to verify the reply 531 + */ 532 + args->args_seq.cbs_clp = clp; 533 + task->tk_msg.rpc_resp = &args->args_seq; 534 + 535 + out: 536 + dprintk("%s status=%d\n", __func__, status); 537 + return status; 538 + } 539 + 540 + /* 541 + * TODO: cb_sequence should support referring call lists, cachethis, multiple 542 + * slots, and mark callback channel down on communication errors. 
543 + */ 544 + static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) 545 + { 546 + struct nfs4_delegation *dp = calldata; 547 + struct nfs4_client *clp = dp->dl_client; 548 + struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 549 + u32 minorversion = clp->cl_cb_conn.cb_minorversion; 550 + int status = 0; 551 + 552 + args->args_seq.cbs_minorversion = minorversion; 553 + if (minorversion) { 554 + status = nfsd41_cb_setup_sequence(clp, task); 555 + if (status) { 556 + if (status != -EAGAIN) { 557 + /* terminate rpc task */ 558 + task->tk_status = status; 559 + task->tk_action = NULL; 560 + } 561 + return; 562 + } 563 + } 564 + rpc_call_start(task); 565 + } 566 + 567 + static void nfsd4_cb_done(struct rpc_task *task, void *calldata) 568 + { 569 + struct nfs4_delegation *dp = calldata; 570 + struct nfs4_client *clp = dp->dl_client; 571 + 572 + dprintk("%s: minorversion=%d\n", __func__, 573 + clp->cl_cb_conn.cb_minorversion); 574 + 575 + if (clp->cl_cb_conn.cb_minorversion) { 576 + /* No need for lock, access serialized in nfsd4_cb_prepare */ 577 + ++clp->cl_cb_seq_nr; 578 + clear_bit(0, &clp->cl_cb_slot_busy); 579 + rpc_wake_up_next(&clp->cl_cb_waitq); 580 + dprintk("%s: freed slot, new seqid=%d\n", __func__, 581 + clp->cl_cb_seq_nr); 582 + 583 + /* We're done looking into the sequence information */ 584 + task->tk_msg.rpc_resp = NULL; 585 + } 586 + } 587 + 590 588 static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 591 589 { 592 590 struct nfs4_delegation *dp = calldata; 593 591 struct nfs4_client *clp = dp->dl_client; 592 + 593 + nfsd4_cb_done(task, calldata); 594 594 595 595 switch (task->tk_status) { 596 596 case -EIO: ··· 688 520 break; 689 521 default: 690 522 /* success, or error we can't handle */ 691 - return; 523 + goto done; 692 524 } 693 525 if (dp->dl_retries--) { 694 526 rpc_delay(task, 2*HZ); 695 527 task->tk_status = 0; 696 528 rpc_restart_call(task); 529 + return; 697 530 } else { 698 531 
atomic_set(&clp->cl_cb_conn.cb_set, 0); 699 532 warn_no_callback_path(clp, task->tk_status); 700 533 } 534 + done: 535 + kfree(task->tk_msg.rpc_argp); 701 536 } 702 537 703 538 static void nfsd4_cb_recall_release(void *calldata) ··· 713 542 } 714 543 715 544 static const struct rpc_call_ops nfsd4_cb_recall_ops = { 545 + .rpc_call_prepare = nfsd4_cb_prepare, 716 546 .rpc_call_done = nfsd4_cb_recall_done, 717 547 .rpc_release = nfsd4_cb_recall_release, 718 548 }; ··· 726 554 { 727 555 struct nfs4_client *clp = dp->dl_client; 728 556 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; 557 + struct nfs4_rpc_args *args; 729 558 struct rpc_message msg = { 730 559 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 731 - .rpc_argp = dp, 732 - .rpc_cred = clp->cl_cb_conn.cb_cred 560 + .rpc_cred = callback_cred 733 561 }; 734 - int status; 562 + int status = -ENOMEM; 735 563 564 + args = kzalloc(sizeof(*args), GFP_KERNEL); 565 + if (!args) 566 + goto out; 567 + args->args_op = dp; 568 + msg.rpc_argp = args; 736 569 dp->dl_retries = 1; 737 570 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 738 571 &nfsd4_cb_recall_ops, dp); 572 + out: 739 573 if (status) { 574 + kfree(args); 740 575 put_nfs4_client(clp); 741 576 nfs4_put_delegation(dp); 742 577 }
+45 -44
fs/nfsd/nfs4proc.c
··· 68 68 u32 *bmval, u32 *writable) 69 69 { 70 70 struct dentry *dentry = cstate->current_fh.fh_dentry; 71 - struct svc_export *exp = cstate->current_fh.fh_export; 72 71 73 72 /* 74 73 * Check about attributes are supported by the NFSv4 server or not. ··· 79 80 return nfserr_attrnotsupp; 80 81 81 82 /* 82 - * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported 83 + * Check FATTR4_WORD0_ACL can be supported 83 84 * in current environment or not. 84 85 */ 85 86 if (bmval[0] & FATTR4_WORD0_ACL) { 86 87 if (!IS_POSIXACL(dentry->d_inode)) 87 - return nfserr_attrnotsupp; 88 - } 89 - if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) { 90 - if (exp->ex_fslocs.locations == NULL) 91 88 return nfserr_attrnotsupp; 92 89 } 93 90 ··· 116 121 } 117 122 118 123 return status; 124 + } 125 + 126 + static int 127 + is_create_with_attrs(struct nfsd4_open *open) 128 + { 129 + return open->op_create == NFS4_OPEN_CREATE 130 + && (open->op_createmode == NFS4_CREATE_UNCHECKED 131 + || open->op_createmode == NFS4_CREATE_GUARDED 132 + || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); 133 + } 134 + 135 + /* 136 + * if error occurs when setting the acl, just clear the acl bit 137 + * in the returned attr bitmap. 
138 + */ 139 + static void 140 + do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, 141 + struct nfs4_acl *acl, u32 *bmval) 142 + { 143 + __be32 status; 144 + 145 + status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); 146 + if (status) 147 + /* 148 + * We should probably fail the whole open at this point, 149 + * but we've already created the file, so it's too late; 150 + * So this seems the least of evils: 151 + */ 152 + bmval[0] &= ~FATTR4_WORD0_ACL; 119 153 } 120 154 121 155 static inline void ··· 229 205 } 230 206 if (status) 231 207 goto out; 208 + 209 + if (is_create_with_attrs(open) && open->op_acl != NULL) 210 + do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); 232 211 233 212 set_change_info(&open->op_cinfo, current_fh); 234 213 fh_dup2(current_fh, &resfh); ··· 563 536 status = nfserr_badtype; 564 537 } 565 538 566 - if (!status) { 567 - fh_unlock(&cstate->current_fh); 568 - set_change_info(&create->cr_cinfo, &cstate->current_fh); 569 - fh_dup2(&cstate->current_fh, &resfh); 570 - } 539 + if (status) 540 + goto out; 571 541 542 + if (create->cr_acl != NULL) 543 + do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, 544 + create->cr_bmval); 545 + 546 + fh_unlock(&cstate->current_fh); 547 + set_change_info(&create->cr_cinfo, &cstate->current_fh); 548 + fh_dup2(&cstate->current_fh, &resfh); 549 + out: 572 550 fh_put(&resfh); 573 551 return status; 574 552 } ··· 979 947 static const char *nfsd4_op_name(unsigned opnum); 980 948 981 949 /* 982 - * This is a replay of a compound for which no cache entry pages 983 - * were used. Encode the sequence operation, and if cachethis is FALSE 984 - * encode the uncache rep error on the next operation. 
985 - */ 986 - static __be32 987 - nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args, 988 - struct nfsd4_compoundres *resp) 989 - { 990 - struct nfsd4_op *op; 991 - 992 - dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__, 993 - resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); 994 - 995 - /* Encode the replayed sequence operation */ 996 - BUG_ON(resp->opcnt != 1); 997 - op = &args->ops[resp->opcnt - 1]; 998 - nfsd4_encode_operation(resp, op); 999 - 1000 - /*return nfserr_retry_uncached_rep in next operation. */ 1001 - if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) { 1002 - op = &args->ops[resp->opcnt++]; 1003 - op->status = nfserr_retry_uncached_rep; 1004 - nfsd4_encode_operation(resp, op); 1005 - } 1006 - return op->status; 1007 - } 1008 - 1009 - /* 1010 950 * Enforce NFSv4.1 COMPOUND ordering rules. 1011 951 * 1012 952 * TODO: ··· 1087 1083 BUG_ON(op->status == nfs_ok); 1088 1084 1089 1085 encode_op: 1090 - /* Only from SEQUENCE or CREATE_SESSION */ 1086 + /* Only from SEQUENCE */ 1091 1087 if (resp->cstate.status == nfserr_replay_cache) { 1092 1088 dprintk("%s NFS4.1 replay from cache\n", __func__); 1093 - if (nfsd4_not_cached(resp)) 1094 - status = nfsd4_enc_uncached_replay(args, resp); 1095 - else 1096 - status = op->status; 1089 + status = op->status; 1097 1090 goto out; 1098 1091 } 1099 1092 if (op->status == nfserr_replay_me) {
+316 -363
fs/nfsd/nfs4state.c
··· 55 55 #include <linux/lockd/bind.h> 56 56 #include <linux/module.h> 57 57 #include <linux/sunrpc/svcauth_gss.h> 58 + #include <linux/sunrpc/clnt.h> 58 59 59 60 #define NFSDDBG_FACILITY NFSDDBG_PROC 60 61 ··· 414 413 } 415 414 416 415 /* 417 - * Give the client the number of slots it requests bound by 418 - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. 419 - * 420 - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we 421 - * should (up to a point) re-negotiate active sessions and reduce their 422 - * slot usage to make rooom for new connections. For now we just fail the 423 - * create session. 416 + * The protocol defines ca_maxresponssize_cached to include the size of 417 + * the rpc header, but all we need to cache is the data starting after 418 + * the end of the initial SEQUENCE operation--the rest we regenerate 419 + * each time. Therefore we can advertise a ca_maxresponssize_cached 420 + * value that is the number of bytes in our cache plus a few additional 421 + * bytes. In order to stay on the safe side, and not promise more than 422 + * we can cache, those additional bytes must be the minimum possible: 24 423 + * bytes of rpc header (xid through accept state, with AUTH_NULL 424 + * verifier), 12 for the compound header (with zero-length tag), and 44 425 + * for the SEQUENCE op response: 424 426 */ 425 - static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) 427 + #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) 428 + 429 + /* 430 + * Give the client the number of ca_maxresponsesize_cached slots it 431 + * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, 432 + * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more 433 + * than NFSD_MAX_SLOTS_PER_SESSION. 434 + * 435 + * If we run out of reserved DRC memory we should (up to a point) 436 + * re-negotiate active sessions and reduce their slot usage to make 437 + * rooom for new connections. For now we just fail the create session. 
438 + */ 439 + static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) 426 440 { 427 - int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; 441 + int mem, size = fchan->maxresp_cached; 428 442 429 443 if (fchan->maxreqs < 1) 430 444 return nfserr_inval; 431 - else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) 432 - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; 433 445 434 - spin_lock(&nfsd_serv->sv_lock); 435 - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) 436 - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; 437 - nfsd_serv->sv_drc_pages_used += np; 438 - spin_unlock(&nfsd_serv->sv_lock); 446 + if (size < NFSD_MIN_HDR_SEQ_SZ) 447 + size = NFSD_MIN_HDR_SEQ_SZ; 448 + size -= NFSD_MIN_HDR_SEQ_SZ; 449 + if (size > NFSD_SLOT_CACHE_SIZE) 450 + size = NFSD_SLOT_CACHE_SIZE; 439 451 440 - if (np <= 0) { 441 - status = nfserr_resource; 442 - fchan->maxreqs = 0; 443 - } else 444 - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; 452 + /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ 453 + mem = fchan->maxreqs * size; 454 + if (mem > NFSD_MAX_MEM_PER_SESSION) { 455 + fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; 456 + if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) 457 + fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; 458 + mem = fchan->maxreqs * size; 459 + } 445 460 446 - return status; 461 + spin_lock(&nfsd_drc_lock); 462 + /* bound the total session drc memory ussage */ 463 + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { 464 + fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; 465 + mem = fchan->maxreqs * size; 466 + } 467 + nfsd_drc_mem_used += mem; 468 + spin_unlock(&nfsd_drc_lock); 469 + 470 + if (fchan->maxreqs == 0) 471 + return nfserr_serverfault; 472 + 473 + fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; 474 + return 0; 447 475 } 448 476 449 477 /* ··· 496 466 fchan->maxresp_sz = maxcount; 497 467 session_fchan->maxresp_sz = fchan->maxresp_sz; 498 468 499 - /* Set the max response 
cached size our default which is 500 - * a multiple of PAGE_SIZE and small */ 501 - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; 502 - fchan->maxresp_cached = session_fchan->maxresp_cached; 503 - 504 469 /* Use the client's maxops if possible */ 505 470 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 506 471 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 507 472 session_fchan->maxops = fchan->maxops; 508 473 509 - /* try to use the client requested number of slots */ 510 - if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) 511 - fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; 512 - 513 474 /* FIXME: Error means no more DRC pages so the server should 514 475 * recover pages from existing sessions. For now fail session 515 476 * creation. 516 477 */ 517 - status = set_forechannel_maxreqs(fchan); 478 + status = set_forechannel_drc_size(fchan); 518 479 480 + session_fchan->maxresp_cached = fchan->maxresp_cached; 519 481 session_fchan->maxreqs = fchan->maxreqs; 482 + 483 + dprintk("%s status %d\n", __func__, status); 520 484 return status; 485 + } 486 + 487 + static void 488 + free_session_slots(struct nfsd4_session *ses) 489 + { 490 + int i; 491 + 492 + for (i = 0; i < ses->se_fchannel.maxreqs; i++) 493 + kfree(ses->se_slots[i]); 521 494 } 522 495 523 496 static int ··· 528 495 struct nfsd4_create_session *cses) 529 496 { 530 497 struct nfsd4_session *new, tmp; 531 - int idx, status = nfserr_resource, slotsize; 498 + struct nfsd4_slot *sp; 499 + int idx, slotsize, cachesize, i; 500 + int status; 532 501 533 502 memset(&tmp, 0, sizeof(tmp)); 534 503 ··· 541 506 if (status) 542 507 goto out; 543 508 544 - /* allocate struct nfsd4_session and slot table in one piece */ 545 - slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); 509 + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) 510 + + sizeof(struct nfsd4_session) > PAGE_SIZE); 511 + 512 + status = nfserr_serverfault; 513 + /* allocate struct nfsd4_session and slot table pointers 
in one piece */ 514 + slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); 546 515 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); 547 516 if (!new) 548 517 goto out; 549 518 550 519 memcpy(new, &tmp, sizeof(*new)); 520 + 521 + /* allocate each struct nfsd4_slot and data cache in one piece */ 522 + cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 523 + for (i = 0; i < new->se_fchannel.maxreqs; i++) { 524 + sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 525 + if (!sp) 526 + goto out_free; 527 + new->se_slots[i] = sp; 528 + } 551 529 552 530 new->se_client = clp; 553 531 gen_sessionid(new); ··· 578 530 status = nfs_ok; 579 531 out: 580 532 return status; 533 + out_free: 534 + free_session_slots(new); 535 + kfree(new); 536 + goto out; 581 537 } 582 538 583 539 /* caller must hold sessionid_lock */ ··· 624 572 nfsd4_put_session(ses); 625 573 } 626 574 627 - static void nfsd4_release_respages(struct page **respages, short resused); 628 - 629 575 void 630 576 free_session(struct kref *kref) 631 577 { 632 578 struct nfsd4_session *ses; 633 - int i; 634 579 635 580 ses = container_of(kref, struct nfsd4_session, se_ref); 636 - for (i = 0; i < ses->se_fchannel.maxreqs; i++) { 637 - struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; 638 - nfsd4_release_respages(e->ce_respages, e->ce_resused); 639 - } 581 + spin_lock(&nfsd_drc_lock); 582 + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; 583 + spin_unlock(&nfsd_drc_lock); 584 + free_session_slots(ses); 640 585 kfree(ses); 641 586 } 642 587 ··· 696 647 clp->cl_cb_conn.cb_client = NULL; 697 648 rpc_shutdown_client(clnt); 698 649 } 699 - if (clp->cl_cb_conn.cb_cred) { 700 - put_rpccred(clp->cl_cb_conn.cb_cred); 701 - clp->cl_cb_conn.cb_cred = NULL; 702 - } 703 650 } 704 651 705 652 static inline void 706 653 free_client(struct nfs4_client *clp) 707 654 { 708 655 shutdown_callback_client(clp); 709 - 
nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, 710 - clp->cl_slot.sl_cache_entry.ce_resused); 656 + if (clp->cl_cb_xprt) 657 + svc_xprt_put(clp->cl_cb_xprt); 711 658 if (clp->cl_cred.cr_group_info) 712 659 put_group_info(clp->cl_cred.cr_group_info); 713 660 kfree(clp->cl_principal); ··· 757 712 release_session(ses); 758 713 } 759 714 put_nfs4_client(clp); 760 - } 761 - 762 - static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) 763 - { 764 - struct nfs4_client *clp; 765 - 766 - clp = alloc_client(name); 767 - if (clp == NULL) 768 - return NULL; 769 - memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); 770 - atomic_set(&clp->cl_count, 1); 771 - atomic_set(&clp->cl_cb_conn.cb_set, 0); 772 - INIT_LIST_HEAD(&clp->cl_idhash); 773 - INIT_LIST_HEAD(&clp->cl_strhash); 774 - INIT_LIST_HEAD(&clp->cl_openowners); 775 - INIT_LIST_HEAD(&clp->cl_delegations); 776 - INIT_LIST_HEAD(&clp->cl_sessions); 777 - INIT_LIST_HEAD(&clp->cl_lru); 778 - return clp; 779 715 } 780 716 781 717 static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) ··· 819 793 p = (u32 *)clp->cl_confirm.data; 820 794 *p++ = get_seconds(); 821 795 *p++ = i++; 796 + } 797 + 798 + static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, 799 + struct svc_rqst *rqstp, nfs4_verifier *verf) 800 + { 801 + struct nfs4_client *clp; 802 + struct sockaddr *sa = svc_addr(rqstp); 803 + char *princ; 804 + 805 + clp = alloc_client(name); 806 + if (clp == NULL) 807 + return NULL; 808 + 809 + princ = svc_gss_principal(rqstp); 810 + if (princ) { 811 + clp->cl_principal = kstrdup(princ, GFP_KERNEL); 812 + if (clp->cl_principal == NULL) { 813 + free_client(clp); 814 + return NULL; 815 + } 816 + } 817 + 818 + memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); 819 + atomic_set(&clp->cl_count, 1); 820 + atomic_set(&clp->cl_cb_conn.cb_set, 0); 821 + INIT_LIST_HEAD(&clp->cl_idhash); 822 + INIT_LIST_HEAD(&clp->cl_strhash); 823 + INIT_LIST_HEAD(&clp->cl_openowners); 824 + 
INIT_LIST_HEAD(&clp->cl_delegations); 825 + INIT_LIST_HEAD(&clp->cl_sessions); 826 + INIT_LIST_HEAD(&clp->cl_lru); 827 + clear_bit(0, &clp->cl_cb_slot_busy); 828 + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 829 + copy_verf(clp, verf); 830 + rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); 831 + clp->cl_flavor = rqstp->rq_flavor; 832 + copy_cred(&clp->cl_cred, &rqstp->rq_cred); 833 + gen_confirm(clp); 834 + 835 + return clp; 822 836 } 823 837 824 838 static int check_name(struct xdr_netobj name) ··· 968 902 return NULL; 969 903 } 970 904 971 - /* a helper function for parse_callback */ 972 - static int 973 - parse_octet(unsigned int *lenp, char **addrp) 974 - { 975 - unsigned int len = *lenp; 976 - char *p = *addrp; 977 - int n = -1; 978 - char c; 979 - 980 - for (;;) { 981 - if (!len) 982 - break; 983 - len--; 984 - c = *p++; 985 - if (c == '.') 986 - break; 987 - if ((c < '0') || (c > '9')) { 988 - n = -1; 989 - break; 990 - } 991 - if (n < 0) 992 - n = 0; 993 - n = (n * 10) + (c - '0'); 994 - if (n > 255) { 995 - n = -1; 996 - break; 997 - } 998 - } 999 - *lenp = len; 1000 - *addrp = p; 1001 - return n; 1002 - } 1003 - 1004 - /* parse and set the setclientid ipv4 callback address */ 1005 - static int 1006 - parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) 1007 - { 1008 - int temp = 0; 1009 - u32 cbaddr = 0; 1010 - u16 cbport = 0; 1011 - u32 addrlen = addr_len; 1012 - char *addr = addr_val; 1013 - int i, shift; 1014 - 1015 - /* ipaddress */ 1016 - shift = 24; 1017 - for(i = 4; i > 0 ; i--) { 1018 - if ((temp = parse_octet(&addrlen, &addr)) < 0) { 1019 - return 0; 1020 - } 1021 - cbaddr |= (temp << shift); 1022 - if (shift > 0) 1023 - shift -= 8; 1024 - } 1025 - *cbaddrp = cbaddr; 1026 - 1027 - /* port */ 1028 - shift = 8; 1029 - for(i = 2; i > 0 ; i--) { 1030 - if ((temp = parse_octet(&addrlen, &addr)) < 0) { 1031 - return 0; 1032 - } 1033 - cbport |= (temp << shift); 1034 - if (shift > 
0) 1035 - shift -= 8; 1036 - } 1037 - *cbportp = cbport; 1038 - return 1; 1039 - } 1040 - 1041 905 static void 1042 - gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 906 + gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) 1043 907 { 1044 908 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 909 + unsigned short expected_family; 1045 910 1046 - /* Currently, we only support tcp for the callback channel */ 1047 - if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) 911 + /* Currently, we only support tcp and tcp6 for the callback channel */ 912 + if (se->se_callback_netid_len == 3 && 913 + !memcmp(se->se_callback_netid_val, "tcp", 3)) 914 + expected_family = AF_INET; 915 + else if (se->se_callback_netid_len == 4 && 916 + !memcmp(se->se_callback_netid_val, "tcp6", 4)) 917 + expected_family = AF_INET6; 918 + else 1048 919 goto out_err; 1049 920 1050 - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, 1051 - &cb->cb_addr, &cb->cb_port))) 921 + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, 922 + se->se_callback_addr_len, 923 + (struct sockaddr *) &cb->cb_addr, 924 + sizeof(cb->cb_addr)); 925 + 926 + if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) 1052 927 goto out_err; 928 + 929 + if (cb->cb_addr.ss_family == AF_INET6) 930 + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; 931 + 1053 932 cb->cb_minorversion = 0; 1054 933 cb->cb_prog = se->se_callback_prog; 1055 934 cb->cb_ident = se->se_callback_ident; 1056 935 return; 1057 936 out_err: 937 + cb->cb_addr.ss_family = AF_UNSPEC; 938 + cb->cb_addrlen = 0; 1058 939 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1059 940 "will not receive delegations\n", 1060 941 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); ··· 1009 996 return; 1010 997 } 1011 998 1012 - void 1013 - nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) 1014 - { 1015 - struct 
nfsd4_compoundres *resp = rqstp->rq_resp; 1016 - 1017 - resp->cstate.statp = statp; 1018 - } 1019 - 1020 999 /* 1021 - * Dereference the result pages. 1022 - */ 1023 - static void 1024 - nfsd4_release_respages(struct page **respages, short resused) 1025 - { 1026 - int i; 1027 - 1028 - dprintk("--> %s\n", __func__); 1029 - for (i = 0; i < resused; i++) { 1030 - if (!respages[i]) 1031 - continue; 1032 - put_page(respages[i]); 1033 - respages[i] = NULL; 1034 - } 1035 - } 1036 - 1037 - static void 1038 - nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) 1039 - { 1040 - int i; 1041 - 1042 - for (i = 0; i < count; i++) { 1043 - topages[i] = frompages[i]; 1044 - if (!topages[i]) 1045 - continue; 1046 - get_page(topages[i]); 1047 - } 1048 - } 1049 - 1050 - /* 1051 - * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous 1052 - * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total 1053 - * length of the XDR response is less than se_fmaxresp_cached 1054 - * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a 1055 - * of the reply (e.g. readdir). 1056 - * 1057 - * Store the base and length of the rq_req.head[0] page 1058 - * of the NFSv4.1 data, just past the rpc header. 1000 + * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. 1059 1001 */ 1060 1002 void 1061 1003 nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 1062 1004 { 1063 - struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1064 - struct svc_rqst *rqstp = resp->rqstp; 1065 - struct nfsd4_compoundargs *args = rqstp->rq_argp; 1066 - struct nfsd4_op *op = &args->ops[resp->opcnt]; 1067 - struct kvec *resv = &rqstp->rq_res.head[0]; 1005 + struct nfsd4_slot *slot = resp->cstate.slot; 1006 + unsigned int base; 1068 1007 1069 - dprintk("--> %s entry %p\n", __func__, entry); 1008 + dprintk("--> %s slot %p\n", __func__, slot); 1070 1009 1071 - /* Don't cache a failed OP_SEQUENCE. 
*/ 1072 - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) 1073 - return; 1074 - 1075 - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); 1076 - entry->ce_opcnt = resp->opcnt; 1077 - entry->ce_status = resp->cstate.status; 1078 - 1079 - /* 1080 - * Don't need a page to cache just the sequence operation - the slot 1081 - * does this for us! 1082 - */ 1010 + slot->sl_opcnt = resp->opcnt; 1011 + slot->sl_status = resp->cstate.status; 1083 1012 1084 1013 if (nfsd4_not_cached(resp)) { 1085 - entry->ce_resused = 0; 1086 - entry->ce_rpchdrlen = 0; 1087 - dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, 1088 - resp->cstate.slot->sl_cache_entry.ce_cachethis); 1014 + slot->sl_datalen = 0; 1089 1015 return; 1090 1016 } 1091 - entry->ce_resused = rqstp->rq_resused; 1092 - if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) 1093 - entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; 1094 - nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, 1095 - entry->ce_resused); 1096 - entry->ce_datav.iov_base = resp->cstate.statp; 1097 - entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - 1098 - (char *)page_address(rqstp->rq_respages[0])); 1099 - /* Current request rpc header length*/ 1100 - entry->ce_rpchdrlen = (char *)resp->cstate.statp - 1101 - (char *)page_address(rqstp->rq_respages[0]); 1017 + slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; 1018 + base = (char *)resp->cstate.datap - 1019 + (char *)resp->xbuf->head[0].iov_base; 1020 + if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, 1021 + slot->sl_datalen)) 1022 + WARN("%s: sessions DRC could not cache compound\n", __func__); 1023 + return; 1102 1024 } 1103 1025 1104 1026 /* 1105 - * We keep the rpc header, but take the nfs reply from the replycache. 1027 + * Encode the replay sequence operation from the slot values. 
1028 + * If cachethis is FALSE encode the uncached rep error on the next 1029 + * operation which sets resp->p and increments resp->opcnt for 1030 + * nfs4svc_encode_compoundres. 1031 + * 1106 1032 */ 1107 - static int 1108 - nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, 1109 - struct nfsd4_cache_entry *entry) 1033 + static __be32 1034 + nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, 1035 + struct nfsd4_compoundres *resp) 1110 1036 { 1111 - struct svc_rqst *rqstp = resp->rqstp; 1112 - struct kvec *resv = &resp->rqstp->rq_res.head[0]; 1113 - int len; 1037 + struct nfsd4_op *op; 1038 + struct nfsd4_slot *slot = resp->cstate.slot; 1114 1039 1115 - /* Current request rpc header length*/ 1116 - len = (char *)resp->cstate.statp - 1117 - (char *)page_address(rqstp->rq_respages[0]); 1118 - if (entry->ce_datav.iov_len + len > PAGE_SIZE) { 1119 - dprintk("%s v41 cached reply too large (%Zd).\n", __func__, 1120 - entry->ce_datav.iov_len); 1121 - return 0; 1040 + dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, 1041 + resp->opcnt, resp->cstate.slot->sl_cachethis); 1042 + 1043 + /* Encode the replayed sequence operation */ 1044 + op = &args->ops[resp->opcnt - 1]; 1045 + nfsd4_encode_operation(resp, op); 1046 + 1047 + /* Return nfserr_retry_uncached_rep in next operation. */ 1048 + if (args->opcnt > 1 && slot->sl_cachethis == 0) { 1049 + op = &args->ops[resp->opcnt++]; 1050 + op->status = nfserr_retry_uncached_rep; 1051 + nfsd4_encode_operation(resp, op); 1122 1052 } 1123 - /* copy the cached reply nfsd data past the current rpc header */ 1124 - memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, 1125 - entry->ce_datav.iov_len); 1126 - resv->iov_len = len + entry->ce_datav.iov_len; 1127 - return 1; 1053 + return op->status; 1128 1054 } 1129 1055 1130 1056 /* 1131 - * Keep the first page of the replay. Copy the NFSv4.1 data from the first 1132 - * cached page. Replace any futher replay pages from the cache. 
1057 + * The sequence operation is not cached because we can use the slot and 1058 + * session values. 1133 1059 */ 1134 1060 __be32 1135 1061 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 1136 1062 struct nfsd4_sequence *seq) 1137 1063 { 1138 - struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; 1064 + struct nfsd4_slot *slot = resp->cstate.slot; 1139 1065 __be32 status; 1140 1066 1141 - dprintk("--> %s entry %p\n", __func__, entry); 1067 + dprintk("--> %s slot %p\n", __func__, slot); 1142 1068 1143 - /* 1144 - * If this is just the sequence operation, we did not keep 1145 - * a page in the cache entry because we can just use the 1146 - * slot info stored in struct nfsd4_sequence that was checked 1147 - * against the slot in nfsd4_sequence(). 1148 - * 1149 - * This occurs when seq->cachethis is FALSE, or when the client 1150 - * session inactivity timer fires and a solo sequence operation 1151 - * is sent (lease renewal). 1152 - */ 1153 - if (seq && nfsd4_not_cached(resp)) { 1154 - seq->maxslots = resp->cstate.session->se_fchannel.maxreqs; 1155 - return nfs_ok; 1156 - } 1069 + /* Either returns 0 or nfserr_retry_uncached */ 1070 + status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); 1071 + if (status == nfserr_retry_uncached_rep) 1072 + return status; 1157 1073 1158 - if (!nfsd41_copy_replay_data(resp, entry)) { 1159 - /* 1160 - * Not enough room to use the replay rpc header, send the 1161 - * cached header. Release all the allocated result pages. 1162 - */ 1163 - svc_free_res_pages(resp->rqstp); 1164 - nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, 1165 - entry->ce_resused); 1166 - } else { 1167 - /* Release all but the first allocated result page */ 1074 + /* The sequence operation has been encoded, cstate->datap set. 
*/ 1075 + memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); 1168 1076 1169 - resp->rqstp->rq_resused--; 1170 - svc_free_res_pages(resp->rqstp); 1171 - 1172 - nfsd4_copy_pages(&resp->rqstp->rq_respages[1], 1173 - &entry->ce_respages[1], 1174 - entry->ce_resused - 1); 1175 - } 1176 - 1177 - resp->rqstp->rq_resused = entry->ce_resused; 1178 - resp->opcnt = entry->ce_opcnt; 1179 - resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; 1180 - status = entry->ce_status; 1077 + resp->opcnt = slot->sl_opcnt; 1078 + resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); 1079 + status = slot->sl_status; 1181 1080 1182 1081 return status; 1183 1082 } ··· 1119 1194 int status; 1120 1195 unsigned int strhashval; 1121 1196 char dname[HEXDIR_LEN]; 1197 + char addr_str[INET6_ADDRSTRLEN]; 1122 1198 nfs4_verifier verf = exid->verifier; 1123 - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1199 + struct sockaddr *sa = svc_addr(rqstp); 1124 1200 1201 + rpc_ntop(sa, addr_str, sizeof(addr_str)); 1125 1202 dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " 1126 - " ip_addr=%u flags %x, spa_how %d\n", 1203 + "ip_addr=%s flags %x, spa_how %d\n", 1127 1204 __func__, rqstp, exid, exid->clname.len, exid->clname.data, 1128 - ip_addr, exid->flags, exid->spa_how); 1205 + addr_str, exid->flags, exid->spa_how); 1129 1206 1130 1207 if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) 1131 1208 return nfserr_inval; ··· 1208 1281 1209 1282 out_new: 1210 1283 /* Normal case */ 1211 - new = create_client(exid->clname, dname); 1284 + new = create_client(exid->clname, dname, rqstp, &verf); 1212 1285 if (new == NULL) { 1213 - status = nfserr_resource; 1286 + status = nfserr_serverfault; 1214 1287 goto out; 1215 1288 } 1216 1289 1217 - copy_verf(new, &verf); 1218 - copy_cred(&new->cl_cred, &rqstp->rq_cred); 1219 - new->cl_addr = ip_addr; 1220 1290 gen_clid(new); 1221 - gen_confirm(new); 1222 1291 add_to_unconfirmed(new, strhashval); 1223 1292 
out_copy: 1224 1293 exid->clientid.cl_boot = new->cl_clientid.cl_boot; 1225 1294 exid->clientid.cl_id = new->cl_clientid.cl_id; 1226 1295 1227 - new->cl_slot.sl_seqid = 0; 1228 1296 exid->seqid = 1; 1229 1297 nfsd4_set_ex_flags(new, exid); 1230 1298 1231 1299 dprintk("nfsd4_exchange_id seqid %d flags %x\n", 1232 - new->cl_slot.sl_seqid, new->cl_exchange_flags); 1300 + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); 1233 1301 status = nfs_ok; 1234 1302 1235 1303 out: ··· 1235 1313 } 1236 1314 1237 1315 static int 1238 - check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) 1316 + check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) 1239 1317 { 1240 - dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, 1241 - slot->sl_seqid); 1318 + dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, 1319 + slot_seqid); 1242 1320 1243 1321 /* The slot is in use, and no response has been sent. */ 1244 - if (slot->sl_inuse) { 1245 - if (seqid == slot->sl_seqid) 1322 + if (slot_inuse) { 1323 + if (seqid == slot_seqid) 1246 1324 return nfserr_jukebox; 1247 1325 else 1248 1326 return nfserr_seq_misordered; 1249 1327 } 1250 1328 /* Normal */ 1251 - if (likely(seqid == slot->sl_seqid + 1)) 1329 + if (likely(seqid == slot_seqid + 1)) 1252 1330 return nfs_ok; 1253 1331 /* Replay */ 1254 - if (seqid == slot->sl_seqid) 1332 + if (seqid == slot_seqid) 1255 1333 return nfserr_replay_cache; 1256 1334 /* Wraparound */ 1257 - if (seqid == 1 && (slot->sl_seqid + 1) == 0) 1335 + if (seqid == 1 && (slot_seqid + 1) == 0) 1258 1336 return nfs_ok; 1259 1337 /* Misordered replay or misordered new request */ 1260 1338 return nfserr_seq_misordered; 1339 + } 1340 + 1341 + /* 1342 + * Cache the create session result into the create session single DRC 1343 + * slot cache by saving the xdr structure. sl_seqid has been set. 1344 + * Do this for solo or embedded create session operations. 
1345 + */ 1346 + static void 1347 + nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, 1348 + struct nfsd4_clid_slot *slot, int nfserr) 1349 + { 1350 + slot->sl_status = nfserr; 1351 + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); 1352 + } 1353 + 1354 + static __be32 1355 + nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, 1356 + struct nfsd4_clid_slot *slot) 1357 + { 1358 + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); 1359 + return slot->sl_status; 1261 1360 } 1262 1361 1263 1362 __be32 ··· 1286 1343 struct nfsd4_compound_state *cstate, 1287 1344 struct nfsd4_create_session *cr_ses) 1288 1345 { 1289 - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; 1290 - struct nfsd4_compoundres *resp = rqstp->rq_resp; 1346 + struct sockaddr *sa = svc_addr(rqstp); 1291 1347 struct nfs4_client *conf, *unconf; 1292 - struct nfsd4_slot *slot = NULL; 1348 + struct nfsd4_clid_slot *cs_slot = NULL; 1293 1349 int status = 0; 1294 1350 1295 1351 nfs4_lock_state(); ··· 1296 1354 conf = find_confirmed_client(&cr_ses->clientid); 1297 1355 1298 1356 if (conf) { 1299 - slot = &conf->cl_slot; 1300 - status = check_slot_seqid(cr_ses->seqid, slot); 1357 + cs_slot = &conf->cl_cs_slot; 1358 + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); 1301 1359 if (status == nfserr_replay_cache) { 1302 1360 dprintk("Got a create_session replay! 
seqid= %d\n", 1303 - slot->sl_seqid); 1304 - cstate->slot = slot; 1305 - cstate->status = status; 1361 + cs_slot->sl_seqid); 1306 1362 /* Return the cached reply status */ 1307 - status = nfsd4_replay_cache_entry(resp, NULL); 1363 + status = nfsd4_replay_create_session(cr_ses, cs_slot); 1308 1364 goto out; 1309 - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { 1365 + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { 1310 1366 status = nfserr_seq_misordered; 1311 1367 dprintk("Sequence misordered!\n"); 1312 1368 dprintk("Expected seqid= %d but got seqid= %d\n", 1313 - slot->sl_seqid, cr_ses->seqid); 1369 + cs_slot->sl_seqid, cr_ses->seqid); 1314 1370 goto out; 1315 1371 } 1316 - conf->cl_slot.sl_seqid++; 1372 + cs_slot->sl_seqid++; 1317 1373 } else if (unconf) { 1318 1374 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1319 - (ip_addr != unconf->cl_addr)) { 1375 + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 1320 1376 status = nfserr_clid_inuse; 1321 1377 goto out; 1322 1378 } 1323 1379 1324 - slot = &unconf->cl_slot; 1325 - status = check_slot_seqid(cr_ses->seqid, slot); 1380 + cs_slot = &unconf->cl_cs_slot; 1381 + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); 1326 1382 if (status) { 1327 1383 /* an unconfirmed replay returns misordered */ 1328 1384 status = nfserr_seq_misordered; 1329 - goto out; 1385 + goto out_cache; 1330 1386 } 1331 1387 1332 - slot->sl_seqid++; /* from 0 to 1 */ 1388 + cs_slot->sl_seqid++; /* from 0 to 1 */ 1333 1389 move_to_confirmed(unconf); 1334 1390 1335 1391 /* ··· 1336 1396 cr_ses->flags &= ~SESSION4_PERSIST; 1337 1397 cr_ses->flags &= ~SESSION4_RDMA; 1338 1398 1399 + if (cr_ses->flags & SESSION4_BACK_CHAN) { 1400 + unconf->cl_cb_xprt = rqstp->rq_xprt; 1401 + svc_xprt_get(unconf->cl_cb_xprt); 1402 + rpc_copy_addr( 1403 + (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, 1404 + sa); 1405 + unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); 1406 + unconf->cl_cb_conn.cb_minorversion = 1407 + 
cstate->minorversion; 1408 + unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; 1409 + unconf->cl_cb_seq_nr = 1; 1410 + nfsd4_probe_callback(unconf); 1411 + } 1339 1412 conf = unconf; 1340 1413 } else { 1341 1414 status = nfserr_stale_clientid; ··· 1361 1408 1362 1409 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1363 1410 NFS4_MAX_SESSIONID_LEN); 1364 - cr_ses->seqid = slot->sl_seqid; 1411 + cr_ses->seqid = cs_slot->sl_seqid; 1365 1412 1366 - slot->sl_inuse = true; 1367 - cstate->slot = slot; 1368 - /* Ensure a page is used for the cache */ 1369 - slot->sl_cache_entry.ce_cachethis = 1; 1413 + out_cache: 1414 + /* cache solo and embedded create sessions under the state lock */ 1415 + nfsd4_cache_create_session(cr_ses, cs_slot, status); 1370 1416 out: 1371 1417 nfs4_unlock_state(); 1372 1418 dprintk("%s returns %d\n", __func__, ntohl(status)); ··· 1430 1478 if (seq->slotid >= session->se_fchannel.maxreqs) 1431 1479 goto out; 1432 1480 1433 - slot = &session->se_slots[seq->slotid]; 1481 + slot = session->se_slots[seq->slotid]; 1434 1482 dprintk("%s: slotid %d\n", __func__, seq->slotid); 1435 1483 1436 - status = check_slot_seqid(seq->seqid, slot); 1484 + /* We do not negotiate the number of slots yet, so set the 1485 + * maxslots to the session maxreqs which is used to encode 1486 + * sr_highest_slotid and the sr_target_slot id to maxslots */ 1487 + seq->maxslots = session->se_fchannel.maxreqs; 1488 + 1489 + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse); 1437 1490 if (status == nfserr_replay_cache) { 1438 1491 cstate->slot = slot; 1439 1492 cstate->session = session; 1440 1493 /* Return the cached reply status and set cstate->status 1441 - * for nfsd4_svc_encode_compoundres processing */ 1494 + * for nfsd4_proc_compound processing */ 1442 1495 status = nfsd4_replay_cache_entry(resp, seq); 1443 1496 cstate->status = nfserr_replay_cache; 1444 - goto replay_cache; 1497 + goto out; 1445 1498 } 1446 1499 if (status) 1447 1500 goto out; 
··· 1454 1497 /* Success! bump slot seqid */ 1455 1498 slot->sl_inuse = true; 1456 1499 slot->sl_seqid = seq->seqid; 1457 - slot->sl_cache_entry.ce_cachethis = seq->cachethis; 1458 - /* Always set the cache entry cachethis for solo sequence */ 1459 - if (nfsd4_is_solo_sequence(resp)) 1460 - slot->sl_cache_entry.ce_cachethis = 1; 1500 + slot->sl_cachethis = seq->cachethis; 1461 1501 1462 1502 cstate->slot = slot; 1463 1503 cstate->session = session; 1464 1504 1465 - replay_cache: 1466 - /* Renew the clientid on success and on replay. 1467 - * Hold a session reference until done processing the compound: 1505 + /* Hold a session reference until done processing the compound: 1468 1506 * nfsd4_put_session called only if the cstate slot is set. 1469 1507 */ 1470 - renew_client(session->se_client); 1471 1508 nfsd4_get_session(session); 1472 1509 out: 1473 1510 spin_unlock(&sessionid_lock); 1511 + /* Renew the clientid on success and on replay */ 1512 + if (cstate->session) { 1513 + nfs4_lock_state(); 1514 + renew_client(session->se_client); 1515 + nfs4_unlock_state(); 1516 + } 1474 1517 dprintk("%s: return %d\n", __func__, ntohl(status)); 1475 1518 return status; 1476 1519 } ··· 1479 1522 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1480 1523 struct nfsd4_setclientid *setclid) 1481 1524 { 1482 - struct sockaddr_in *sin = svc_addr_in(rqstp); 1525 + struct sockaddr *sa = svc_addr(rqstp); 1483 1526 struct xdr_netobj clname = { 1484 1527 .len = setclid->se_namelen, 1485 1528 .data = setclid->se_name, ··· 1488 1531 unsigned int strhashval; 1489 1532 struct nfs4_client *conf, *unconf, *new; 1490 1533 __be32 status; 1491 - char *princ; 1492 1534 char dname[HEXDIR_LEN]; 1493 1535 1494 1536 if (!check_name(clname)) ··· 1510 1554 /* RFC 3530 14.2.33 CASE 0: */ 1511 1555 status = nfserr_clid_inuse; 1512 1556 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { 1513 - dprintk("NFSD: setclientid: string in use by client" 1514 - " at %pI4\n", 
&conf->cl_addr); 1557 + char addr_str[INET6_ADDRSTRLEN]; 1558 + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, 1559 + sizeof(addr_str)); 1560 + dprintk("NFSD: setclientid: string in use by client " 1561 + "at %s\n", addr_str); 1515 1562 goto out; 1516 1563 } 1517 1564 } ··· 1532 1573 */ 1533 1574 if (unconf) 1534 1575 expire_client(unconf); 1535 - new = create_client(clname, dname); 1576 + new = create_client(clname, dname, rqstp, &clverifier); 1536 1577 if (new == NULL) 1537 1578 goto out; 1538 1579 gen_clid(new); ··· 1549 1590 */ 1550 1591 expire_client(unconf); 1551 1592 } 1552 - new = create_client(clname, dname); 1593 + new = create_client(clname, dname, rqstp, &clverifier); 1553 1594 if (new == NULL) 1554 1595 goto out; 1555 1596 copy_clid(new, conf); ··· 1559 1600 * probable client reboot; state will be removed if 1560 1601 * confirmed. 1561 1602 */ 1562 - new = create_client(clname, dname); 1603 + new = create_client(clname, dname, rqstp, &clverifier); 1563 1604 if (new == NULL) 1564 1605 goto out; 1565 1606 gen_clid(new); ··· 1570 1611 * confirmed. 
1571 1612 */ 1572 1613 expire_client(unconf); 1573 - new = create_client(clname, dname); 1614 + new = create_client(clname, dname, rqstp, &clverifier); 1574 1615 if (new == NULL) 1575 1616 goto out; 1576 1617 gen_clid(new); 1577 1618 } 1578 - copy_verf(new, &clverifier); 1579 - new->cl_addr = sin->sin_addr.s_addr; 1580 - new->cl_flavor = rqstp->rq_flavor; 1581 - princ = svc_gss_principal(rqstp); 1582 - if (princ) { 1583 - new->cl_principal = kstrdup(princ, GFP_KERNEL); 1584 - if (new->cl_principal == NULL) { 1585 - free_client(new); 1586 - goto out; 1587 - } 1588 - } 1589 - copy_cred(&new->cl_cred, &rqstp->rq_cred); 1590 - gen_confirm(new); 1591 - gen_callback(new, setclid); 1619 + gen_callback(new, setclid, rpc_get_scope_id(sa)); 1592 1620 add_to_unconfirmed(new, strhashval); 1593 1621 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1594 1622 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; ··· 1597 1651 struct nfsd4_compound_state *cstate, 1598 1652 struct nfsd4_setclientid_confirm *setclientid_confirm) 1599 1653 { 1600 - struct sockaddr_in *sin = svc_addr_in(rqstp); 1654 + struct sockaddr *sa = svc_addr(rqstp); 1601 1655 struct nfs4_client *conf, *unconf; 1602 1656 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 1603 1657 clientid_t * clid = &setclientid_confirm->sc_clientid; ··· 1616 1670 unconf = find_unconfirmed_client(clid); 1617 1671 1618 1672 status = nfserr_clid_inuse; 1619 - if (conf && conf->cl_addr != sin->sin_addr.s_addr) 1673 + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) 1620 1674 goto out; 1621 - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) 1675 + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) 1622 1676 goto out; 1623 1677 1624 1678 /* ··· 4018 4072 4019 4073 /* initialization to perform when the nfsd service is started: */ 4020 4074 4021 - static void 4075 + static int 4022 4076 __nfs4_state_start(void) 4023 4077 { 4024 4078 unsigned long grace_time; ··· 4030 4084 
printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 4031 4085 grace_time/HZ); 4032 4086 laundry_wq = create_singlethread_workqueue("nfsd4"); 4087 + if (laundry_wq == NULL) 4088 + return -ENOMEM; 4033 4089 queue_delayed_work(laundry_wq, &laundromat_work, grace_time); 4034 4090 set_max_delegations(); 4091 + return set_callback_cred(); 4035 4092 } 4036 4093 4037 - void 4094 + int 4038 4095 nfs4_state_start(void) 4039 4096 { 4097 + int ret; 4098 + 4040 4099 if (nfs4_init) 4041 - return; 4100 + return 0; 4042 4101 nfsd4_load_reboot_recovery_data(); 4043 - __nfs4_state_start(); 4102 + ret = __nfs4_state_start(); 4103 + if (ret) 4104 + return ret; 4044 4105 nfs4_init = 1; 4045 - return; 4106 + return 0; 4046 4107 } 4047 4108 4048 4109 time_t
+18 -24
fs/nfsd/nfs4xdr.c
··· 1599 1599 static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) 1600 1600 { 1601 1601 struct svc_fh tmp_fh; 1602 - char *path, *rootpath; 1602 + char *path = NULL, *rootpath; 1603 + size_t rootlen; 1603 1604 1604 1605 fh_init(&tmp_fh, NFS4_FHSIZE); 1605 1606 *stat = exp_pseudoroot(rqstp, &tmp_fh); ··· 1610 1609 1611 1610 path = exp->ex_pathname; 1612 1611 1613 - if (strncmp(path, rootpath, strlen(rootpath))) { 1612 + rootlen = strlen(rootpath); 1613 + if (strncmp(path, rootpath, rootlen)) { 1614 1614 dprintk("nfsd: fs_locations failed;" 1615 1615 "%s is not contained in %s\n", path, rootpath); 1616 1616 *stat = nfserr_notsupp; 1617 - return NULL; 1617 + path = NULL; 1618 + goto out; 1618 1619 } 1619 - 1620 - return path + strlen(rootpath); 1620 + path += rootlen; 1621 + out: 1622 + fh_put(&tmp_fh); 1623 + return path; 1621 1624 } 1622 1625 1623 1626 /* ··· 1798 1793 goto out_nfserr; 1799 1794 } 1800 1795 } 1801 - if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { 1802 - if (exp->ex_fslocs.locations == NULL) { 1803 - bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS; 1804 - } 1805 - } 1806 1796 if ((buflen -= 16) < 0) 1807 1797 goto out_resource; 1808 1798 ··· 1825 1825 goto out_resource; 1826 1826 if (!aclsupport) 1827 1827 word0 &= ~FATTR4_WORD0_ACL; 1828 - if (!exp->ex_fslocs.locations) 1829 - word0 &= ~FATTR4_WORD0_FS_LOCATIONS; 1830 1828 if (!word2) { 1831 1829 WRITE32(2); 1832 1830 WRITE32(word0); ··· 3062 3064 WRITE32(0); 3063 3065 3064 3066 ADJUST_ARGS(); 3067 + resp->cstate.datap = p; /* DRC cache data pointer */ 3065 3068 return 0; 3066 3069 } 3067 3070 ··· 3165 3166 return status; 3166 3167 3167 3168 session = resp->cstate.session; 3168 - if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) 3169 + if (session == NULL || slot->sl_cachethis == 0) 3169 3170 return status; 3170 3171 3171 3172 if (resp->opcnt >= args->opcnt) ··· 3290 3291 /* 3291 3292 * All that remains is to write the tag and operation count... 
3292 3293 */ 3294 + struct nfsd4_compound_state *cs = &resp->cstate; 3293 3295 struct kvec *iov; 3294 3296 p = resp->tagp; 3295 3297 *p++ = htonl(resp->taglen); ··· 3304 3304 iov = &rqstp->rq_res.head[0]; 3305 3305 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; 3306 3306 BUG_ON(iov->iov_len > PAGE_SIZE); 3307 - if (nfsd4_has_session(&resp->cstate)) { 3308 - if (resp->cstate.status == nfserr_replay_cache && 3309 - !nfsd4_not_cached(resp)) { 3310 - iov->iov_len = resp->cstate.iovlen; 3311 - } else { 3312 - nfsd4_store_cache_entry(resp); 3313 - dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); 3314 - resp->cstate.slot->sl_inuse = 0; 3315 - } 3316 - if (resp->cstate.session) 3317 - nfsd4_put_session(resp->cstate.session); 3307 + if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { 3308 + nfsd4_store_cache_entry(resp); 3309 + dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); 3310 + resp->cstate.slot->sl_inuse = false; 3311 + nfsd4_put_session(resp->cstate.session); 3318 3312 } 3319 3313 return 1; 3320 3314 }
+3 -5
fs/nfsd/nfsctl.c
··· 174 174 }; 175 175 176 176 extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 177 + extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 177 178 178 179 static struct file_operations pool_stats_operations = { 179 180 .open = nfsd_pool_stats_open, 180 181 .read = seq_read, 181 182 .llseek = seq_lseek, 182 - .release = seq_release, 183 + .release = nfsd_pool_stats_release, 183 184 .owner = THIS_MODULE, 184 185 }; 185 186 ··· 777 776 size -= len; 778 777 mesg += len; 779 778 } 780 - 781 - mutex_unlock(&nfsd_mutex); 782 - return (mesg-buf); 783 - 779 + rv = mesg - buf; 784 780 out_free: 785 781 kfree(nthreads); 786 782 mutex_unlock(&nfsd_mutex);
+87 -73
fs/nfsd/nfsfh.c
··· 397 397 fh->ofh_dirino = 0; 398 398 } 399 399 400 - __be32 401 - fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, 402 - struct svc_fh *ref_fh) 400 + static bool is_root_export(struct svc_export *exp) 403 401 { 404 - /* ref_fh is a reference file handle. 405 - * if it is non-null and for the same filesystem, then we should compose 406 - * a filehandle which is of the same version, where possible. 407 - * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca 408 - * Then create a 32byte filehandle using nfs_fhbase_old 409 - * 410 - */ 402 + return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; 403 + } 411 404 405 + static struct super_block *exp_sb(struct svc_export *exp) 406 + { 407 + return exp->ex_path.dentry->d_inode->i_sb; 408 + } 409 + 410 + static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) 411 + { 412 + switch (fsid_type) { 413 + case FSID_DEV: 414 + if (!old_valid_dev(exp_sb(exp)->s_dev)) 415 + return 0; 416 + /* FALL THROUGH */ 417 + case FSID_MAJOR_MINOR: 418 + case FSID_ENCODE_DEV: 419 + return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV; 420 + case FSID_NUM: 421 + return exp->ex_flags & NFSEXP_FSID; 422 + case FSID_UUID8: 423 + case FSID_UUID16: 424 + if (!is_root_export(exp)) 425 + return 0; 426 + /* fall through */ 427 + case FSID_UUID4_INUM: 428 + case FSID_UUID16_INUM: 429 + return exp->ex_uuid != NULL; 430 + } 431 + return 1; 432 + } 433 + 434 + 435 + static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh) 436 + { 412 437 u8 version; 413 - u8 fsid_type = 0; 414 - struct inode * inode = dentry->d_inode; 415 - struct dentry *parent = dentry->d_parent; 416 - __u32 *datap; 417 - dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev; 418 - int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root); 419 - 420 - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", 421 - MAJOR(ex_dev), 
MINOR(ex_dev), 422 - (long) exp->ex_path.dentry->d_inode->i_ino, 423 - parent->d_name.name, dentry->d_name.name, 424 - (inode ? inode->i_ino : 0)); 425 - 426 - /* Choose filehandle version and fsid type based on 427 - * the reference filehandle (if it is in the same export) 428 - * or the export options. 429 - */ 430 - retry: 438 + u8 fsid_type; 439 + retry: 431 440 version = 1; 432 441 if (ref_fh && ref_fh->fh_export == exp) { 433 442 version = ref_fh->fh_handle.fh_version; 434 443 fsid_type = ref_fh->fh_handle.fh_fsid_type; 435 444 436 - if (ref_fh == fhp) 437 - fh_put(ref_fh); 438 445 ref_fh = NULL; 439 446 440 447 switch (version) { ··· 454 447 goto retry; 455 448 } 456 449 457 - /* Need to check that this type works for this 458 - * export point. As the fsid -> filesystem mapping 459 - * was guided by user-space, there is no guarantee 460 - * that the filesystem actually supports that fsid 461 - * type. If it doesn't we loop around again without 462 - * ref_fh set. 450 + /* 451 + * As the fsid -> filesystem mapping was guided by 452 + * user-space, there is no guarantee that the filesystem 453 + * actually supports that fsid type. If it doesn't we 454 + * loop around again without ref_fh set. 463 455 */ 464 - switch(fsid_type) { 465 - case FSID_DEV: 466 - if (!old_valid_dev(ex_dev)) 467 - goto retry; 468 - /* FALL THROUGH */ 469 - case FSID_MAJOR_MINOR: 470 - case FSID_ENCODE_DEV: 471 - if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags 472 - & FS_REQUIRES_DEV)) 473 - goto retry; 474 - break; 475 - case FSID_NUM: 476 - if (! 
(exp->ex_flags & NFSEXP_FSID)) 477 - goto retry; 478 - break; 479 - case FSID_UUID8: 480 - case FSID_UUID16: 481 - if (!root_export) 482 - goto retry; 483 - /* fall through */ 484 - case FSID_UUID4_INUM: 485 - case FSID_UUID16_INUM: 486 - if (exp->ex_uuid == NULL) 487 - goto retry; 488 - break; 489 - } 456 + if (!fsid_type_ok_for_exp(fsid_type, exp)) 457 + goto retry; 490 458 } else if (exp->ex_flags & NFSEXP_FSID) { 491 459 fsid_type = FSID_NUM; 492 460 } else if (exp->ex_uuid) { 493 461 if (fhp->fh_maxsize >= 64) { 494 - if (root_export) 462 + if (is_root_export(exp)) 495 463 fsid_type = FSID_UUID16; 496 464 else 497 465 fsid_type = FSID_UUID16_INUM; 498 466 } else { 499 - if (root_export) 467 + if (is_root_export(exp)) 500 468 fsid_type = FSID_UUID8; 501 469 else 502 470 fsid_type = FSID_UUID4_INUM; 503 471 } 504 - } else if (!old_valid_dev(ex_dev)) 472 + } else if (!old_valid_dev(exp_sb(exp)->s_dev)) 505 473 /* for newer device numbers, we must use a newer fsid format */ 506 474 fsid_type = FSID_ENCODE_DEV; 507 475 else 508 476 fsid_type = FSID_DEV; 477 + fhp->fh_handle.fh_version = version; 478 + if (version) 479 + fhp->fh_handle.fh_fsid_type = fsid_type; 480 + } 481 + 482 + __be32 483 + fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, 484 + struct svc_fh *ref_fh) 485 + { 486 + /* ref_fh is a reference file handle. 487 + * if it is non-null and for the same filesystem, then we should compose 488 + * a filehandle which is of the same version, where possible. 
489 + * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca 490 + * Then create a 32byte filehandle using nfs_fhbase_old 491 + * 492 + */ 493 + 494 + struct inode * inode = dentry->d_inode; 495 + struct dentry *parent = dentry->d_parent; 496 + __u32 *datap; 497 + dev_t ex_dev = exp_sb(exp)->s_dev; 498 + 499 + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", 500 + MAJOR(ex_dev), MINOR(ex_dev), 501 + (long) exp->ex_path.dentry->d_inode->i_ino, 502 + parent->d_name.name, dentry->d_name.name, 503 + (inode ? inode->i_ino : 0)); 504 + 505 + /* Choose filehandle version and fsid type based on 506 + * the reference filehandle (if it is in the same export) 507 + * or the export options. 508 + */ 509 + set_version_and_fsid_type(fhp, exp, ref_fh); 509 510 510 511 if (ref_fh == fhp) 511 512 fh_put(ref_fh); ··· 531 516 fhp->fh_export = exp; 532 517 cache_get(&exp->h); 533 518 534 - if (version == 0xca) { 519 + if (fhp->fh_handle.fh_version == 0xca) { 535 520 /* old style filehandle please */ 536 521 memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); 537 522 fhp->fh_handle.fh_size = NFS_FHSIZE; ··· 545 530 _fh_update_old(dentry, exp, &fhp->fh_handle); 546 531 } else { 547 532 int len; 548 - fhp->fh_handle.fh_version = 1; 549 533 fhp->fh_handle.fh_auth_type = 0; 550 534 datap = fhp->fh_handle.fh_auth+0; 551 - fhp->fh_handle.fh_fsid_type = fsid_type; 552 - mk_fsid(fsid_type, datap, ex_dev, 535 + mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, 553 536 exp->ex_path.dentry->d_inode->i_ino, 554 537 exp->ex_fsid, exp->ex_uuid); 555 538 556 - len = key_len(fsid_type); 539 + len = key_len(fhp->fh_handle.fh_fsid_type); 557 540 datap += len/4; 558 541 fhp->fh_handle.fh_size = 4 + len; 559 542 560 543 if (inode) 561 544 _fh_update(fhp, exp, dentry); 562 - if (fhp->fh_handle.fh_fileid_type == 255) 545 + if (fhp->fh_handle.fh_fileid_type == 255) { 546 + fh_put(fhp); 563 547 return nfserr_opnotsupp; 548 + } 564 549 } 565 550 566 551 return 0; ··· 654 639 case 
FSID_DEV: 655 640 case FSID_ENCODE_DEV: 656 641 case FSID_MAJOR_MINOR: 657 - if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags 658 - & FS_REQUIRES_DEV) 642 + if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV) 659 643 return FSIDSOURCE_DEV; 660 644 break; 661 645 case FSID_NUM:
+40 -14
fs/nfsd/nfssvc.c
··· 34 34 #include <linux/nfsd/syscall.h> 35 35 #include <linux/lockd/bind.h> 36 36 #include <linux/nfsacl.h> 37 + #include <linux/seq_file.h> 37 38 38 39 #define NFSDDBG_FACILITY NFSDDBG_SVC 39 40 ··· 66 65 */ 67 66 DEFINE_MUTEX(nfsd_mutex); 68 67 struct svc_serv *nfsd_serv; 68 + 69 + /* 70 + * nfsd_drc_lock protects nfsd_drc_max_mem and nfsd_drc_mem_used. 71 + * nfsd_drc_max_mem limits the total amount of memory available for 72 + * version 4.1 DRC caches. 73 + * nfsd_drc_mem_used tracks the current version 4.1 DRC memory usage. 74 + */ 75 + spinlock_t nfsd_drc_lock; 76 + unsigned int nfsd_drc_max_mem; 77 + unsigned int nfsd_drc_mem_used; 69 78 70 79 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 71 80 static struct svc_stat nfsd_acl_svcstats; ··· 246 235 */ 247 236 static void set_max_drc(void) 248 237 { 249 - /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ 250 - #define NFSD_DRC_SIZE_SHIFT 7 251 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() 252 - >> NFSD_DRC_SIZE_SHIFT; 253 - nfsd_serv->sv_drc_pages_used = 0; 254 - dprintk("%s svc_drc_max_pages %u\n", __func__, 255 - nfsd_serv->sv_drc_max_pages); 238 + #define NFSD_DRC_SIZE_SHIFT 10 239 + nfsd_drc_max_mem = (nr_free_buffer_pages() 240 + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; 241 + nfsd_drc_mem_used = 0; 242 + spin_lock_init(&nfsd_drc_lock); 243 + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); 256 244 } 257 245 258 246 int nfsd_create_serv(void) ··· 411 401 error = nfsd_racache_init(2*nrservs); 412 402 if (error<0) 413 403 goto out; 414 - nfs4_state_start(); 404 + error = nfs4_state_start(); 405 + if (error) 406 + goto out; 415 407 416 408 nfsd_reset_versions(); 417 409 ··· 581 569 + rqstp->rq_res.head[0].iov_len; 582 570 rqstp->rq_res.head[0].iov_len += sizeof(__be32); 583 571 584 - /* NFSv4.1 DRC requires statp */ 585 - if (rqstp->rq_vers == 4) 586 - nfsd4_set_statp(rqstp, statp); 587 - 588 572 /* Now call the procedure handler, and encode
NFS status. */ 589 573 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 590 574 nfserr = map_new_errors(rqstp->rq_vers, nfserr); ··· 615 607 616 608 int nfsd_pool_stats_open(struct inode *inode, struct file *file) 617 609 { 618 - if (nfsd_serv == NULL) 610 + int ret; 611 + mutex_lock(&nfsd_mutex); 612 + if (nfsd_serv == NULL) { 613 + mutex_unlock(&nfsd_mutex); 619 614 return -ENODEV; 620 - return svc_pool_stats_open(nfsd_serv, file); 615 + } 616 + /* bump up the pseudo refcount while traversing */ 617 + svc_get(nfsd_serv); 618 + ret = svc_pool_stats_open(nfsd_serv, file); 619 + mutex_unlock(&nfsd_mutex); 620 + return ret; 621 + } 622 + 623 + int nfsd_pool_stats_release(struct inode *inode, struct file *file) 624 + { 625 + int ret = seq_release(inode, file); 626 + mutex_lock(&nfsd_mutex); 627 + /* this function really, really should have been called svc_put() */ 628 + svc_destroy(nfsd_serv); 629 + mutex_unlock(&nfsd_mutex); 630 + return ret; 621 }
+8 -1
fs/nfsd/vfs.c
··· 89 89 #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 90 90 static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 91 91 92 + static inline int 93 + nfsd_v4client(struct svc_rqst *rq) 94 + { 95 + return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; 96 + } 97 + 92 98 /* 93 99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 94 100 * a mount point. ··· 121 115 path_put(&path); 122 116 goto out; 123 117 } 124 - if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 118 + if (nfsd_v4client(rqstp) || 119 + (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 125 120 /* successfully crossed mount point */ 126 121 /* 127 122 * This is subtle: path.dentry is *not* on path.mnt
-43
include/linux/lockd/lockd.h
··· 338 338 } 339 339 } 340 340 341 - static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, 342 - const struct sockaddr *sap2) 343 - { 344 - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; 345 - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; 346 - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; 347 - } 348 - 349 - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 350 - static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, 351 - const struct sockaddr *sap2) 352 - { 353 - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; 354 - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; 355 - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); 356 - } 357 - #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ 358 - static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, 359 - const struct sockaddr *sap2) 360 - { 361 - return 0; 362 - } 363 - #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ 364 - 365 - /* 366 - * Compare two host addresses 367 - * 368 - * Return TRUE if the addresses are the same; otherwise FALSE. 369 - */ 370 - static inline int nlm_cmp_addr(const struct sockaddr *sap1, 371 - const struct sockaddr *sap2) 372 - { 373 - if (sap1->sa_family == sap2->sa_family) { 374 - switch (sap1->sa_family) { 375 - case AF_INET: 376 - return __nlm_cmp_addr4(sap1, sap2); 377 - case AF_INET6: 378 - return __nlm_cmp_addr6(sap1, sap2); 379 - } 380 - } 381 - return 0; 382 - } 383 - 384 341 /* 385 342 * Compare two NLM locks. 386 343 * When the second lock is of type F_UNLCK, this acts like a wildcard.
+1 -1
include/linux/nfs4.h
··· 234 234 Needs to be updated if more operations are defined in future.*/ 235 235 236 236 #define FIRST_NFS4_OP OP_ACCESS 237 - #define LAST_NFS4_OP OP_RELEASE_LOCKOWNER 237 + #define LAST_NFS4_OP OP_RECLAIM_COMPLETE 238 238 239 239 enum nfsstat4 { 240 240 NFS4_OK = 0,
+5 -2
include/linux/nfsd/nfsd.h
··· 56 56 extern u32 nfsd_supported_minorversion; 57 57 extern struct mutex nfsd_mutex; 58 58 extern struct svc_serv *nfsd_serv; 59 + extern spinlock_t nfsd_drc_lock; 60 + extern unsigned int nfsd_drc_max_mem; 61 + extern unsigned int nfsd_drc_mem_used; 59 62 60 63 extern struct seq_operations nfs_exports_op; 61 64 ··· 166 163 extern unsigned int max_delegations; 167 164 int nfs4_state_init(void); 168 165 void nfsd4_free_slabs(void); 169 - void nfs4_state_start(void); 166 + int nfs4_state_start(void); 170 167 void nfs4_state_shutdown(void); 171 168 time_t nfs4_lease_time(void); 172 169 void nfs4_reset_lease(time_t leasetime); ··· 174 171 #else 175 172 static inline int nfs4_state_init(void) { return 0; } 176 173 static inline void nfsd4_free_slabs(void) { } 177 - static inline void nfs4_state_start(void) { } 174 + static inline int nfs4_state_start(void) { return 0; } 178 175 static inline void nfs4_state_shutdown(void) { } 179 176 static inline time_t nfs4_lease_time(void) { return 0; } 180 177 static inline void nfs4_reset_lease(time_t leasetime) { }
+54 -23
include/linux/nfsd/state.h
··· 60 60 #define si_stateownerid si_opaque.so_stateownerid 61 61 #define si_fileid si_opaque.so_fileid 62 62 63 + struct nfsd4_cb_sequence { 64 + /* args/res */ 65 + u32 cbs_minorversion; 66 + struct nfs4_client *cbs_clp; 67 + }; 68 + 63 69 struct nfs4_delegation { 64 70 struct list_head dl_perfile; 65 71 struct list_head dl_perclnt; ··· 87 81 /* client delegation callback info */ 88 82 struct nfs4_cb_conn { 89 83 /* SETCLIENTID info */ 90 - u32 cb_addr; 91 - unsigned short cb_port; 84 + struct sockaddr_storage cb_addr; 85 + size_t cb_addrlen; 92 86 u32 cb_prog; 93 87 u32 cb_minorversion; 94 88 u32 cb_ident; /* minorversion 0 only */ 95 89 /* RPC client info */ 96 90 atomic_t cb_set; /* successful CB_NULL call */ 97 91 struct rpc_clnt * cb_client; 98 - struct rpc_cred * cb_cred; 99 92 }; 100 93 101 - /* Maximum number of slots per session. 128 is useful for long haul TCP */ 102 - #define NFSD_MAX_SLOTS_PER_SESSION 128 103 - /* Maximum number of pages per slot cache entry */ 104 - #define NFSD_PAGES_PER_SLOT 1 94 + /* Maximum number of slots per session. 
160 is useful for long haul TCP */ 95 + #define NFSD_MAX_SLOTS_PER_SESSION 160 105 96 /* Maximum number of operations per session compound */ 106 97 #define NFSD_MAX_OPS_PER_COMPOUND 16 107 - 108 - struct nfsd4_cache_entry { 109 - __be32 ce_status; 110 - struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ 111 - struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; 112 - int ce_cachethis; 113 - short ce_resused; 114 - int ce_opcnt; 115 - int ce_rpchdrlen; 116 - }; 98 + /* Maximum session per slot cache size */ 99 + #define NFSD_SLOT_CACHE_SIZE 1024 100 + /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ 101 + #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 102 + #define NFSD_MAX_MEM_PER_SESSION \ 103 + (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) 117 104 118 105 struct nfsd4_slot { 119 - bool sl_inuse; 120 - u32 sl_seqid; 121 - struct nfsd4_cache_entry sl_cache_entry; 106 + bool sl_inuse; 107 + bool sl_cachethis; 108 + u16 sl_opcnt; 109 + u32 sl_seqid; 110 + __be32 sl_status; 111 + u32 sl_datalen; 112 + char sl_data[]; 122 113 }; 123 114 124 115 struct nfsd4_channel_attrs { ··· 129 126 u32 rdma_attrs; 130 127 }; 131 128 129 + struct nfsd4_create_session { 130 + clientid_t clientid; 131 + struct nfs4_sessionid sessionid; 132 + u32 seqid; 133 + u32 flags; 134 + struct nfsd4_channel_attrs fore_channel; 135 + struct nfsd4_channel_attrs back_channel; 136 + u32 callback_prog; 137 + u32 uid; 138 + u32 gid; 139 + }; 140 + 141 + /* The single slot clientid cache structure */ 142 + struct nfsd4_clid_slot { 143 + u32 sl_seqid; 144 + __be32 sl_status; 145 + struct nfsd4_create_session sl_cr_ses; 146 + }; 147 + 132 148 struct nfsd4_session { 133 149 struct kref se_ref; 134 150 struct list_head se_hash; /* hash by sessionid */ ··· 157 135 struct nfs4_sessionid se_sessionid; 158 136 struct nfsd4_channel_attrs se_fchannel; 159 137 struct nfsd4_channel_attrs se_bchannel; 160 - struct nfsd4_slot se_slots[]; /* forward channel slots */ 138 + struct 
nfsd4_slot *se_slots[]; /* forward channel slots */ 161 139 }; 162 140 163 141 static inline void ··· 202 180 char cl_recdir[HEXDIR_LEN]; /* recovery dir */ 203 181 nfs4_verifier cl_verifier; /* generated by client */ 204 182 time_t cl_time; /* time of last lease renewal */ 205 - __be32 cl_addr; /* client ipaddress */ 183 + struct sockaddr_storage cl_addr; /* client ipaddress */ 206 184 u32 cl_flavor; /* setclientid pseudoflavor */ 207 185 char *cl_principal; /* setclientid principal name */ 208 186 struct svc_cred cl_cred; /* setclientid principal */ ··· 214 192 215 193 /* for nfs41 */ 216 194 struct list_head cl_sessions; 217 - struct nfsd4_slot cl_slot; /* create_session slot */ 195 + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 218 196 u32 cl_exchange_flags; 219 197 struct nfs4_sessionid cl_sessionid; 198 + 199 + /* for nfs41 callbacks */ 200 + /* We currently support a single back channel with a single slot */ 201 + unsigned long cl_cb_slot_busy; 202 + u32 cl_cb_seq_nr; 203 + struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */ 204 + struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 205 + /* wait here for slots */ 220 206 }; 221 207 222 208 /* struct nfs4_client_reset ··· 375 345 extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 376 346 extern void put_nfs4_client(struct nfs4_client *clp); 377 347 extern void nfs4_free_stateowner(struct kref *kref); 348 + extern int set_callback_cred(void); 378 349 extern void nfsd4_probe_callback(struct nfs4_client *clp); 379 350 extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 380 351 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+3 -16
include/linux/nfsd/xdr4.h
··· 51 51 /* For sessions DRC */ 52 52 struct nfsd4_session *session; 53 53 struct nfsd4_slot *slot; 54 - __be32 *statp; 54 + __be32 *datap; 55 55 size_t iovlen; 56 56 u32 minorversion; 57 57 u32 status; ··· 366 366 int spa_how; 367 367 }; 368 368 369 - struct nfsd4_create_session { 370 - clientid_t clientid; 371 - struct nfs4_sessionid sessionid; 372 - u32 seqid; 373 - u32 flags; 374 - struct nfsd4_channel_attrs fore_channel; 375 - struct nfsd4_channel_attrs back_channel; 376 - u32 callback_prog; 377 - u32 uid; 378 - u32 gid; 379 - }; 380 - 381 369 struct nfsd4_sequence { 382 370 struct nfs4_sessionid sessionid; /* request/response */ 383 371 u32 seqid; /* request/response */ ··· 467 479 static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) 468 480 { 469 481 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; 470 - return args->opcnt == 1; 482 + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; 471 483 } 472 484 473 485 static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) 474 486 { 475 - return !resp->cstate.slot->sl_cache_entry.ce_cachethis || 476 - nfsd4_is_solo_sequence(resp); 487 + return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); 477 488 } 478 489 479 490 #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
+2 -2
include/linux/sunrpc/auth.h
··· 111 111 void (*crdestroy)(struct rpc_cred *); 112 112 113 113 int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); 114 - void (*crbind)(struct rpc_task *, struct rpc_cred *); 114 + void (*crbind)(struct rpc_task *, struct rpc_cred *, int); 115 115 __be32 * (*crmarshal)(struct rpc_task *, __be32 *); 116 116 int (*crrefresh)(struct rpc_task *); 117 117 __be32 * (*crvalidate)(struct rpc_task *, __be32 *); ··· 140 140 void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); 141 141 struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); 142 142 void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int); 143 - void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *); 143 + void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); 144 144 void put_rpccred(struct rpc_cred *); 145 145 void rpcauth_unbindcred(struct rpc_task *); 146 146 __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
+114
include/linux/sunrpc/clnt.h
··· 22 22 #include <linux/sunrpc/timer.h> 23 23 #include <asm/signal.h> 24 24 #include <linux/path.h> 25 + #include <net/ipv6.h> 25 26 26 27 struct rpc_inode; 27 28 ··· 114 113 rpc_authflavor_t authflavor; 115 114 unsigned long flags; 116 115 char *client_name; 116 + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 117 117 }; 118 118 119 119 /* Values for "flags" field */ ··· 189 187 190 188 #define IPV6_SCOPE_DELIMITER '%' 191 189 #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") 190 + 191 + static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, 192 + const struct sockaddr *sap2) 193 + { 194 + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; 195 + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; 196 + 197 + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; 198 + } 199 + 200 + static inline bool __rpc_copy_addr4(struct sockaddr *dst, 201 + const struct sockaddr *src) 202 + { 203 + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; 204 + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; 205 + 206 + dsin->sin_family = ssin->sin_family; 207 + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; 208 + return true; 209 + } 210 + 211 + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 212 + static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, 213 + const struct sockaddr *sap2) 214 + { 215 + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; 216 + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; 217 + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); 218 + } 219 + 220 + static inline bool __rpc_copy_addr6(struct sockaddr *dst, 221 + const struct sockaddr *src) 222 + { 223 + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; 224 + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; 225 + 226 + dsin6->sin6_family = ssin6->sin6_family; 227 + ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); 228 + return true; 229 + } 230 + 
#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ 231 + static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, 232 + const struct sockaddr *sap2) 233 + { 234 + return false; 235 + } 236 + 237 + static inline bool __rpc_copy_addr6(struct sockaddr *dst, 238 + const struct sockaddr *src) 239 + { 240 + return false; 241 + } 242 + #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ 243 + 244 + /** 245 + * rpc_cmp_addr - compare the address portion of two sockaddrs. 246 + * @sap1: first sockaddr 247 + * @sap2: second sockaddr 248 + * 249 + * Just compares the family and address portion. Ignores port, scope, etc. 250 + * Returns true if the addrs are equal, false if they aren't. 251 + */ 252 + static inline bool rpc_cmp_addr(const struct sockaddr *sap1, 253 + const struct sockaddr *sap2) 254 + { 255 + if (sap1->sa_family == sap2->sa_family) { 256 + switch (sap1->sa_family) { 257 + case AF_INET: 258 + return __rpc_cmp_addr4(sap1, sap2); 259 + case AF_INET6: 260 + return __rpc_cmp_addr6(sap1, sap2); 261 + } 262 + } 263 + return false; 264 + } 265 + 266 + /** 267 + * rpc_copy_addr - copy the address portion of one sockaddr to another 268 + * @dst: destination sockaddr 269 + * @src: source sockaddr 270 + * 271 + * Just copies the address portion and family. Ignores port, scope, etc. 272 + * Caller is responsible for making certain that dst is large enough to hold 273 + * the address in src. Returns true if address family is supported. Returns 274 + * false otherwise. 
275 + */ 276 + static inline bool rpc_copy_addr(struct sockaddr *dst, 277 + const struct sockaddr *src) 278 + { 279 + switch (src->sa_family) { 280 + case AF_INET: 281 + return __rpc_copy_addr4(dst, src); 282 + case AF_INET6: 283 + return __rpc_copy_addr6(dst, src); 284 + } 285 + return false; 286 + } 287 + 288 + /** 289 + * rpc_get_scope_id - return scopeid for a given sockaddr 290 + * @sa: sockaddr to get scopeid from 291 + * 292 + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if 293 + * not an AF_INET6 address. 294 + */ 295 + static inline u32 rpc_get_scope_id(const struct sockaddr *sa) 296 + { 297 + if (sa->sa_family != AF_INET6) 298 + return 0; 299 + 300 + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; 301 + } 192 302 193 303 #endif /* __KERNEL__ */ 194 304 #endif /* _LINUX_SUNRPC_CLNT_H */
-2
include/linux/sunrpc/svc.h
··· 94 94 struct module * sv_module; /* optional module to count when 95 95 * adding threads */ 96 96 svc_thread_fn sv_function; /* main function for threads */ 97 - unsigned int sv_drc_max_pages; /* Total pages for DRC */ 98 - unsigned int sv_drc_pages_used;/* DRC pages used */ 99 97 #if defined(CONFIG_NFS_V4_1) 100 98 struct list_head sv_cb_list; /* queue for callback requests 101 99 * that arrive over the same
+1
include/linux/sunrpc/svc_xprt.h
··· 65 65 size_t xpt_locallen; /* length of address */ 66 66 struct sockaddr_storage xpt_remote; /* remote peer's address */ 67 67 size_t xpt_remotelen; /* length of address */ 68 + struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ 68 69 }; 69 70 70 71 int svc_reg_xprt_class(struct svc_xprt_class *);
+1
include/linux/sunrpc/svcsock.h
··· 28 28 /* private TCP part */ 29 29 u32 sk_reclen; /* length of record */ 30 30 u32 sk_tcplen; /* current read length */ 31 + struct rpc_xprt *sk_bc_xprt; /* NFSv4.1 backchannel xprt */ 31 32 }; 32 33 33 34 /*
+19
include/linux/sunrpc/xprt.h
··· 124 124 void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); 125 125 }; 126 126 127 + /* 128 + * RPC transport identifiers 129 + * 130 + * To preserve compatibility with the historical use of raw IP protocol 131 + * id's for transport selection, UDP and TCP identifiers are specified 132 + * with the previous values. No such restriction exists for new transports, 133 + * except that they may not collide with these values (17 and 6, 134 + * respectively). 135 + */ 136 + #define XPRT_TRANSPORT_BC (1 << 31) 137 + enum xprt_transports { 138 + XPRT_TRANSPORT_UDP = IPPROTO_UDP, 139 + XPRT_TRANSPORT_TCP = IPPROTO_TCP, 140 + XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, 141 + XPRT_TRANSPORT_RDMA = 256 142 + }; 143 + 127 144 struct rpc_xprt { 128 145 struct kref kref; /* Reference count */ 129 146 struct rpc_xprt_ops * ops; /* transport methods */ ··· 196 179 spinlock_t reserve_lock; /* lock slot table */ 197 180 u32 xid; /* Next XID value to use */ 198 181 struct rpc_task * snd_task; /* Task blocked in send */ 182 + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 199 183 #if defined(CONFIG_NFS_V4_1) 200 184 struct svc_serv *bc_serv; /* The RPC service which will */ 201 185 /* process the callback */ ··· 249 231 struct sockaddr * srcaddr; /* optional local address */ 250 232 struct sockaddr * dstaddr; /* remote peer address */ 251 233 size_t addrlen; 234 + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 252 235 }; 253 236 254 237 struct xprt_class {
-5
include/linux/sunrpc/xprtrdma.h
··· 41 41 #define _LINUX_SUNRPC_XPRTRDMA_H 42 42 43 43 /* 44 - * RPC transport identifier for RDMA 45 - */ 46 - #define XPRT_TRANSPORT_RDMA 256 47 - 48 - /* 49 44 * rpcbind (v3+) RDMA netid. 50 45 */ 51 46 #define RPCBIND_NETID_RDMA "rdma"
-11
include/linux/sunrpc/xprtsock.h
··· 13 13 void cleanup_socket_xprt(void); 14 14 15 15 /* 16 - * RPC transport identifiers for UDP, TCP 17 - * 18 - * To preserve compatibility with the historical use of raw IP protocol 19 - * id's for transport selection, these are specified with the previous 20 - * values. No such restriction exists for new transports, except that 21 - * they may not collide with these values (17 and 6, respectively). 22 - */ 23 - #define XPRT_TRANSPORT_UDP IPPROTO_UDP 24 - #define XPRT_TRANSPORT_TCP IPPROTO_TCP 25 - 26 - /* 27 16 * RPC slot table sizes for UDP, TCP transports 28 17 */ 29 18 extern unsigned int xprt_udp_slot_table_entries;
+12 -8
net/sunrpc/auth.c
··· 385 385 EXPORT_SYMBOL_GPL(rpcauth_init_cred); 386 386 387 387 void 388 - rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) 388 + rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) 389 389 { 390 390 task->tk_msg.rpc_cred = get_rpccred(cred); 391 391 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, ··· 394 394 EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); 395 395 396 396 static void 397 - rpcauth_bind_root_cred(struct rpc_task *task) 397 + rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) 398 398 { 399 399 struct rpc_auth *auth = task->tk_client->cl_auth; 400 400 struct auth_cred acred = { ··· 405 405 406 406 dprintk("RPC: %5u looking up %s cred\n", 407 407 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); 408 - ret = auth->au_ops->lookup_cred(auth, &acred, 0); 408 + ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); 409 409 if (!IS_ERR(ret)) 410 410 task->tk_msg.rpc_cred = ret; 411 411 else ··· 413 413 } 414 414 415 415 static void 416 - rpcauth_bind_new_cred(struct rpc_task *task) 416 + rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) 417 417 { 418 418 struct rpc_auth *auth = task->tk_client->cl_auth; 419 419 struct rpc_cred *ret; 420 420 421 421 dprintk("RPC: %5u looking up %s cred\n", 422 422 task->tk_pid, auth->au_ops->au_name); 423 - ret = rpcauth_lookupcred(auth, 0); 423 + ret = rpcauth_lookupcred(auth, lookupflags); 424 424 if (!IS_ERR(ret)) 425 425 task->tk_msg.rpc_cred = ret; 426 426 else ··· 430 430 void 431 431 rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) 432 432 { 433 + int lookupflags = 0; 434 + 435 + if (flags & RPC_TASK_ASYNC) 436 + lookupflags |= RPCAUTH_LOOKUP_NEW; 433 437 if (cred != NULL) 434 - cred->cr_ops->crbind(task, cred); 438 + cred->cr_ops->crbind(task, cred, lookupflags); 435 439 else if (flags & RPC_TASK_ROOTCREDS) 436 - rpcauth_bind_root_cred(task); 440 + rpcauth_bind_root_cred(task, lookupflags); 
437 441 else 438 - rpcauth_bind_new_cred(task); 442 + rpcauth_bind_new_cred(task, lookupflags); 439 443 } 440 444 441 445 void
+2 -2
net/sunrpc/auth_generic.c
··· 55 55 EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); 56 56 57 57 static void 58 - generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) 58 + generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) 59 59 { 60 60 struct rpc_auth *auth = task->tk_client->cl_auth; 61 61 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; 62 62 struct rpc_cred *ret; 63 63 64 - ret = auth->au_ops->lookup_cred(auth, acred, 0); 64 + ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); 65 65 if (!IS_ERR(ret)) 66 66 task->tk_msg.rpc_cred = ret; 67 67 else
+4 -2
net/sunrpc/auth_gss/svcauth_gss.c
··· 1374 1374 if (stat) 1375 1375 goto out_err; 1376 1376 break; 1377 - default: 1378 - goto out_err; 1377 + /* 1378 + * For any other gc_svc value, svcauth_gss_accept() already set 1379 + * the auth_error appropriately; just fall through: 1380 + */ 1379 1381 } 1380 1382 1381 1383 out:
+61 -48
net/sunrpc/cache.c
··· 103 103 EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); 104 104 105 105 106 - static void queue_loose(struct cache_detail *detail, struct cache_head *ch); 106 + static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); 107 107 108 - static int cache_fresh_locked(struct cache_head *head, time_t expiry) 108 + static void cache_fresh_locked(struct cache_head *head, time_t expiry) 109 109 { 110 110 head->expiry_time = expiry; 111 111 head->last_refresh = get_seconds(); 112 - return !test_and_set_bit(CACHE_VALID, &head->flags); 112 + set_bit(CACHE_VALID, &head->flags); 113 113 } 114 114 115 115 static void cache_fresh_unlocked(struct cache_head *head, 116 - struct cache_detail *detail, int new) 116 + struct cache_detail *detail) 117 117 { 118 - if (new) 119 - cache_revisit_request(head); 120 118 if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { 121 119 cache_revisit_request(head); 122 - queue_loose(detail, head); 120 + cache_dequeue(detail, head); 123 121 } 124 122 } 125 123 ··· 130 132 */ 131 133 struct cache_head **head; 132 134 struct cache_head *tmp; 133 - int is_new; 134 135 135 136 if (!test_bit(CACHE_VALID, &old->flags)) { 136 137 write_lock(&detail->hash_lock); ··· 138 141 set_bit(CACHE_NEGATIVE, &old->flags); 139 142 else 140 143 detail->update(old, new); 141 - is_new = cache_fresh_locked(old, new->expiry_time); 144 + cache_fresh_locked(old, new->expiry_time); 142 145 write_unlock(&detail->hash_lock); 143 - cache_fresh_unlocked(old, detail, is_new); 146 + cache_fresh_unlocked(old, detail); 144 147 return old; 145 148 } 146 149 write_unlock(&detail->hash_lock); ··· 164 167 *head = tmp; 165 168 detail->entries++; 166 169 cache_get(tmp); 167 - is_new = cache_fresh_locked(tmp, new->expiry_time); 170 + cache_fresh_locked(tmp, new->expiry_time); 168 171 cache_fresh_locked(old, 0); 169 172 write_unlock(&detail->hash_lock); 170 - cache_fresh_unlocked(tmp, detail, is_new); 171 - cache_fresh_unlocked(old, detail, 0); 173 + cache_fresh_unlocked(tmp, 
detail); 174 + cache_fresh_unlocked(old, detail); 172 175 cache_put(old, detail); 173 176 return tmp; 174 177 } ··· 181 184 return cd->cache_upcall(cd, h); 182 185 } 183 186 187 + static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) 188 + { 189 + if (!test_bit(CACHE_VALID, &h->flags) || 190 + h->expiry_time < get_seconds()) 191 + return -EAGAIN; 192 + else if (detail->flush_time > h->last_refresh) 193 + return -EAGAIN; 194 + else { 195 + /* entry is valid */ 196 + if (test_bit(CACHE_NEGATIVE, &h->flags)) 197 + return -ENOENT; 198 + else 199 + return 0; 200 + } 201 + } 202 + 184 203 /* 185 204 * This is the generic cache management routine for all 186 205 * the authentication caches. ··· 205 192 * 206 193 * 207 194 * Returns 0 if the cache_head can be used, or cache_puts it and returns 208 - * -EAGAIN if upcall is pending, 209 - * -ETIMEDOUT if upcall failed and should be retried, 195 + * -EAGAIN if upcall is pending and request has been queued 196 + * -ETIMEDOUT if upcall failed or request could not be queued or 197 + * upcall completed but item is still invalid (implying that 198 + * the cache item has been replaced with a newer one). 
210 199 * -ENOENT if cache entry was negative 211 200 */ 212 201 int cache_check(struct cache_detail *detail, ··· 218 203 long refresh_age, age; 219 204 220 205 /* First decide return status as best we can */ 221 - if (!test_bit(CACHE_VALID, &h->flags) || 222 - h->expiry_time < get_seconds()) 223 - rv = -EAGAIN; 224 - else if (detail->flush_time > h->last_refresh) 225 - rv = -EAGAIN; 226 - else { 227 - /* entry is valid */ 228 - if (test_bit(CACHE_NEGATIVE, &h->flags)) 229 - rv = -ENOENT; 230 - else rv = 0; 231 - } 206 + rv = cache_is_valid(detail, h); 232 207 233 208 /* now see if we want to start an upcall */ 234 209 refresh_age = (h->expiry_time - h->last_refresh); ··· 234 229 switch (cache_make_upcall(detail, h)) { 235 230 case -EINVAL: 236 231 clear_bit(CACHE_PENDING, &h->flags); 232 + cache_revisit_request(h); 237 233 if (rv == -EAGAIN) { 238 234 set_bit(CACHE_NEGATIVE, &h->flags); 239 - cache_fresh_unlocked(h, detail, 240 - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY)); 235 + cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); 236 + cache_fresh_unlocked(h, detail); 241 237 rv = -ENOENT; 242 238 } 243 239 break; ··· 251 245 } 252 246 } 253 247 254 - if (rv == -EAGAIN) 255 - if (cache_defer_req(rqstp, h) != 0) 256 - rv = -ETIMEDOUT; 257 - 248 + if (rv == -EAGAIN) { 249 + if (cache_defer_req(rqstp, h) < 0) { 250 + /* Request is not deferred */ 251 + rv = cache_is_valid(detail, h); 252 + if (rv == -EAGAIN) 253 + rv = -ETIMEDOUT; 254 + } 255 + } 258 256 if (rv) 259 257 cache_put(h, detail); 260 258 return rv; ··· 406 396 ) 407 397 continue; 408 398 if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) 409 - queue_loose(current_detail, ch); 399 + cache_dequeue(current_detail, ch); 410 400 411 401 if (atomic_read(&ch->ref.refcount) == 1) 412 402 break; ··· 422 412 if (!ch) 423 413 current_index ++; 424 414 spin_unlock(&cache_list_lock); 425 - if (ch) 415 + if (ch) { 416 + cache_revisit_request(ch); 426 417 cache_put(ch, d); 418 + } 427 419 } else 428 420 
spin_unlock(&cache_list_lock); 429 421 ··· 500 488 501 489 static int cache_defer_req(struct cache_req *req, struct cache_head *item) 502 490 { 503 - struct cache_deferred_req *dreq; 491 + struct cache_deferred_req *dreq, *discard; 504 492 int hash = DFR_HASH(item); 505 493 506 494 if (cache_defer_cnt >= DFR_MAX) { ··· 508 496 * or continue and drop the oldest below 509 497 */ 510 498 if (net_random()&1) 511 - return -ETIMEDOUT; 499 + return -ENOMEM; 512 500 } 513 501 dreq = req->defer(req); 514 502 if (dreq == NULL) 515 - return -ETIMEDOUT; 503 + return -ENOMEM; 516 504 517 505 dreq->item = item; 518 506 ··· 525 513 list_add(&dreq->hash, &cache_defer_hash[hash]); 526 514 527 515 /* it is in, now maybe clean up */ 528 - dreq = NULL; 516 + discard = NULL; 529 517 if (++cache_defer_cnt > DFR_MAX) { 530 - dreq = list_entry(cache_defer_list.prev, 531 - struct cache_deferred_req, recent); 532 - list_del(&dreq->recent); 533 - list_del(&dreq->hash); 518 + discard = list_entry(cache_defer_list.prev, 519 + struct cache_deferred_req, recent); 520 + list_del_init(&discard->recent); 521 + list_del_init(&discard->hash); 534 522 cache_defer_cnt--; 535 523 } 536 524 spin_unlock(&cache_defer_lock); 537 525 538 - if (dreq) { 526 + if (discard) 539 527 /* there was one too many */ 540 - dreq->revisit(dreq, 1); 541 - } 528 + discard->revisit(discard, 1); 529 + 542 530 if (!test_bit(CACHE_PENDING, &item->flags)) { 543 531 /* must have just been validated... 
*/ 544 532 cache_revisit_request(item); 533 + return -EAGAIN; 545 534 } 546 535 return 0; 547 536 } ··· 564 551 dreq = list_entry(lp, struct cache_deferred_req, hash); 565 552 lp = lp->next; 566 553 if (dreq->item == item) { 567 - list_del(&dreq->hash); 554 + list_del_init(&dreq->hash); 568 555 list_move(&dreq->recent, &pending); 569 556 cache_defer_cnt--; 570 557 } ··· 590 577 591 578 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 592 579 if (dreq->owner == owner) { 593 - list_del(&dreq->hash); 580 + list_del_init(&dreq->hash); 594 581 list_move(&dreq->recent, &pending); 595 582 cache_defer_cnt--; 596 583 } ··· 900 887 901 888 902 889 903 - static void queue_loose(struct cache_detail *detail, struct cache_head *ch) 890 + static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) 904 891 { 905 892 struct cache_queue *cq; 906 893 spin_lock(&queue_lock);
+1
net/sunrpc/clnt.c
··· 288 288 .srcaddr = args->saddress, 289 289 .dstaddr = args->address, 290 290 .addrlen = args->addrsize, 291 + .bc_xprt = args->bc_xprt, 291 292 }; 292 293 char servername[48]; 293 294
+2 -5
net/sunrpc/sched.c
··· 21 21 22 22 #include <linux/sunrpc/clnt.h> 23 23 24 + #include "sunrpc.h" 25 + 24 26 #ifdef RPC_DEBUG 25 27 #define RPCDBG_FACILITY RPCDBG_SCHED 26 28 #define RPC_TASK_MAGIC_ID 0xf00baa ··· 712 710 { 713 711 __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); 714 712 } 715 - 716 - struct rpc_buffer { 717 - size_t len; 718 - char data[]; 719 - }; 720 713 721 714 /** 722 715 * rpc_malloc - allocate an RPC buffer
+14
net/sunrpc/sunrpc.h
··· 27 27 #ifndef _NET_SUNRPC_SUNRPC_H 28 28 #define _NET_SUNRPC_SUNRPC_H 29 29 30 + #include <linux/net.h> 31 + 32 + /* 33 + * Header for dynamically allocated rpc buffers. 34 + */ 35 + struct rpc_buffer { 36 + size_t len; 37 + char data[]; 38 + }; 39 + 30 40 static inline int rpc_reply_expected(struct rpc_task *task) 31 41 { 32 42 return (task->tk_msg.rpc_proc != NULL) && 33 43 (task->tk_msg.rpc_proc->p_decode != NULL); 34 44 } 45 + 46 + int svc_send_common(struct socket *sock, struct xdr_buf *xdr, 47 + struct page *headpage, unsigned long headoffset, 48 + struct page *tailpage, unsigned long tailoffset); 35 49 36 50 #endif /* _NET_SUNRPC_SUNRPC_H */ 37 51
+9 -16
net/sunrpc/svc_xprt.c
··· 160 160 mutex_init(&xprt->xpt_mutex); 161 161 spin_lock_init(&xprt->xpt_lock); 162 162 set_bit(XPT_BUSY, &xprt->xpt_flags); 163 + rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); 163 164 } 164 165 EXPORT_SYMBOL_GPL(svc_xprt_init); 165 166 ··· 711 710 spin_unlock_bh(&pool->sp_lock); 712 711 713 712 len = 0; 714 - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { 715 - dprintk("svc_recv: found XPT_CLOSE\n"); 716 - svc_delete_xprt(xprt); 717 - } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { 713 + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { 718 714 struct svc_xprt *newxpt; 719 715 newxpt = xprt->xpt_ops->xpo_accept(xprt); 720 716 if (newxpt) { ··· 737 739 svc_xprt_received(newxpt); 738 740 } 739 741 svc_xprt_received(xprt); 740 - } else { 742 + } else if (!test_bit(XPT_CLOSE, &xprt->xpt_flags)) { 741 743 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 742 744 rqstp, pool->sp_id, xprt, 743 745 atomic_read(&xprt->xpt_ref.refcount)); ··· 748 750 } else 749 751 len = xprt->xpt_ops->xpo_recvfrom(rqstp); 750 752 dprintk("svc: got len=%d\n", len); 753 + } 754 + 755 + if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { 756 + dprintk("svc_recv: found XPT_CLOSE\n"); 757 + svc_delete_xprt(xprt); 751 758 } 752 759 753 760 /* No data, incomplete (TCP) read, or accept() */ ··· 811 808 else 812 809 len = xprt->xpt_ops->xpo_sendto(rqstp); 813 810 mutex_unlock(&xprt->xpt_mutex); 811 + rpc_wake_up(&xprt->xpt_bc_pending); 814 812 svc_xprt_release(rqstp); 815 813 816 814 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) ··· 1170 1166 1171 1167 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); 1172 1168 1173 - lock_kernel(); 1174 - /* bump up the pseudo refcount while traversing */ 1175 - svc_get(serv); 1176 - unlock_kernel(); 1177 - 1178 1169 if (!pidx) 1179 1170 return SEQ_START_TOKEN; 1180 1171 return (pidx > serv->sv_nrpools ? 
NULL : &serv->sv_pools[pidx-1]); ··· 1197 1198 1198 1199 static void svc_pool_stats_stop(struct seq_file *m, void *p) 1199 1200 { 1200 - struct svc_serv *serv = m->private; 1201 - 1202 - lock_kernel(); 1203 - /* this function really, really should have been called svc_put() */ 1204 - svc_destroy(serv); 1205 - unlock_kernel(); 1206 1201 } 1207 1202 1208 1203 static int svc_pool_stats_show(struct seq_file *m, void *p)
+1
net/sunrpc/svcauth_unix.c
··· 668 668 case 0: 669 669 *gip = ug->gi; 670 670 get_group_info(*gip); 671 + cache_put(&ug->h, &unix_gid_cache); 671 672 return 0; 672 673 default: 673 674 return -EAGAIN;
+244 -91
net/sunrpc/svcsock.c
··· 49 49 #include <linux/sunrpc/msg_prot.h> 50 50 #include <linux/sunrpc/svcsock.h> 51 51 #include <linux/sunrpc/stats.h> 52 + #include <linux/sunrpc/xprt.h> 52 53 53 54 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 54 55 ··· 154 153 } 155 154 156 155 /* 157 - * Generic sendto routine 156 + * send routine intended to be shared by the fore- and back-channel 158 157 */ 159 - static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 158 + int svc_send_common(struct socket *sock, struct xdr_buf *xdr, 159 + struct page *headpage, unsigned long headoffset, 160 + struct page *tailpage, unsigned long tailoffset) 160 161 { 161 - struct svc_sock *svsk = 162 - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 163 - struct socket *sock = svsk->sk_sock; 164 - int slen; 165 - union { 166 - struct cmsghdr hdr; 167 - long all[SVC_PKTINFO_SPACE / sizeof(long)]; 168 - } buffer; 169 - struct cmsghdr *cmh = &buffer.hdr; 170 - int len = 0; 171 162 int result; 172 163 int size; 173 164 struct page **ppage = xdr->pages; 174 165 size_t base = xdr->page_base; 175 166 unsigned int pglen = xdr->page_len; 176 167 unsigned int flags = MSG_MORE; 177 - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 168 + int slen; 169 + int len = 0; 178 170 179 171 slen = xdr->len; 180 - 181 - if (rqstp->rq_prot == IPPROTO_UDP) { 182 - struct msghdr msg = { 183 - .msg_name = &rqstp->rq_addr, 184 - .msg_namelen = rqstp->rq_addrlen, 185 - .msg_control = cmh, 186 - .msg_controllen = sizeof(buffer), 187 - .msg_flags = MSG_MORE, 188 - }; 189 - 190 - svc_set_cmsg_data(rqstp, cmh); 191 - 192 - if (sock_sendmsg(sock, &msg, 0) < 0) 193 - goto out; 194 - } 195 172 196 173 /* send head */ 197 174 if (slen == xdr->head[0].iov_len) 198 175 flags = 0; 199 - len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, 176 + len = kernel_sendpage(sock, headpage, headoffset, 200 177 xdr->head[0].iov_len, flags); 201 178 if (len != xdr->head[0].iov_len) 202 179 goto out; ··· 198 219 base = 0; 199 220 ppage++; 200 221 } 222 + 201 
223 /* send tail */ 202 224 if (xdr->tail[0].iov_len) { 203 - result = kernel_sendpage(sock, rqstp->rq_respages[0], 204 - ((unsigned long)xdr->tail[0].iov_base) 205 - & (PAGE_SIZE-1), 206 - xdr->tail[0].iov_len, 0); 207 - 225 + result = kernel_sendpage(sock, tailpage, tailoffset, 226 + xdr->tail[0].iov_len, 0); 208 227 if (result > 0) 209 228 len += result; 210 229 } 230 + 231 + out: 232 + return len; 233 + } 234 + 235 + 236 + /* 237 + * Generic sendto routine 238 + */ 239 + static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 240 + { 241 + struct svc_sock *svsk = 242 + container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 243 + struct socket *sock = svsk->sk_sock; 244 + union { 245 + struct cmsghdr hdr; 246 + long all[SVC_PKTINFO_SPACE / sizeof(long)]; 247 + } buffer; 248 + struct cmsghdr *cmh = &buffer.hdr; 249 + int len = 0; 250 + unsigned long tailoff; 251 + unsigned long headoff; 252 + RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 253 + 254 + if (rqstp->rq_prot == IPPROTO_UDP) { 255 + struct msghdr msg = { 256 + .msg_name = &rqstp->rq_addr, 257 + .msg_namelen = rqstp->rq_addrlen, 258 + .msg_control = cmh, 259 + .msg_controllen = sizeof(buffer), 260 + .msg_flags = MSG_MORE, 261 + }; 262 + 263 + svc_set_cmsg_data(rqstp, cmh); 264 + 265 + if (sock_sendmsg(sock, &msg, 0) < 0) 266 + goto out; 267 + } 268 + 269 + tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1); 270 + headoff = 0; 271 + len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff, 272 + rqstp->rq_respages[0], tailoff); 273 + 211 274 out: 212 275 dprintk("svc: socket %p sendto([%p %Zu... 
], %d) = %d (addr %s)\n", 213 276 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, ··· 453 432 } 454 433 455 434 /* 435 + * See net/ipv4/ip_sockglue.c : ip_cmsg_recv_pktinfo 436 + */ 437 + static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, 438 + struct cmsghdr *cmh) 439 + { 440 + struct in_pktinfo *pki = CMSG_DATA(cmh); 441 + if (cmh->cmsg_type != IP_PKTINFO) 442 + return 0; 443 + rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; 444 + return 1; 445 + } 446 + 447 + /* 448 + * See net/ipv6/datagram.c : datagram_recv_ctl 449 + */ 450 + static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, 451 + struct cmsghdr *cmh) 452 + { 453 + struct in6_pktinfo *pki = CMSG_DATA(cmh); 454 + if (cmh->cmsg_type != IPV6_PKTINFO) 455 + return 0; 456 + ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); 457 + return 1; 458 + } 459 + 460 + /* 456 461 * Copy the UDP datagram's destination address to the rqstp structure. 457 462 * The 'destination' address in this case is the address to which the 458 463 * peer sent the datagram, i.e. our local address. For multihomed 459 464 * hosts, this can change from msg to msg. Note that only the IP 460 465 * address changes, the port number should remain the same. 
461 466 */ 462 - static void svc_udp_get_dest_address(struct svc_rqst *rqstp, 463 - struct cmsghdr *cmh) 467 + static int svc_udp_get_dest_address(struct svc_rqst *rqstp, 468 + struct cmsghdr *cmh) 464 469 { 465 - struct svc_sock *svsk = 466 - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 467 - switch (svsk->sk_sk->sk_family) { 468 - case AF_INET: { 469 - struct in_pktinfo *pki = CMSG_DATA(cmh); 470 - rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; 471 - break; 472 - } 473 - case AF_INET6: { 474 - struct in6_pktinfo *pki = CMSG_DATA(cmh); 475 - ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); 476 - break; 477 - } 470 + switch (cmh->cmsg_level) { 471 + case SOL_IP: 472 + return svc_udp_get_dest_address4(rqstp, cmh); 473 + case SOL_IPV6: 474 + return svc_udp_get_dest_address6(rqstp, cmh); 478 475 } 476 + 477 + return 0; 479 478 } 480 479 481 480 /* ··· 572 531 573 532 rqstp->rq_prot = IPPROTO_UDP; 574 533 575 - if (cmh->cmsg_level != IPPROTO_IP || 576 - cmh->cmsg_type != IP_PKTINFO) { 534 + if (!svc_udp_get_dest_address(rqstp, cmh)) { 577 535 if (net_ratelimit()) 578 - printk("rpcsvc: received unknown control message:" 579 - "%d/%d\n", 580 - cmh->cmsg_level, cmh->cmsg_type); 536 + printk(KERN_WARNING 537 + "svc: received unknown control message %d/%d; " 538 + "dropping RPC reply datagram\n", 539 + cmh->cmsg_level, cmh->cmsg_type); 581 540 skb_free_datagram(svsk->sk_sk, skb); 582 541 return 0; 583 542 } 584 - svc_udp_get_dest_address(rqstp, cmh); 585 543 586 544 if (skb_is_nonlinear(skb)) { 587 545 /* we have to copy */ ··· 691 651 692 652 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) 693 653 { 694 - int one = 1; 695 - mm_segment_t oldfs; 654 + int err, level, optname, one = 1; 696 655 697 656 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); 698 657 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); ··· 710 671 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 711 672 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 
712 673 713 - oldfs = get_fs(); 714 - set_fs(KERNEL_DS); 715 674 /* make sure we get destination address info */ 716 - svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO, 717 - (char __user *)&one, sizeof(one)); 718 - set_fs(oldfs); 675 + switch (svsk->sk_sk->sk_family) { 676 + case AF_INET: 677 + level = SOL_IP; 678 + optname = IP_PKTINFO; 679 + break; 680 + case AF_INET6: 681 + level = SOL_IPV6; 682 + optname = IPV6_RECVPKTINFO; 683 + break; 684 + default: 685 + BUG(); 686 + } 687 + err = kernel_setsockopt(svsk->sk_sock, level, optname, 688 + (char *)&one, sizeof(one)); 689 + dprintk("svc: kernel_setsockopt returned %d\n", err); 719 690 } 720 691 721 692 /* ··· 875 826 } 876 827 877 828 /* 878 - * Receive data from a TCP socket. 829 + * Receive data. 830 + * If we haven't gotten the record length yet, get the next four bytes. 831 + * Otherwise try to gobble up as much as possible up to the complete 832 + * record length. 879 833 */ 880 - static int svc_tcp_recvfrom(struct svc_rqst *rqstp) 834 + static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 881 835 { 882 - struct svc_sock *svsk = 883 - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 884 836 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 885 - int len; 886 - struct kvec *vec; 887 - int pnum, vlen; 888 - 889 - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 890 - svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 891 - test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 892 - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 837 + int len; 893 838 894 839 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 895 840 /* sndbuf needs to have room for one request ··· 904 861 905 862 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 906 863 907 - /* Receive data. If we haven't got the record length yet, get 908 - * the next four bytes. Otherwise try to gobble up as much as 909 - * possible up to the complete record length. 
910 - */ 911 864 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 912 865 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 913 866 struct kvec iov; ··· 918 879 dprintk("svc: short recvfrom while reading record " 919 880 "length (%d of %d)\n", len, want); 920 881 svc_xprt_received(&svsk->sk_xprt); 921 - return -EAGAIN; /* record header not complete */ 882 + goto err_again; /* record header not complete */ 922 883 } 923 884 924 885 svsk->sk_reclen = ntohl(svsk->sk_reclen); ··· 933 894 "per record not supported\n"); 934 895 goto err_delete; 935 896 } 897 + 936 898 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; 937 899 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); 938 900 if (svsk->sk_reclen > serv->sv_max_mesg) { ··· 954 914 dprintk("svc: incomplete TCP record (%d of %d)\n", 955 915 len, svsk->sk_reclen); 956 916 svc_xprt_received(&svsk->sk_xprt); 957 - return -EAGAIN; /* record not complete */ 917 + goto err_again; /* record not complete */ 958 918 } 959 919 len = svsk->sk_reclen; 960 920 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 961 921 922 + return len; 923 + error: 924 + if (len == -EAGAIN) { 925 + dprintk("RPC: TCP recv_record got EAGAIN\n"); 926 + svc_xprt_received(&svsk->sk_xprt); 927 + } 928 + return len; 929 + err_delete: 930 + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 931 + err_again: 932 + return -EAGAIN; 933 + } 934 + 935 + static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 936 + struct rpc_rqst **reqpp, struct kvec *vec) 937 + { 938 + struct rpc_rqst *req = NULL; 939 + u32 *p; 940 + u32 xid; 941 + u32 calldir; 942 + int len; 943 + 944 + len = svc_recvfrom(rqstp, vec, 1, 8); 945 + if (len < 0) 946 + goto error; 947 + 948 + p = (u32 *)rqstp->rq_arg.head[0].iov_base; 949 + xid = *p++; 950 + calldir = *p; 951 + 952 + if (calldir == 0) { 953 + /* REQUEST is the most common case */ 954 + vec[0] = rqstp->rq_arg.head[0]; 955 + } else { 956 + /* REPLY */ 957 + if (svsk->sk_bc_xprt) 958 + req = 
xprt_lookup_rqst(svsk->sk_bc_xprt, xid); 959 + 960 + if (!req) { 961 + printk(KERN_NOTICE 962 + "%s: Got unrecognized reply: " 963 + "calldir 0x%x sk_bc_xprt %p xid %08x\n", 964 + __func__, ntohl(calldir), 965 + svsk->sk_bc_xprt, xid); 966 + vec[0] = rqstp->rq_arg.head[0]; 967 + goto out; 968 + } 969 + 970 + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 971 + sizeof(struct xdr_buf)); 972 + /* copy the xid and call direction */ 973 + memcpy(req->rq_private_buf.head[0].iov_base, 974 + rqstp->rq_arg.head[0].iov_base, 8); 975 + vec[0] = req->rq_private_buf.head[0]; 976 + } 977 + out: 978 + vec[0].iov_base += 8; 979 + vec[0].iov_len -= 8; 980 + len = svsk->sk_reclen - 8; 981 + error: 982 + *reqpp = req; 983 + return len; 984 + } 985 + 986 + /* 987 + * Receive data from a TCP socket. 988 + */ 989 + static int svc_tcp_recvfrom(struct svc_rqst *rqstp) 990 + { 991 + struct svc_sock *svsk = 992 + container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 993 + struct svc_serv *serv = svsk->sk_xprt.xpt_server; 994 + int len; 995 + struct kvec *vec; 996 + int pnum, vlen; 997 + struct rpc_rqst *req = NULL; 998 + 999 + dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1000 + svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 1001 + test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 1002 + test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 1003 + 1004 + len = svc_tcp_recv_record(svsk, rqstp); 1005 + if (len < 0) 1006 + goto error; 1007 + 962 1008 vec = rqstp->rq_vec; 963 1009 vec[0] = rqstp->rq_arg.head[0]; 964 1010 vlen = PAGE_SIZE; 1011 + 1012 + /* 1013 + * We have enough data for the whole tcp record. Let's try and read the 1014 + * first 8 bytes to get the xid and the call direction. We can use this 1015 + * to figure out if this is a call or a reply to a callback. If 1016 + * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. 1017 + * In that case, don't bother with the calldir and just read the data. 1018 + * It will be rejected in svc_process. 
1019 + */ 1020 + if (len >= 8) { 1021 + len = svc_process_calldir(svsk, rqstp, &req, vec); 1022 + if (len < 0) 1023 + goto err_again; 1024 + vlen -= 8; 1025 + } 1026 + 965 1027 pnum = 1; 966 1028 while (vlen < len) { 967 - vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); 1029 + vec[pnum].iov_base = (req) ? 1030 + page_address(req->rq_private_buf.pages[pnum - 1]) : 1031 + page_address(rqstp->rq_pages[pnum]); 968 1032 vec[pnum].iov_len = PAGE_SIZE; 969 1033 pnum++; 970 1034 vlen += PAGE_SIZE; ··· 1078 934 /* Now receive data */ 1079 935 len = svc_recvfrom(rqstp, vec, pnum, len); 1080 936 if (len < 0) 1081 - goto error; 937 + goto err_again; 1082 938 939 + /* 940 + * Account for the 8 bytes we read earlier 941 + */ 942 + len += 8; 943 + 944 + if (req) { 945 + xprt_complete_rqst(req->rq_task, len); 946 + len = 0; 947 + goto out; 948 + } 1083 949 dprintk("svc: TCP complete record (%d bytes)\n", len); 1084 950 rqstp->rq_arg.len = len; 1085 951 rqstp->rq_arg.page_base = 0; ··· 1103 949 rqstp->rq_xprt_ctxt = NULL; 1104 950 rqstp->rq_prot = IPPROTO_TCP; 1105 951 952 + out: 1106 953 /* Reset TCP read info */ 1107 954 svsk->sk_reclen = 0; 1108 955 svsk->sk_tcplen = 0; ··· 1115 960 1116 961 return len; 1117 962 1118 - err_delete: 1119 - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1120 - return -EAGAIN; 1121 - 1122 - error: 963 + err_again: 1123 964 if (len == -EAGAIN) { 1124 965 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 1125 966 svc_xprt_received(&svsk->sk_xprt); 1126 - } else { 967 + return len; 968 + } 969 + error: 970 + if (len != -EAGAIN) { 1127 971 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1128 972 svsk->sk_xprt.xpt_server->sv_name, -len); 1129 - goto err_delete; 973 + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1130 974 } 1131 - 1132 - return len; 975 + return -EAGAIN; 1133 976 } 1134 977 1135 978 /*
+11 -4
net/sunrpc/xprt.c
··· 832 832 spin_unlock_bh(&xprt->transport_lock); 833 833 } 834 834 835 + static inline int xprt_has_timer(struct rpc_xprt *xprt) 836 + { 837 + return xprt->idle_timeout != 0; 838 + } 839 + 835 840 /** 836 841 * xprt_prepare_transmit - reserve the transport before sending a request 837 842 * @task: RPC task about to send a request ··· 1018 1013 if (!list_empty(&req->rq_list)) 1019 1014 list_del(&req->rq_list); 1020 1015 xprt->last_used = jiffies; 1021 - if (list_empty(&xprt->recv)) 1016 + if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) 1022 1017 mod_timer(&xprt->timer, 1023 1018 xprt->last_used + xprt->idle_timeout); 1024 1019 spin_unlock_bh(&xprt->transport_lock); ··· 1087 1082 #endif /* CONFIG_NFS_V4_1 */ 1088 1083 1089 1084 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1090 - setup_timer(&xprt->timer, xprt_init_autodisconnect, 1091 - (unsigned long)xprt); 1085 + if (xprt_has_timer(xprt)) 1086 + setup_timer(&xprt->timer, xprt_init_autodisconnect, 1087 + (unsigned long)xprt); 1088 + else 1089 + init_timer(&xprt->timer); 1092 1090 xprt->last_used = jiffies; 1093 1091 xprt->cwnd = RPC_INITCWND; 1094 1092 xprt->bind_index = 0; ··· 1110 1102 1111 1103 dprintk("RPC: created transport %p with %u slots\n", xprt, 1112 1104 xprt->max_reqs); 1113 - 1114 1105 return xprt; 1115 1106 } 1116 1107
+2 -2
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 730 730 goto err; 731 731 732 732 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); 733 - if (!mr) 733 + if (IS_ERR(mr)) 734 734 goto err_free_frmr; 735 735 736 736 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, 737 737 RPCSVC_MAXPAGES); 738 - if (!pl) 738 + if (IS_ERR(pl)) 739 739 goto err_free_mr; 740 740 741 741 frmr->mr = mr;
+240 -2
net/sunrpc/xprtsock.c
··· 32 32 #include <linux/tcp.h> 33 33 #include <linux/sunrpc/clnt.h> 34 34 #include <linux/sunrpc/sched.h> 35 + #include <linux/sunrpc/svcsock.h> 35 36 #include <linux/sunrpc/xprtsock.h> 36 37 #include <linux/file.h> 37 38 #ifdef CONFIG_NFS_V4_1 ··· 44 43 #include <net/udp.h> 45 44 #include <net/tcp.h> 46 45 46 + #include "sunrpc.h" 47 47 /* 48 48 * xprtsock tunables 49 49 */ ··· 2100 2098 xprt->stat.bklog_u); 2101 2099 } 2102 2100 2101 + /* 2102 + * Allocate a page for a scratch buffer for the rpc code. The reason 2103 + * we allocate pages instead of doing a kmalloc like rpc_malloc is because we want 2104 + * to use the server side send routines. 2105 + */ 2106 + void *bc_malloc(struct rpc_task *task, size_t size) 2107 + { 2108 + struct page *page; 2109 + struct rpc_buffer *buf; 2110 + 2111 + BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer)); 2112 + page = alloc_page(GFP_KERNEL); 2113 + 2114 + if (!page) 2115 + return NULL; 2116 + 2117 + buf = page_address(page); 2118 + buf->len = PAGE_SIZE; 2119 + 2120 + return buf->data; 2121 + } 2122 + 2123 + /* 2124 + * Free the space allocated in the bc_malloc routine 2125 + */ 2126 + void bc_free(void *buffer) 2127 + { 2128 + struct rpc_buffer *buf; 2129 + 2130 + if (!buffer) 2131 + return; 2132 + 2133 + buf = container_of(buffer, struct rpc_buffer, data); 2134 + free_page((unsigned long)buf); 2135 + } 2136 + 2137 + /* 2138 + * Use the svc_sock to send the callback. Must be called with the svc_xprt's xpt_mutex 2139 + * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request. 
2140 + */ 2141 + static int bc_sendto(struct rpc_rqst *req) 2142 + { 2143 + int len; 2144 + struct xdr_buf *xbufp = &req->rq_snd_buf; 2145 + struct rpc_xprt *xprt = req->rq_xprt; 2146 + struct sock_xprt *transport = 2147 + container_of(xprt, struct sock_xprt, xprt); 2148 + struct socket *sock = transport->sock; 2149 + unsigned long headoff; 2150 + unsigned long tailoff; 2151 + 2152 + /* 2153 + * Set up the rpc header and record marker stuff 2154 + */ 2155 + xs_encode_tcp_record_marker(xbufp); 2156 + 2157 + tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; 2158 + headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; 2159 + len = svc_send_common(sock, xbufp, 2160 + virt_to_page(xbufp->head[0].iov_base), headoff, 2161 + xbufp->tail[0].iov_base, tailoff); 2162 + 2163 + if (len != xbufp->len) { 2164 + printk(KERN_NOTICE "Error sending entire callback!\n"); 2165 + len = -EAGAIN; 2166 + } 2167 + 2168 + return len; 2169 + } 2170 + 2171 + /* 2172 + * The send routine. Borrows from svc_send 2173 + */ 2174 + static int bc_send_request(struct rpc_task *task) 2175 + { 2176 + struct rpc_rqst *req = task->tk_rqstp; 2177 + struct svc_xprt *xprt; 2178 + struct svc_sock *svsk; 2179 + u32 len; 2180 + 2181 + dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); 2182 + /* 2183 + * Get the server socket associated with this callback xprt 2184 + */ 2185 + xprt = req->rq_xprt->bc_xprt; 2186 + svsk = container_of(xprt, struct svc_sock, sk_xprt); 2187 + 2188 + /* 2189 + * Grab the mutex to serialize data as the connection is shared 2190 + * with the fore channel 2191 + */ 2192 + if (!mutex_trylock(&xprt->xpt_mutex)) { 2193 + rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL); 2194 + if (!mutex_trylock(&xprt->xpt_mutex)) 2195 + return -EAGAIN; 2196 + rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task); 2197 + } 2198 + if (test_bit(XPT_DEAD, &xprt->xpt_flags)) 2199 + len = -ENOTCONN; 2200 + else 2201 + len = bc_sendto(req); 2202 + mutex_unlock(&xprt->xpt_mutex); 
2203 + 2204 + if (len > 0) 2205 + len = 0; 2206 + 2207 + return len; 2208 + } 2209 + 2210 + /* 2211 + * The close routine. Since this is client initiated, we do nothing 2212 + */ 2213 + 2214 + static void bc_close(struct rpc_xprt *xprt) 2215 + { 2216 + return; 2217 + } 2218 + 2219 + /* 2220 + * The xprt destroy routine. Again, because this connection is client 2221 + * initiated, we do nothing 2222 + */ 2223 + 2224 + static void bc_destroy(struct rpc_xprt *xprt) 2225 + { 2226 + return; 2227 + } 2228 + 2103 2229 static struct rpc_xprt_ops xs_udp_ops = { 2104 2230 .set_buffer_size = xs_udp_set_buffer_size, 2105 2231 .reserve_xprt = xprt_reserve_xprt_cong, ··· 2261 2131 #endif /* CONFIG_NFS_V4_1 */ 2262 2132 .close = xs_tcp_close, 2263 2133 .destroy = xs_destroy, 2134 + .print_stats = xs_tcp_print_stats, 2135 + }; 2136 + 2137 + /* 2138 + * The rpc_xprt_ops for the server backchannel 2139 + */ 2140 + 2141 + static struct rpc_xprt_ops bc_tcp_ops = { 2142 + .reserve_xprt = xprt_reserve_xprt, 2143 + .release_xprt = xprt_release_xprt, 2144 + .buf_alloc = bc_malloc, 2145 + .buf_free = bc_free, 2146 + .send_request = bc_send_request, 2147 + .set_retrans_timeout = xprt_set_retrans_timeout_def, 2148 + .close = bc_close, 2149 + .destroy = bc_destroy, 2264 2150 .print_stats = xs_tcp_print_stats, 2265 2151 }; 2266 2152 ··· 2468 2322 return ERR_PTR(-EINVAL); 2469 2323 } 2470 2324 2325 + /** 2326 + * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket 2327 + * @args: rpc transport creation arguments 2328 + * 2329 + */ 2330 + static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) 2331 + { 2332 + struct sockaddr *addr = args->dstaddr; 2333 + struct rpc_xprt *xprt; 2334 + struct sock_xprt *transport; 2335 + struct svc_sock *bc_sock; 2336 + 2337 + if (!args->bc_xprt) 2338 + ERR_PTR(-EINVAL); 2339 + 2340 + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2341 + if (IS_ERR(xprt)) 2342 + return xprt; 2343 + transport = container_of(xprt, struct sock_xprt, 
xprt); 2344 + 2345 + xprt->prot = IPPROTO_TCP; 2346 + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); 2347 + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 2348 + xprt->timeout = &xs_tcp_default_timeout; 2349 + 2350 + /* backchannel */ 2351 + xprt_set_bound(xprt); 2352 + xprt->bind_timeout = 0; 2353 + xprt->connect_timeout = 0; 2354 + xprt->reestablish_timeout = 0; 2355 + xprt->idle_timeout = 0; 2356 + 2357 + /* 2358 + * The backchannel uses the same socket connection as the 2359 + * forechannel 2360 + */ 2361 + xprt->bc_xprt = args->bc_xprt; 2362 + bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); 2363 + bc_sock->sk_bc_xprt = xprt; 2364 + transport->sock = bc_sock->sk_sock; 2365 + transport->inet = bc_sock->sk_sk; 2366 + 2367 + xprt->ops = &bc_tcp_ops; 2368 + 2369 + switch (addr->sa_family) { 2370 + case AF_INET: 2371 + xs_format_peer_addresses(xprt, "tcp", 2372 + RPCBIND_NETID_TCP); 2373 + break; 2374 + case AF_INET6: 2375 + xs_format_peer_addresses(xprt, "tcp", 2376 + RPCBIND_NETID_TCP6); 2377 + break; 2378 + default: 2379 + kfree(xprt); 2380 + return ERR_PTR(-EAFNOSUPPORT); 2381 + } 2382 + 2383 + if (xprt_bound(xprt)) 2384 + dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2385 + xprt->address_strings[RPC_DISPLAY_ADDR], 2386 + xprt->address_strings[RPC_DISPLAY_PORT], 2387 + xprt->address_strings[RPC_DISPLAY_PROTO]); 2388 + else 2389 + dprintk("RPC: set up xprt to %s (autobind) via %s\n", 2390 + xprt->address_strings[RPC_DISPLAY_ADDR], 2391 + xprt->address_strings[RPC_DISPLAY_PROTO]); 2392 + 2393 + /* 2394 + * Since we don't want connections for the backchannel, we set 2395 + * the xprt status to connected 2396 + */ 2397 + xprt_set_connected(xprt); 2398 + 2399 + 2400 + if (try_module_get(THIS_MODULE)) 2401 + return xprt; 2402 + kfree(xprt->slot); 2403 + kfree(xprt); 2404 + return ERR_PTR(-EINVAL); 2405 + } 2406 + 2471 2407 static struct xprt_class xs_udp_transport = { 2472 2408 .list = LIST_HEAD_INIT(xs_udp_transport.list), 2473 2409 .name = 
"udp", 2474 2410 .owner = THIS_MODULE, 2475 - .ident = IPPROTO_UDP, 2411 + .ident = XPRT_TRANSPORT_UDP, 2476 2412 .setup = xs_setup_udp, 2477 2413 }; 2478 2414 ··· 2562 2334 .list = LIST_HEAD_INIT(xs_tcp_transport.list), 2563 2335 .name = "tcp", 2564 2336 .owner = THIS_MODULE, 2565 - .ident = IPPROTO_TCP, 2337 + .ident = XPRT_TRANSPORT_TCP, 2566 2338 .setup = xs_setup_tcp, 2339 + }; 2340 + 2341 + static struct xprt_class xs_bc_tcp_transport = { 2342 + .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), 2343 + .name = "tcp NFSv4.1 backchannel", 2344 + .owner = THIS_MODULE, 2345 + .ident = XPRT_TRANSPORT_BC_TCP, 2346 + .setup = xs_setup_bc_tcp, 2567 2347 }; 2568 2348 2569 2349 /** ··· 2587 2351 2588 2352 xprt_register_transport(&xs_udp_transport); 2589 2353 xprt_register_transport(&xs_tcp_transport); 2354 + xprt_register_transport(&xs_bc_tcp_transport); 2590 2355 2591 2356 return 0; 2592 2357 } ··· 2607 2370 2608 2371 xprt_unregister_transport(&xs_udp_transport); 2609 2372 xprt_unregister_transport(&xs_tcp_transport); 2373 + xprt_unregister_transport(&xs_bc_tcp_transport); 2610 2374 } 2611 2375 2612 2376 static int param_set_uint_minmax(const char *val, struct kernel_param *kp,