Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfsd-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:
"The big ticket item for this release is that support for RPC-with-TLS
[RFC 9289] has been added to the Linux NFS server.

The goal is to provide a simple-to-deploy, low-overhead in-transit
confidentiality and peer authentication mechanism. It can supplement
NFS Kerberos and it can protect the use of legacy non-cryptographic
user authentication flavors such as AUTH_SYS. The TLS Record protocol
is handled entirely by kTLS, meaning it can use either software
encryption or offload encryption to smart NICs.

Aside from that, work continues on improving NFSD's open file cache.
Among the many clean-ups in that area is a patch to convert the
rhashtable to use the list-hashing version of that data structure"

* tag 'nfsd-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (31 commits)
NFSD: Handle new xprtsec= export option
SUNRPC: Support TLS handshake in the server-side TCP socket code
NFSD: Clean up xattr memory allocation flags
NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop
SUNRPC: Clear rq_xid when receiving a new RPC Call
SUNRPC: Recognize control messages in server-side TCP socket code
SUNRPC: Be even lazier about releasing pages
SUNRPC: Convert svc_xprt_release() to the release_pages() API
SUNRPC: Relocate svc_free_res_pages()
nfsd: simplify the delayed disposal list code
SUNRPC: Ignore return value of ->xpo_sendto
SUNRPC: Ensure server-side sockets have a sock->file
NFSD: Watch for rq_pages bounds checking errors in nfsd_splice_actor()
sunrpc: simplify two-level sysctl registration for svcrdma_parm_table
SUNRPC: return proper error from get_expiry()
lockd: add some client-side tracepoints
nfs: move nfs_fhandle_hash to common include file
lockd: server should unlock lock if client rejects the grant
lockd: fix races in client GRANTED_MSG wait logic
lockd: move struct nlm_wait to lockd.h
...

+802 -442
+4 -2
fs/lockd/Makefile
··· 3 3 # Makefile for the linux lock manager stuff 4 4 # 5 5 6 + ccflags-y += -I$(src) # needed for trace events 7 + 6 8 obj-$(CONFIG_LOCKD) += lockd.o 7 9 8 - lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ 9 - svcshare.o svcproc.o svcsubs.o mon.o xdr.o 10 + lockd-objs-y += clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ 11 + svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o 10 12 lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o 11 13 lockd-objs-$(CONFIG_PROC_FS) += procfs.o 12 14 lockd-objs := $(lockd-objs-y)
+25 -33
fs/lockd/clntlock.c
··· 14 14 #include <linux/nfs_fs.h> 15 15 #include <linux/sunrpc/addr.h> 16 16 #include <linux/sunrpc/svc.h> 17 + #include <linux/sunrpc/svc_xprt.h> 17 18 #include <linux/lockd/lockd.h> 18 19 #include <linux/kthread.h> 20 + 21 + #include "trace.h" 19 22 20 23 #define NLMDBG_FACILITY NLMDBG_CLIENT 21 24 ··· 31 28 * The following functions handle blocking and granting from the 32 29 * client perspective. 33 30 */ 34 - 35 - /* 36 - * This is the representation of a blocked client lock. 37 - */ 38 - struct nlm_wait { 39 - struct list_head b_list; /* linked list */ 40 - wait_queue_head_t b_wait; /* where to wait on */ 41 - struct nlm_host * b_host; 42 - struct file_lock * b_lock; /* local file lock */ 43 - unsigned short b_reclaim; /* got to reclaim lock */ 44 - __be32 b_status; /* grant callback status */ 45 - }; 46 31 47 32 static LIST_HEAD(nlm_blocked); 48 33 static DEFINE_SPINLOCK(nlm_blocked_lock); ··· 85 94 } 86 95 EXPORT_SYMBOL_GPL(nlmclnt_done); 87 96 97 + void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl) 98 + { 99 + block->b_host = host; 100 + block->b_lock = fl; 101 + init_waitqueue_head(&block->b_wait); 102 + block->b_status = nlm_lck_blocked; 103 + } 104 + 88 105 /* 89 106 * Queue up a lock for blocking so that the GRANTED request can see it 90 107 */ 91 - struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl) 108 + void nlmclnt_queue_block(struct nlm_wait *block) 92 109 { 93 - struct nlm_wait *block; 94 - 95 - block = kmalloc(sizeof(*block), GFP_KERNEL); 96 - if (block != NULL) { 97 - block->b_host = host; 98 - block->b_lock = fl; 99 - init_waitqueue_head(&block->b_wait); 100 - block->b_status = nlm_lck_blocked; 101 - 102 - spin_lock(&nlm_blocked_lock); 103 - list_add(&block->b_list, &nlm_blocked); 104 - spin_unlock(&nlm_blocked_lock); 105 - } 106 - return block; 110 + spin_lock(&nlm_blocked_lock); 111 + list_add(&block->b_list, &nlm_blocked); 112 + spin_unlock(&nlm_blocked_lock); 
107 113 } 108 114 109 - void nlmclnt_finish_block(struct nlm_wait *block) 115 + /* 116 + * Dequeue the block and return its final status 117 + */ 118 + __be32 nlmclnt_dequeue_block(struct nlm_wait *block) 110 119 { 111 - if (block == NULL) 112 - return; 120 + __be32 status; 121 + 113 122 spin_lock(&nlm_blocked_lock); 114 123 list_del(&block->b_list); 124 + status = block->b_status; 115 125 spin_unlock(&nlm_blocked_lock); 116 - kfree(block); 126 + return status; 117 127 } 118 128 119 129 /* 120 130 * Block on a lock 121 131 */ 122 - int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) 132 + int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout) 123 133 { 124 134 long ret; 125 135 ··· 146 154 /* Reset the lock status after a server reboot so we resend */ 147 155 if (block->b_status == nlm_lck_denied_grace_period) 148 156 block->b_status = nlm_lck_blocked; 149 - req->a_res.status = block->b_status; 150 157 return 0; 151 158 } 152 159 ··· 189 198 res = nlm_granted; 190 199 } 191 200 spin_unlock(&nlm_blocked_lock); 201 + trace_nlmclnt_grant(lock, addr, svc_addr_len(addr), res); 192 202 return res; 193 203 } 194 204
+32 -10
fs/lockd/clntproc.c
··· 20 20 #include <linux/sunrpc/svc.h> 21 21 #include <linux/lockd/lockd.h> 22 22 23 + #include "trace.h" 24 + 23 25 #define NLMDBG_FACILITY NLMDBG_CLIENT 24 26 #define NLMCLNT_GRACE_WAIT (5*HZ) 25 27 #define NLMCLNT_POLL_TIMEOUT (30*HZ) ··· 453 451 status = nlm_stat_to_errno(req->a_res.status); 454 452 } 455 453 out: 454 + trace_nlmclnt_test(&req->a_args.lock, 455 + (const struct sockaddr *)&req->a_host->h_addr, 456 + req->a_host->h_addrlen, req->a_res.status); 456 457 nlmclnt_release_call(req); 457 458 return status; 458 459 } ··· 521 516 const struct cred *cred = nfs_file_cred(fl->fl_file); 522 517 struct nlm_host *host = req->a_host; 523 518 struct nlm_res *resp = &req->a_res; 524 - struct nlm_wait *block = NULL; 519 + struct nlm_wait block; 525 520 unsigned char fl_flags = fl->fl_flags; 526 521 unsigned char fl_type; 522 + __be32 b_status; 527 523 int status = -ENOLCK; 528 524 529 525 if (nsm_monitor(host) < 0) ··· 537 531 if (status < 0) 538 532 goto out; 539 533 540 - block = nlmclnt_prepare_block(host, fl); 534 + nlmclnt_prepare_block(&block, host, fl); 541 535 again: 542 536 /* 543 537 * Initialise resp->status to a valid non-zero value, 544 538 * since 0 == nlm_lck_granted 545 539 */ 546 540 resp->status = nlm_lck_blocked; 547 - for(;;) { 541 + 542 + /* 543 + * A GRANTED callback can come at any time -- even before the reply 544 + * to the LOCK request arrives, so we queue the wait before 545 + * requesting the lock. 546 + */ 547 + nlmclnt_queue_block(&block); 548 + for (;;) { 548 549 /* Reboot protection */ 549 550 fl->fl_u.nfs_fl.state = host->h_state; 550 551 status = nlmclnt_call(cred, req, NLMPROC_LOCK); 551 552 if (status < 0) 552 553 break; 553 554 /* Did a reclaimer thread notify us of a server reboot? 
*/ 554 - if (resp->status == nlm_lck_denied_grace_period) 555 + if (resp->status == nlm_lck_denied_grace_period) 555 556 continue; 556 557 if (resp->status != nlm_lck_blocked) 557 558 break; 558 559 /* Wait on an NLM blocking lock */ 559 - status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 560 + status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT); 560 561 if (status < 0) 561 562 break; 562 - if (resp->status != nlm_lck_blocked) 563 + if (block.b_status != nlm_lck_blocked) 563 564 break; 564 565 } 566 + b_status = nlmclnt_dequeue_block(&block); 567 + if (resp->status == nlm_lck_blocked) 568 + resp->status = b_status; 565 569 566 570 /* if we were interrupted while blocking, then cancel the lock request 567 571 * and exit ··· 580 564 if (!req->a_args.block) 581 565 goto out_unlock; 582 566 if (nlmclnt_cancel(host, req->a_args.block, fl) == 0) 583 - goto out_unblock; 567 + goto out; 584 568 } 585 569 586 570 if (resp->status == nlm_granted) { ··· 609 593 status = -ENOLCK; 610 594 else 611 595 status = nlm_stat_to_errno(resp->status); 612 - out_unblock: 613 - nlmclnt_finish_block(block); 614 596 out: 597 + trace_nlmclnt_lock(&req->a_args.lock, 598 + (const struct sockaddr *)&req->a_host->h_addr, 599 + req->a_host->h_addrlen, req->a_res.status); 615 600 nlmclnt_release_call(req); 616 601 return status; 617 602 out_unlock: 618 603 /* Fatal error: ensure that we remove the lock altogether */ 604 + trace_nlmclnt_lock(&req->a_args.lock, 605 + (const struct sockaddr *)&req->a_host->h_addr, 606 + req->a_host->h_addrlen, req->a_res.status); 619 607 dprintk("lockd: lock attempt ended in fatal error.\n" 620 608 " Attempting to unlock.\n"); 621 - nlmclnt_finish_block(block); 622 609 fl_type = fl->fl_type; 623 610 fl->fl_type = F_UNLCK; 624 611 down_read(&host->h_rwsem); ··· 715 696 /* What to do now? I'm out of my depth... 
*/ 716 697 status = -ENOLCK; 717 698 out: 699 + trace_nlmclnt_unlock(&req->a_args.lock, 700 + (const struct sockaddr *)&req->a_host->h_addr, 701 + req->a_host->h_addrlen, req->a_res.status); 718 702 nlmclnt_release_call(req); 719 703 return status; 720 704 }
+1
fs/lockd/host.c
··· 629 629 rpc_shutdown_client(host->h_rpcclnt); 630 630 host->h_rpcclnt = NULL; 631 631 } 632 + nlmsvc_free_host_resources(host); 632 633 } 633 634 634 635 /* Then, perform a garbage collection pass */
+17 -4
fs/lockd/svclock.c
··· 954 954 nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status) 955 955 { 956 956 struct nlm_block *block; 957 + struct file_lock *fl; 958 + int error; 957 959 958 960 dprintk("grant_reply: looking for cookie %x, s=%d \n", 959 961 *(unsigned int *)(cookie->data), status); 960 962 if (!(block = nlmsvc_find_block(cookie))) 961 963 return; 962 964 963 - if (status == nlm_lck_denied_grace_period) { 965 + switch (status) { 966 + case nlm_lck_denied_grace_period: 964 967 /* Try again in a couple of seconds */ 965 968 nlmsvc_insert_block(block, 10 * HZ); 966 - } else { 969 + break; 970 + case nlm_lck_denied: 971 + /* Client doesn't want it, just unlock it */ 972 + nlmsvc_unlink_block(block); 973 + fl = &block->b_call->a_args.lock.fl; 974 + fl->fl_type = F_UNLCK; 975 + error = vfs_lock_file(fl->fl_file, F_SETLK, fl, NULL); 976 + if (error) 977 + pr_warn("lockd: unable to unlock lock rejected by client!\n"); 978 + break; 979 + default: 967 980 /* 968 - * Lock is now held by client, or has been rejected. 969 - * In both cases, the block should be removed. 981 + * Either it was accepted or the status makes no sense 982 + * just unlink it either way. 970 983 */ 971 984 nlmsvc_unlink_block(block); 972 985 }
+3
fs/lockd/trace.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #define CREATE_TRACE_POINTS 3 + #include "trace.h"
+106
fs/lockd/trace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #undef TRACE_SYSTEM 3 + #define TRACE_SYSTEM lockd 4 + 5 + #if !defined(_TRACE_LOCKD_H) || defined(TRACE_HEADER_MULTI_READ) 6 + #define _TRACE_LOCKD_H 7 + 8 + #include <linux/tracepoint.h> 9 + #include <linux/crc32.h> 10 + #include <linux/nfs.h> 11 + #include <linux/lockd/lockd.h> 12 + 13 + #ifdef CONFIG_LOCKD_V4 14 + #define NLM_STATUS_LIST \ 15 + nlm_status_code(LCK_GRANTED) \ 16 + nlm_status_code(LCK_DENIED) \ 17 + nlm_status_code(LCK_DENIED_NOLOCKS) \ 18 + nlm_status_code(LCK_BLOCKED) \ 19 + nlm_status_code(LCK_DENIED_GRACE_PERIOD) \ 20 + nlm_status_code(DEADLCK) \ 21 + nlm_status_code(ROFS) \ 22 + nlm_status_code(STALE_FH) \ 23 + nlm_status_code(FBIG) \ 24 + nlm_status_code_end(FAILED) 25 + #else 26 + #define NLM_STATUS_LIST \ 27 + nlm_status_code(LCK_GRANTED) \ 28 + nlm_status_code(LCK_DENIED) \ 29 + nlm_status_code(LCK_DENIED_NOLOCKS) \ 30 + nlm_status_code(LCK_BLOCKED) \ 31 + nlm_status_code_end(LCK_DENIED_GRACE_PERIOD) 32 + #endif 33 + 34 + #undef nlm_status_code 35 + #undef nlm_status_code_end 36 + #define nlm_status_code(x) TRACE_DEFINE_ENUM(NLM_##x); 37 + #define nlm_status_code_end(x) TRACE_DEFINE_ENUM(NLM_##x); 38 + 39 + NLM_STATUS_LIST 40 + 41 + #undef nlm_status_code 42 + #undef nlm_status_code_end 43 + #define nlm_status_code(x) { NLM_##x, #x }, 44 + #define nlm_status_code_end(x) { NLM_##x, #x } 45 + 46 + #define show_nlm_status(x) __print_symbolic(x, NLM_STATUS_LIST) 47 + 48 + DECLARE_EVENT_CLASS(nlmclnt_lock_event, 49 + TP_PROTO( 50 + const struct nlm_lock *lock, 51 + const struct sockaddr *addr, 52 + unsigned int addrlen, 53 + __be32 status 54 + ), 55 + 56 + TP_ARGS(lock, addr, addrlen, status), 57 + 58 + TP_STRUCT__entry( 59 + __field(u32, oh) 60 + __field(u32, svid) 61 + __field(u32, fh) 62 + __field(unsigned long, status) 63 + __field(u64, start) 64 + __field(u64, len) 65 + __sockaddr(addr, addrlen) 66 + ), 67 + 68 + TP_fast_assign( 69 + __entry->oh = ~crc32_le(0xffffffff, lock->oh.data, 
lock->oh.len); 70 + __entry->svid = lock->svid; 71 + __entry->fh = nfs_fhandle_hash(&lock->fh); 72 + __entry->start = lock->lock_start; 73 + __entry->len = lock->lock_len; 74 + __entry->status = be32_to_cpu(status); 75 + __assign_sockaddr(addr, addr, addrlen); 76 + ), 77 + 78 + TP_printk( 79 + "addr=%pISpc oh=0x%08x svid=0x%08x fh=0x%08x start=%llu len=%llu status=%s", 80 + __get_sockaddr(addr), __entry->oh, __entry->svid, 81 + __entry->fh, __entry->start, __entry->len, 82 + show_nlm_status(__entry->status) 83 + ) 84 + ); 85 + 86 + #define DEFINE_NLMCLNT_EVENT(name) \ 87 + DEFINE_EVENT(nlmclnt_lock_event, name, \ 88 + TP_PROTO( \ 89 + const struct nlm_lock *lock, \ 90 + const struct sockaddr *addr, \ 91 + unsigned int addrlen, \ 92 + __be32 status \ 93 + ), \ 94 + TP_ARGS(lock, addr, addrlen, status)) 95 + 96 + DEFINE_NLMCLNT_EVENT(nlmclnt_test); 97 + DEFINE_NLMCLNT_EVENT(nlmclnt_lock); 98 + DEFINE_NLMCLNT_EVENT(nlmclnt_unlock); 99 + DEFINE_NLMCLNT_EVENT(nlmclnt_grant); 100 + 101 + #endif /* _TRACE_LOCKD_H */ 102 + 103 + #undef TRACE_INCLUDE_PATH 104 + #define TRACE_INCLUDE_PATH . 105 + #define TRACE_INCLUDE_FILE trace 106 + #include <trace/define_trace.h>
+6 -3
fs/nfs/export.c
··· 149 149 .encode_fh = nfs_encode_fh, 150 150 .fh_to_dentry = nfs_fh_to_dentry, 151 151 .get_parent = nfs_get_parent, 152 - .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| 153 - EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| 154 - EXPORT_OP_NOATOMIC_ATTR, 152 + .flags = EXPORT_OP_NOWCC | 153 + EXPORT_OP_NOSUBTREECHK | 154 + EXPORT_OP_CLOSE_BEFORE_UNLINK | 155 + EXPORT_OP_REMOTE_FS | 156 + EXPORT_OP_NOATOMIC_ATTR | 157 + EXPORT_OP_FLUSH_ON_CLOSE, 155 158 };
-15
fs/nfs/internal.h
··· 855 855 } 856 856 857 857 #ifdef CONFIG_CRC32 858 - /** 859 - * nfs_fhandle_hash - calculate the crc32 hash for the filehandle 860 - * @fh - pointer to filehandle 861 - * 862 - * returns a crc32 hash for the filehandle that is compatible with 863 - * the one displayed by "wireshark". 864 - */ 865 - static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) 866 - { 867 - return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); 868 - } 869 858 static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) 870 859 { 871 860 return ~crc32_le(0xFFFFFFFF, &stateid->other[0], 872 861 NFS4_STATEID_OTHER_SIZE); 873 862 } 874 863 #else 875 - static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) 876 - { 877 - return 0; 878 - } 879 864 static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) 880 865 { 881 866 return 0;
+54 -10
fs/nfsd/export.c
··· 123 123 124 124 /* OK, we seem to have a valid key */ 125 125 key.h.flags = 0; 126 - key.h.expiry_time = get_expiry(&mesg); 127 - if (key.h.expiry_time == 0) 126 + err = get_expiry(&mesg, &key.h.expiry_time); 127 + if (err) 128 128 goto out; 129 129 130 - key.ek_client = dom; 130 + key.ek_client = dom; 131 131 key.ek_fsidtype = fsidtype; 132 132 memcpy(key.ek_fsid, buf, len); 133 133 ··· 439 439 return -EINVAL; 440 440 } 441 441 return 0; 442 - 443 442 } 444 443 445 444 #ifdef CONFIG_NFSD_V4 ··· 545 546 secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } 546 547 #endif 547 548 549 + static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp) 550 + { 551 + unsigned int i, mode, listsize; 552 + int err; 553 + 554 + err = get_uint(mesg, &listsize); 555 + if (err) 556 + return err; 557 + if (listsize > NFSEXP_XPRTSEC_NUM) 558 + return -EINVAL; 559 + 560 + exp->ex_xprtsec_modes = 0; 561 + for (i = 0; i < listsize; i++) { 562 + err = get_uint(mesg, &mode); 563 + if (err) 564 + return err; 565 + if (mode > NFSEXP_XPRTSEC_MTLS) 566 + return -EINVAL; 567 + exp->ex_xprtsec_modes |= mode; 568 + } 569 + return 0; 570 + } 571 + 548 572 static inline int 549 573 nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid) 550 574 { ··· 630 608 exp.ex_client = dom; 631 609 exp.cd = cd; 632 610 exp.ex_devid_map = NULL; 611 + exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL; 633 612 634 613 /* expiry */ 635 - err = -EINVAL; 636 - exp.h.expiry_time = get_expiry(&mesg); 637 - if (exp.h.expiry_time == 0) 614 + err = get_expiry(&mesg, &exp.h.expiry_time); 615 + if (err) 638 616 goto out3; 639 617 640 618 /* flags */ ··· 646 624 if (err || an_int < 0) 647 625 goto out3; 648 626 exp.ex_flags= an_int; 649 - 627 + 650 628 /* anon uid */ 651 629 err = get_int(&mesg, &an_int); 652 630 if (err) ··· 672 650 err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid); 673 651 else if (strcmp(buf, "secinfo") == 0) 674 652 err = secinfo_parse(&mesg, buf, &exp); 653 + else 
if (strcmp(buf, "xprtsec") == 0) 654 + err = xprtsec_parse(&mesg, buf, &exp); 675 655 else 676 656 /* quietly ignore unknown words and anything 677 657 * following. Newer user-space can try to set ··· 687 663 err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid); 688 664 if (err) 689 665 goto out4; 666 + 690 667 /* 691 668 * No point caching this if it would immediately expire. 692 669 * Also, this protects exportfs's dummy export from the ··· 849 824 for (i = 0; i < MAX_SECINFO_LIST; i++) { 850 825 new->ex_flavors[i] = item->ex_flavors[i]; 851 826 } 827 + new->ex_xprtsec_modes = item->ex_xprtsec_modes; 852 828 } 853 829 854 830 static struct cache_head *svc_export_alloc(void) ··· 1061 1035 1062 1036 __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) 1063 1037 { 1064 - struct exp_flavor_info *f; 1065 - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; 1038 + struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors; 1039 + struct svc_xprt *xprt = rqstp->rq_xprt; 1066 1040 1041 + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) { 1042 + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) 1043 + goto ok; 1044 + } 1045 + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) { 1046 + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) && 1047 + !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) 1048 + goto ok; 1049 + } 1050 + if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) { 1051 + if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) && 1052 + test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) 1053 + goto ok; 1054 + } 1055 + goto denied; 1056 + 1057 + ok: 1067 1058 /* legacy gss-only clients are always OK: */ 1068 1059 if (exp->ex_client == rqstp->rq_gssclient) 1069 1060 return 0; ··· 1105 1062 if (nfsd4_spo_must_allow(rqstp)) 1106 1063 return 0; 1107 1064 1065 + denied: 1108 1066 return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec; 1109 1067 } 1110 1068
+1
fs/nfsd/export.h
··· 77 77 struct cache_detail *cd; 78 78 struct rcu_head ex_rcu; 79 79 struct export_stats ex_stats; 80 + unsigned long ex_xprtsec_modes; 80 81 }; 81 82 82 83 /* an "export key" (expkey) maps a filehandle fragment to an
+185 -245
fs/nfsd/filecache.c
··· 74 74 static unsigned long nfsd_file_flags; 75 75 static struct fsnotify_group *nfsd_file_fsnotify_group; 76 76 static struct delayed_work nfsd_filecache_laundrette; 77 - static struct rhashtable nfsd_file_rhash_tbl 77 + static struct rhltable nfsd_file_rhltable 78 78 ____cacheline_aligned_in_smp; 79 - 80 - enum nfsd_file_lookup_type { 81 - NFSD_FILE_KEY_INODE, 82 - NFSD_FILE_KEY_FULL, 83 - }; 84 - 85 - struct nfsd_file_lookup_key { 86 - struct inode *inode; 87 - struct net *net; 88 - const struct cred *cred; 89 - unsigned char need; 90 - bool gc; 91 - enum nfsd_file_lookup_type type; 92 - }; 93 - 94 - /* 95 - * The returned hash value is based solely on the address of an in-code 96 - * inode, a pointer to a slab-allocated object. The entropy in such a 97 - * pointer is concentrated in its middle bits. 98 - */ 99 - static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) 100 - { 101 - unsigned long ptr = (unsigned long)inode; 102 - u32 k; 103 - 104 - k = ptr >> L1_CACHE_SHIFT; 105 - k &= 0x00ffffff; 106 - return jhash2(&k, 1, seed); 107 - } 108 - 109 - /** 110 - * nfsd_file_key_hashfn - Compute the hash value of a lookup key 111 - * @data: key on which to compute the hash value 112 - * @len: rhash table's key_len parameter (unused) 113 - * @seed: rhash table's random seed of the day 114 - * 115 - * Return value: 116 - * Computed 32-bit hash value 117 - */ 118 - static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) 119 - { 120 - const struct nfsd_file_lookup_key *key = data; 121 - 122 - return nfsd_file_inode_hash(key->inode, seed); 123 - } 124 - 125 - /** 126 - * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file 127 - * @data: object on which to compute the hash value 128 - * @len: rhash table's key_len parameter (unused) 129 - * @seed: rhash table's random seed of the day 130 - * 131 - * Return value: 132 - * Computed 32-bit hash value 133 - */ 134 - static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) 135 
- { 136 - const struct nfsd_file *nf = data; 137 - 138 - return nfsd_file_inode_hash(nf->nf_inode, seed); 139 - } 140 79 141 80 static bool 142 81 nfsd_match_cred(const struct cred *c1, const struct cred *c2) ··· 97 158 return true; 98 159 } 99 160 100 - /** 101 - * nfsd_file_obj_cmpfn - Match a cache item against search criteria 102 - * @arg: search criteria 103 - * @ptr: cache item to check 104 - * 105 - * Return values: 106 - * %0 - Item matches search criteria 107 - * %1 - Item does not match search criteria 108 - */ 109 - static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, 110 - const void *ptr) 111 - { 112 - const struct nfsd_file_lookup_key *key = arg->key; 113 - const struct nfsd_file *nf = ptr; 114 - 115 - switch (key->type) { 116 - case NFSD_FILE_KEY_INODE: 117 - if (nf->nf_inode != key->inode) 118 - return 1; 119 - break; 120 - case NFSD_FILE_KEY_FULL: 121 - if (nf->nf_inode != key->inode) 122 - return 1; 123 - if (nf->nf_may != key->need) 124 - return 1; 125 - if (nf->nf_net != key->net) 126 - return 1; 127 - if (!nfsd_match_cred(nf->nf_cred, key->cred)) 128 - return 1; 129 - if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) 130 - return 1; 131 - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) 132 - return 1; 133 - break; 134 - } 135 - return 0; 136 - } 137 - 138 161 static const struct rhashtable_params nfsd_file_rhash_params = { 139 162 .key_len = sizeof_field(struct nfsd_file, nf_inode), 140 163 .key_offset = offsetof(struct nfsd_file, nf_inode), 141 - .head_offset = offsetof(struct nfsd_file, nf_rhash), 142 - .hashfn = nfsd_file_key_hashfn, 143 - .obj_hashfn = nfsd_file_obj_hashfn, 144 - .obj_cmpfn = nfsd_file_obj_cmpfn, 145 - /* Reduce resizing churn on light workloads */ 146 - .min_size = 512, /* buckets */ 164 + .head_offset = offsetof(struct nfsd_file, nf_rlist), 165 + 166 + /* 167 + * Start with a single page hash table to reduce resizing churn 168 + * on light workloads. 
169 + */ 170 + .min_size = 256, 147 171 .automatic_shrinking = true, 148 172 }; 149 173 ··· 209 307 } 210 308 211 309 static struct nfsd_file * 212 - nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) 310 + nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, 311 + bool want_gc) 213 312 { 214 313 struct nfsd_file *nf; 215 314 216 315 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 217 - if (nf) { 218 - INIT_LIST_HEAD(&nf->nf_lru); 219 - nf->nf_birthtime = ktime_get(); 220 - nf->nf_file = NULL; 221 - nf->nf_cred = get_current_cred(); 222 - nf->nf_net = key->net; 223 - nf->nf_flags = 0; 224 - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 225 - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 226 - if (key->gc) 227 - __set_bit(NFSD_FILE_GC, &nf->nf_flags); 228 - nf->nf_inode = key->inode; 229 - refcount_set(&nf->nf_ref, 1); 230 - nf->nf_may = key->need; 231 - nf->nf_mark = NULL; 232 - } 316 + if (unlikely(!nf)) 317 + return NULL; 318 + 319 + INIT_LIST_HEAD(&nf->nf_lru); 320 + nf->nf_birthtime = ktime_get(); 321 + nf->nf_file = NULL; 322 + nf->nf_cred = get_current_cred(); 323 + nf->nf_net = net; 324 + nf->nf_flags = want_gc ? 
325 + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : 326 + BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); 327 + nf->nf_inode = inode; 328 + refcount_set(&nf->nf_ref, 1); 329 + nf->nf_may = need; 330 + nf->nf_mark = NULL; 233 331 return nf; 234 332 } 235 333 ··· 254 352 nfsd_file_hash_remove(struct nfsd_file *nf) 255 353 { 256 354 trace_nfsd_file_unhash(nf); 257 - rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, 258 - nfsd_file_rhash_params); 355 + rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, 356 + nfsd_file_rhash_params); 259 357 } 260 358 261 359 static bool ··· 282 380 if (nf->nf_mark) 283 381 nfsd_file_mark_put(nf->nf_mark); 284 382 if (nf->nf_file) { 285 - get_file(nf->nf_file); 286 - filp_close(nf->nf_file, NULL); 287 383 nfsd_file_check_write_error(nf); 288 - fput(nf->nf_file); 384 + filp_close(nf->nf_file, NULL); 289 385 } 290 386 291 387 /* ··· 302 402 struct file *file = nf->nf_file; 303 403 struct address_space *mapping; 304 404 305 - if (!file || !(file->f_mode & FMODE_WRITE)) 405 + /* File not open for write? */ 406 + if (!(file->f_mode & FMODE_WRITE)) 306 407 return false; 408 + 409 + /* 410 + * Some filesystems (e.g. NFS) flush all dirty data on close. 411 + * On others, there is no need to wait for writeback. 
412 + */ 413 + if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) 414 + return false; 415 + 307 416 mapping = file->f_mapping; 308 417 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || 309 418 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); 310 419 } 420 + 311 421 312 422 static bool nfsd_file_lru_add(struct nfsd_file *nf) 313 423 { ··· 402 492 } 403 493 } 404 494 405 - static void 406 - nfsd_file_list_remove_disposal(struct list_head *dst, 407 - struct nfsd_fcache_disposal *l) 408 - { 409 - spin_lock(&l->lock); 410 - list_splice_init(&l->freeme, dst); 411 - spin_unlock(&l->lock); 412 - } 413 - 414 - static void 415 - nfsd_file_list_add_disposal(struct list_head *files, struct net *net) 416 - { 417 - struct nfsd_net *nn = net_generic(net, nfsd_net_id); 418 - struct nfsd_fcache_disposal *l = nn->fcache_disposal; 419 - 420 - spin_lock(&l->lock); 421 - list_splice_tail_init(files, &l->freeme); 422 - spin_unlock(&l->lock); 423 - queue_work(nfsd_filecache_wq, &l->work); 424 - } 425 - 426 - static void 427 - nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, 428 - struct net *net) 429 - { 430 - struct nfsd_file *nf, *tmp; 431 - 432 - list_for_each_entry_safe(nf, tmp, src, nf_lru) { 433 - if (nf->nf_net == net) 434 - list_move_tail(&nf->nf_lru, dst); 435 - } 436 - } 437 - 495 + /** 496 + * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list 497 + * @dispose: list of nfsd_files to be disposed 498 + * 499 + * Transfers each file to the "freeme" list for its nfsd_net, to eventually 500 + * be disposed of by the per-net garbage collector. 
501 + */ 438 502 static void 439 503 nfsd_file_dispose_list_delayed(struct list_head *dispose) 440 504 { 441 - LIST_HEAD(list); 442 - struct nfsd_file *nf; 443 - 444 505 while(!list_empty(dispose)) { 445 - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 446 - nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); 447 - nfsd_file_list_add_disposal(&list, nf->nf_net); 506 + struct nfsd_file *nf = list_first_entry(dispose, 507 + struct nfsd_file, nf_lru); 508 + struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); 509 + struct nfsd_fcache_disposal *l = nn->fcache_disposal; 510 + 511 + spin_lock(&l->lock); 512 + list_move_tail(&nf->nf_lru, &l->freeme); 513 + spin_unlock(&l->lock); 514 + queue_work(nfsd_filecache_wq, &l->work); 448 515 } 449 516 } 450 517 ··· 565 678 * @inode: inode on which to close out nfsd_files 566 679 * @dispose: list on which to gather nfsd_files to close out 567 680 * 568 - * An nfsd_file represents a struct file being held open on behalf of nfsd. An 569 - * open file however can block other activity (such as leases), or cause 681 + * An nfsd_file represents a struct file being held open on behalf of nfsd. 682 + * An open file however can block other activity (such as leases), or cause 570 683 * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). 
571 684 * 572 685 * This function is intended to find open nfsd_files when this sort of ··· 579 692 static void 580 693 nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) 581 694 { 582 - struct nfsd_file_lookup_key key = { 583 - .type = NFSD_FILE_KEY_INODE, 584 - .inode = inode, 585 - }; 695 + struct rhlist_head *tmp, *list; 586 696 struct nfsd_file *nf; 587 697 588 698 rcu_read_lock(); 589 - do { 590 - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 591 - nfsd_file_rhash_params); 592 - if (!nf) 593 - break; 699 + list = rhltable_lookup(&nfsd_file_rhltable, &inode, 700 + nfsd_file_rhash_params); 701 + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { 702 + if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) 703 + continue; 594 704 nfsd_file_cond_queue(nf, dispose); 595 - } while (1); 705 + } 596 706 rcu_read_unlock(); 597 707 } 598 708 ··· 642 758 * nfsd_file_delayed_close - close unused nfsd_files 643 759 * @work: dummy 644 760 * 645 - * Walk the LRU list and destroy any entries that have not been used since 646 - * the last scan. 761 + * Scrape the freeme list for this nfsd_net, and then dispose of them 762 + * all. 
647 763 */ 648 764 static void 649 765 nfsd_file_delayed_close(struct work_struct *work) ··· 652 768 struct nfsd_fcache_disposal *l = container_of(work, 653 769 struct nfsd_fcache_disposal, work); 654 770 655 - nfsd_file_list_remove_disposal(&head, l); 771 + spin_lock(&l->lock); 772 + list_splice_init(&l->freeme, &head); 773 + spin_unlock(&l->lock); 774 + 656 775 nfsd_file_dispose_list(&head); 657 776 } 658 777 ··· 716 829 if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 717 830 return 0; 718 831 719 - ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); 832 + ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); 720 833 if (ret) 721 834 return ret; 722 835 ··· 784 897 nfsd_file_mark_slab = NULL; 785 898 destroy_workqueue(nfsd_filecache_wq); 786 899 nfsd_filecache_wq = NULL; 787 - rhashtable_destroy(&nfsd_file_rhash_tbl); 900 + rhltable_destroy(&nfsd_file_rhltable); 788 901 goto out; 789 902 } 790 903 ··· 793 906 * @net: net-namespace to shut down the cache (may be NULL) 794 907 * 795 908 * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, 796 - * then close out everything. Called when an nfsd instance is being shut down. 909 + * then close out everything. Called when an nfsd instance is being shut down, 910 + * and when the exports table is flushed. 
797 911 */ 798 912 static void 799 913 __nfsd_file_cache_purge(struct net *net) ··· 803 915 struct nfsd_file *nf; 804 916 LIST_HEAD(dispose); 805 917 806 - rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); 918 + rhltable_walk_enter(&nfsd_file_rhltable, &iter); 807 919 do { 808 920 rhashtable_walk_start(&iter); 809 921 ··· 909 1021 nfsd_file_mark_slab = NULL; 910 1022 destroy_workqueue(nfsd_filecache_wq); 911 1023 nfsd_filecache_wq = NULL; 912 - rhashtable_destroy(&nfsd_file_rhash_tbl); 1024 + rhltable_destroy(&nfsd_file_rhltable); 913 1025 914 1026 for_each_possible_cpu(i) { 915 1027 per_cpu(nfsd_file_cache_hits, i) = 0; ··· 918 1030 per_cpu(nfsd_file_total_age, i) = 0; 919 1031 per_cpu(nfsd_file_evictions, i) = 0; 920 1032 } 1033 + } 1034 + 1035 + static struct nfsd_file * 1036 + nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, 1037 + struct inode *inode, unsigned char need, 1038 + bool want_gc) 1039 + { 1040 + struct rhlist_head *tmp, *list; 1041 + struct nfsd_file *nf; 1042 + 1043 + list = rhltable_lookup(&nfsd_file_rhltable, &inode, 1044 + nfsd_file_rhash_params); 1045 + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { 1046 + if (nf->nf_may != need) 1047 + continue; 1048 + if (nf->nf_net != net) 1049 + continue; 1050 + if (!nfsd_match_cred(nf->nf_cred, cred)) 1051 + continue; 1052 + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) 1053 + continue; 1054 + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) 1055 + continue; 1056 + 1057 + if (!nfsd_file_get(nf)) 1058 + continue; 1059 + return nf; 1060 + } 1061 + return NULL; 921 1062 } 922 1063 923 1064 /** ··· 963 1046 bool 964 1047 nfsd_file_is_cached(struct inode *inode) 965 1048 { 966 - struct nfsd_file_lookup_key key = { 967 - .type = NFSD_FILE_KEY_INODE, 968 - .inode = inode, 969 - }; 1049 + struct rhlist_head *tmp, *list; 1050 + struct nfsd_file *nf; 970 1051 bool ret = false; 971 1052 972 - if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, 973 - nfsd_file_rhash_params) 
!= NULL) 974 - ret = true; 1053 + rcu_read_lock(); 1054 + list = rhltable_lookup(&nfsd_file_rhltable, &inode, 1055 + nfsd_file_rhash_params); 1056 + rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) 1057 + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { 1058 + ret = true; 1059 + break; 1060 + } 1061 + rcu_read_unlock(); 1062 + 975 1063 trace_nfsd_file_is_cached(inode, (int)ret); 976 1064 return ret; 977 1065 } ··· 986 1064 unsigned int may_flags, struct file *file, 987 1065 struct nfsd_file **pnf, bool want_gc) 988 1066 { 989 - struct nfsd_file_lookup_key key = { 990 - .type = NFSD_FILE_KEY_FULL, 991 - .need = may_flags & NFSD_FILE_MAY_MASK, 992 - .net = SVC_NET(rqstp), 993 - .gc = want_gc, 994 - }; 1067 + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 1068 + struct net *net = SVC_NET(rqstp); 1069 + struct nfsd_file *new, *nf; 1070 + const struct cred *cred; 995 1071 bool open_retry = true; 996 - struct nfsd_file *nf; 1072 + struct inode *inode; 997 1073 __be32 status; 998 1074 int ret; 999 1075 ··· 999 1079 may_flags|NFSD_MAY_OWNER_OVERRIDE); 1000 1080 if (status != nfs_ok) 1001 1081 return status; 1002 - key.inode = d_inode(fhp->fh_dentry); 1003 - key.cred = get_current_cred(); 1082 + inode = d_inode(fhp->fh_dentry); 1083 + cred = get_current_cred(); 1004 1084 1005 1085 retry: 1006 1086 rcu_read_lock(); 1007 - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 1008 - nfsd_file_rhash_params); 1009 - nf = nfsd_file_get(nf); 1087 + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); 1010 1088 rcu_read_unlock(); 1011 1089 1012 1090 if (nf) { 1091 + /* 1092 + * If the nf is on the LRU then it holds an extra reference 1093 + * that must be put if it's removed. It had better not be 1094 + * the last one however, since we should hold another. 
1095 + */ 1013 1096 if (nfsd_file_lru_remove(nf)) 1014 1097 WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); 1015 1098 goto wait_for_construction; 1016 1099 } 1017 1100 1018 - nf = nfsd_file_alloc(&key, may_flags); 1019 - if (!nf) { 1101 + new = nfsd_file_alloc(net, inode, need, want_gc); 1102 + if (!new) { 1020 1103 status = nfserr_jukebox; 1021 - goto out_status; 1104 + goto out; 1022 1105 } 1023 1106 1024 - ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, 1025 - &key, &nf->nf_rhash, 1026 - nfsd_file_rhash_params); 1107 + rcu_read_lock(); 1108 + spin_lock(&inode->i_lock); 1109 + nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); 1110 + if (unlikely(nf)) { 1111 + spin_unlock(&inode->i_lock); 1112 + rcu_read_unlock(); 1113 + nfsd_file_slab_free(&new->nf_rcu); 1114 + goto wait_for_construction; 1115 + } 1116 + nf = new; 1117 + ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, 1118 + nfsd_file_rhash_params); 1119 + spin_unlock(&inode->i_lock); 1120 + rcu_read_unlock(); 1027 1121 if (likely(ret == 0)) 1028 1122 goto open_file; 1029 1123 1030 - nfsd_file_slab_free(&nf->nf_rcu); 1031 - nf = NULL; 1032 1124 if (ret == -EEXIST) 1033 1125 goto retry; 1034 - trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); 1126 + trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); 1035 1127 status = nfserr_jukebox; 1036 - goto out_status; 1128 + goto construction_err; 1037 1129 1038 1130 wait_for_construction: 1039 1131 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 1040 1132 1041 1133 /* Did construction of this file fail? 
*/ 1042 1134 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 1043 - trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); 1135 + trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); 1044 1136 if (!open_retry) { 1045 1137 status = nfserr_jukebox; 1046 - goto out; 1138 + goto construction_err; 1047 1139 } 1048 1140 open_retry = false; 1049 - if (refcount_dec_and_test(&nf->nf_ref)) 1050 - nfsd_file_free(nf); 1051 1141 goto retry; 1052 1142 } 1053 - 1054 1143 this_cpu_inc(nfsd_file_cache_hits); 1055 1144 1056 1145 status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); 1146 + if (status != nfs_ok) { 1147 + nfsd_file_put(nf); 1148 + nf = NULL; 1149 + } 1150 + 1057 1151 out: 1058 1152 if (status == nfs_ok) { 1059 1153 this_cpu_inc(nfsd_file_acquisitions); 1060 1154 nfsd_file_check_write_error(nf); 1061 1155 *pnf = nf; 1062 - } else { 1063 - if (refcount_dec_and_test(&nf->nf_ref)) 1064 - nfsd_file_free(nf); 1065 - nf = NULL; 1066 1156 } 1067 - 1068 - out_status: 1069 - put_cred(key.cred); 1070 - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); 1157 + put_cred(cred); 1158 + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); 1071 1159 return status; 1072 1160 1073 1161 open_file: 1074 1162 trace_nfsd_file_alloc(nf); 1075 - nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); 1163 + nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); 1076 1164 if (nf->nf_mark) { 1077 1165 if (file) { 1078 1166 get_file(file); ··· 1098 1170 * If construction failed, or we raced with a call to unlink() 1099 1171 * then unhash. 
1100 1172 */ 1101 - if (status == nfs_ok && key.inode->i_nlink == 0) 1102 - status = nfserr_jukebox; 1103 - if (status != nfs_ok) 1173 + if (status != nfs_ok || inode->i_nlink == 0) 1104 1174 nfsd_file_unhash(nf); 1105 - clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 1106 - smp_mb__after_atomic(); 1107 - wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 1175 + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); 1176 + if (status == nfs_ok) 1177 + goto out; 1178 + 1179 + construction_err: 1180 + if (refcount_dec_and_test(&nf->nf_ref)) 1181 + nfsd_file_free(nf); 1182 + nf = NULL; 1108 1183 goto out; 1109 1184 } 1110 1185 ··· 1123 1192 * seconds after the final nfsd_file_put() in case the caller 1124 1193 * wants to re-use it. 1125 1194 * 1126 - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1127 - * network byte order is returned. 1195 + * Return values: 1196 + * %nfs_ok - @pnf points to an nfsd_file with its reference 1197 + * count boosted. 1198 + * 1199 + * On error, an nfsstat value in network byte order is returned. 1128 1200 */ 1129 1201 __be32 1130 1202 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, ··· 1147 1213 * but not garbage-collected. The object is unhashed after the 1148 1214 * final nfsd_file_put(). 1149 1215 * 1150 - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1151 - * network byte order is returned. 1216 + * Return values: 1217 + * %nfs_ok - @pnf points to an nfsd_file with its reference 1218 + * count boosted. 1219 + * 1220 + * On error, an nfsstat value in network byte order is returned. 1152 1221 */ 1153 1222 __be32 1154 1223 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, ··· 1172 1235 * and @file is non-NULL, use it to instantiate a new nfsd_file instead of 1173 1236 * opening a new one. 1174 1237 * 1175 - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1176 - * network byte order is returned. 
1238 + * Return values: 1239 + * %nfs_ok - @pnf points to an nfsd_file with its reference 1240 + * count boosted. 1241 + * 1242 + * On error, an nfsstat value in network byte order is returned. 1177 1243 */ 1178 1244 __be32 1179 1245 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, ··· 1207 1267 lru = list_lru_count(&nfsd_file_lru); 1208 1268 1209 1269 rcu_read_lock(); 1210 - ht = &nfsd_file_rhash_tbl; 1270 + ht = &nfsd_file_rhltable.ht; 1211 1271 count = atomic_read(&ht->nelems); 1212 1272 tbl = rht_dereference_rcu(ht->tbl, ht); 1213 1273 buckets = tbl->size; ··· 1223 1283 evictions += per_cpu(nfsd_file_evictions, i); 1224 1284 } 1225 1285 1226 - seq_printf(m, "total entries: %u\n", count); 1286 + seq_printf(m, "total inodes: %u\n", count); 1227 1287 seq_printf(m, "hash buckets: %u\n", buckets); 1228 1288 seq_printf(m, "lru entries: %lu\n", lru); 1229 1289 seq_printf(m, "cache hits: %lu\n", hits);
+5 -4
fs/nfsd/filecache.h
··· 29 29 * never be dereferenced, only used for comparison. 30 30 */ 31 31 struct nfsd_file { 32 - struct rhash_head nf_rhash; 33 - struct list_head nf_lru; 34 - struct rcu_head nf_rcu; 32 + struct rhlist_head nf_rlist; 33 + void *nf_inode; 35 34 struct file *nf_file; 36 35 const struct cred *nf_cred; 37 36 struct net *nf_net; ··· 39 40 #define NFSD_FILE_REFERENCED (2) 40 41 #define NFSD_FILE_GC (3) 41 42 unsigned long nf_flags; 42 - struct inode *nf_inode; /* don't deref */ 43 43 refcount_t nf_ref; 44 44 unsigned char nf_may; 45 + 45 46 struct nfsd_file_mark *nf_mark; 47 + struct list_head nf_lru; 48 + struct rcu_head nf_rcu; 46 49 ktime_t nf_birthtime; 47 50 }; 48 51
+4 -4
fs/nfsd/nfs4idmap.c
··· 240 240 goto out; 241 241 242 242 /* expiry */ 243 - ent.h.expiry_time = get_expiry(&buf); 244 - if (ent.h.expiry_time == 0) 243 + error = get_expiry(&buf, &ent.h.expiry_time); 244 + if (error) 245 245 goto out; 246 246 247 247 error = -ENOMEM; ··· 408 408 memcpy(ent.name, buf1, sizeof(ent.name)); 409 409 410 410 /* expiry */ 411 - ent.h.expiry_time = get_expiry(&buf); 412 - if (ent.h.expiry_time == 0) 411 + error = get_expiry(&buf, &ent.h.expiry_time); 412 + if (error) 413 413 goto out; 414 414 415 415 /* ID */
+7 -6
fs/nfsd/vfs.c
··· 930 930 * Grab and keep cached pages associated with a file in the svc_rqst 931 931 * so that they can be passed to the network sendmsg/sendpage routines 932 932 * directly. They will be released after the sending has completed. 933 + * 934 + * Return values: Number of bytes consumed, or -EIO if there are no 935 + * remaining pages in rqstp->rq_pages. 933 936 */ 934 937 static int 935 938 nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, ··· 951 948 */ 952 949 if (page == *(rqstp->rq_next_page - 1)) 953 950 continue; 954 - svc_rqst_replace_page(rqstp, page); 951 + if (unlikely(!svc_rqst_replace_page(rqstp, page))) 952 + return -EIO; 955 953 } 956 954 if (rqstp->rq_res.page_len == 0) // first call 957 955 rqstp->rq_res.page_base = offset % PAGE_SIZE; ··· 2168 2164 goto out; 2169 2165 } 2170 2166 2171 - buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); 2167 + buf = kvmalloc(len, GFP_KERNEL); 2172 2168 if (buf == NULL) { 2173 2169 err = nfserr_jukebox; 2174 2170 goto out; ··· 2231 2227 goto out; 2232 2228 } 2233 2229 2234 - /* 2235 - * We're holding i_rwsem - use GFP_NOFS. 2236 - */ 2237 - buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); 2230 + buf = kvmalloc(len, GFP_KERNEL); 2238 2231 if (buf == NULL) { 2239 2232 err = nfserr_jukebox; 2240 2233 goto out;
+1
include/linux/exportfs.h
··· 220 220 #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply 221 221 atomic attribute updates 222 222 */ 223 + #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ 223 224 unsigned long flags; 224 225 }; 225 226
+15 -14
include/linux/lockd/lockd.h
··· 99 99 /* 100 100 * Rigorous type checking on sockaddr type conversions 101 101 */ 102 - static inline struct sockaddr_in *nlm_addr_in(const struct nlm_host *host) 103 - { 104 - return (struct sockaddr_in *)&host->h_addr; 105 - } 106 - 107 102 static inline struct sockaddr *nlm_addr(const struct nlm_host *host) 108 103 { 109 104 return (struct sockaddr *)&host->h_addr; 110 - } 111 - 112 - static inline struct sockaddr_in *nlm_srcaddr_in(const struct nlm_host *host) 113 - { 114 - return (struct sockaddr_in *)&host->h_srcaddr; 115 105 } 116 106 117 107 static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) ··· 121 131 uint32_t pid; 122 132 }; 123 133 124 - struct nlm_wait; 134 + /* 135 + * This is the representation of a blocked client lock. 136 + */ 137 + struct nlm_wait { 138 + struct list_head b_list; /* linked list */ 139 + wait_queue_head_t b_wait; /* where to wait on */ 140 + struct nlm_host *b_host; 141 + struct file_lock *b_lock; /* local file lock */ 142 + __be32 b_status; /* grant callback status */ 143 + }; 125 144 126 145 /* 127 146 * Memory chunk for NLM client RPC request. 
··· 211 212 int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); 212 213 int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); 213 214 void nlmclnt_release_call(struct nlm_rqst *); 214 - struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); 215 - void nlmclnt_finish_block(struct nlm_wait *block); 216 - int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); 215 + void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, 216 + struct file_lock *fl); 217 + void nlmclnt_queue_block(struct nlm_wait *block); 218 + __be32 nlmclnt_dequeue_block(struct nlm_wait *block); 219 + int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout); 217 220 __be32 nlmclnt_grant(const struct sockaddr *addr, 218 221 const struct nlm_lock *lock); 219 222 void nlmclnt_recovery(struct nlm_host *);
+20
include/linux/nfs.h
··· 10 10 11 11 #include <linux/sunrpc/msg_prot.h> 12 12 #include <linux/string.h> 13 + #include <linux/crc32.h> 13 14 #include <uapi/linux/nfs.h> 14 15 15 16 /* ··· 45 44 /* used by direct.c to mark verf as invalid */ 46 45 NFS_INVALID_STABLE_HOW = -1 47 46 }; 47 + 48 + #ifdef CONFIG_CRC32 49 + /** 50 + * nfs_fhandle_hash - calculate the crc32 hash for the filehandle 51 + * @fh - pointer to filehandle 52 + * 53 + * returns a crc32 hash for the filehandle that is compatible with 54 + * the one displayed by "wireshark". 55 + */ 56 + static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) 57 + { 58 + return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); 59 + } 60 + #else /* CONFIG_CRC32 */ 61 + static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) 62 + { 63 + return 0; 64 + } 65 + #endif /* CONFIG_CRC32 */ 48 66 #endif /* _LINUX_NFS_H */
+8 -7
include/linux/sunrpc/cache.h
··· 300 300 return 0; 301 301 } 302 302 303 - static inline time64_t get_expiry(char **bpp) 303 + static inline int get_expiry(char **bpp, time64_t *rvp) 304 304 { 305 - time64_t rv; 305 + int error; 306 306 struct timespec64 boot; 307 307 308 - if (get_time(bpp, &rv)) 309 - return 0; 310 - if (rv < 0) 311 - return 0; 308 + error = get_time(bpp, rvp); 309 + if (error) 310 + return error; 311 + 312 312 getboottime64(&boot); 313 - return rv - boot.tv_sec; 313 + (*rvp) -= boot.tv_sec; 314 + return 0; 314 315 } 315 316 316 317 #endif /* _LINUX_SUNRPC_CACHE_H_ */
+3 -13
include/linux/sunrpc/svc.h
··· 309 309 return (struct sockaddr *) &rqst->rq_daddr; 310 310 } 311 311 312 - static inline void svc_free_res_pages(struct svc_rqst *rqstp) 313 - { 314 - while (rqstp->rq_next_page != rqstp->rq_respages) { 315 - struct page **pp = --rqstp->rq_next_page; 316 - if (*pp) { 317 - put_page(*pp); 318 - *pp = NULL; 319 - } 320 - } 321 - } 322 - 323 312 struct svc_deferred_req { 324 313 u32 prot; /* protocol (UDP or TCP) */ 325 314 struct svc_xprt *xprt; ··· 411 422 int (*threadfn)(void *data)); 412 423 struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, 413 424 struct svc_pool *pool, int node); 414 - void svc_rqst_replace_page(struct svc_rqst *rqstp, 425 + bool svc_rqst_replace_page(struct svc_rqst *rqstp, 415 426 struct page *page); 427 + void svc_rqst_release_pages(struct svc_rqst *rqstp); 416 428 void svc_rqst_free(struct svc_rqst *); 417 429 void svc_exit_thread(struct svc_rqst *); 418 430 struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, 419 431 int (*threadfn)(void *data)); 420 432 int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); 421 433 int svc_pool_stats_open(struct svc_serv *serv, struct file *file); 422 - int svc_process(struct svc_rqst *); 434 + void svc_process(struct svc_rqst *rqstp); 423 435 int bc_svc_process(struct svc_serv *, struct rpc_rqst *, 424 436 struct svc_rqst *); 425 437 int svc_register(const struct svc_serv *, struct net *, const int,
+4 -1
include/linux/sunrpc/svc_xprt.h
··· 27 27 void (*xpo_detach)(struct svc_xprt *); 28 28 void (*xpo_free)(struct svc_xprt *); 29 29 void (*xpo_kill_temp_xprt)(struct svc_xprt *); 30 - void (*xpo_start_tls)(struct svc_xprt *); 30 + void (*xpo_handshake)(struct svc_xprt *xprt); 31 31 }; 32 32 33 33 struct svc_xprt_class { ··· 70 70 #define XPT_LOCAL 12 /* connection from loopback interface */ 71 71 #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ 72 72 #define XPT_CONG_CTRL 14 /* has congestion control */ 73 + #define XPT_HANDSHAKE 15 /* xprt requests a handshake */ 74 + #define XPT_TLS_SESSION 16 /* transport-layer security established */ 75 + #define XPT_PEER_AUTH 17 /* peer has been authenticated */ 73 76 74 77 struct svc_serv *xpt_server; /* service for transport */ 75 78 atomic_t xpt_reserved; /* space on outq that is rsvd */
+3 -1
include/linux/sunrpc/svcsock.h
··· 38 38 /* Number of queued send requests */ 39 39 atomic_t sk_sendqlen; 40 40 41 + struct completion sk_handshake_done; 42 + 41 43 struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ 42 44 }; 43 45 ··· 58 56 */ 59 57 void svc_close_net(struct svc_serv *, struct net *); 60 58 int svc_recv(struct svc_rqst *, long); 61 - int svc_send(struct svc_rqst *); 59 + void svc_send(struct svc_rqst *rqstp); 62 60 void svc_drop(struct svc_rqst *); 63 61 void svc_sock_update_bufs(struct svc_serv *serv); 64 62 bool svc_alien_sock(struct net *net, int fd);
+2
include/net/tls.h
··· 69 69 70 70 #define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) 71 71 72 + #define TLS_RECORD_TYPE_ALERT 0x15 73 + #define TLS_RECORD_TYPE_HANDSHAKE 0x16 72 74 #define TLS_RECORD_TYPE_DATA 0x17 73 75 74 76 #define TLS_AAD_SPACE_SIZE 13
+40 -1
include/trace/events/sunrpc.h
··· 1790 1790 TP_PROTO(const struct svc_rqst *rqst, int status), 1791 1791 TP_ARGS(rqst, status)); 1792 1792 1793 + TRACE_EVENT(svc_replace_page_err, 1794 + TP_PROTO(const struct svc_rqst *rqst), 1795 + 1796 + TP_ARGS(rqst), 1797 + TP_STRUCT__entry( 1798 + SVC_RQST_ENDPOINT_FIELDS(rqst) 1799 + 1800 + __field(const void *, begin) 1801 + __field(const void *, respages) 1802 + __field(const void *, nextpage) 1803 + ), 1804 + 1805 + TP_fast_assign( 1806 + SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst); 1807 + 1808 + __entry->begin = rqst->rq_pages; 1809 + __entry->respages = rqst->rq_respages; 1810 + __entry->nextpage = rqst->rq_next_page; 1811 + ), 1812 + 1813 + TP_printk(SVC_RQST_ENDPOINT_FORMAT " begin=%p respages=%p nextpage=%p", 1814 + SVC_RQST_ENDPOINT_VARARGS, 1815 + __entry->begin, __entry->respages, __entry->nextpage) 1816 + ); 1817 + 1793 1818 TRACE_EVENT(svc_stats_latency, 1794 1819 TP_PROTO( 1795 1820 const struct svc_rqst *rqst ··· 1857 1832 { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ 1858 1833 { BIT(XPT_LOCAL), "LOCAL" }, \ 1859 1834 { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ 1860 - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) 1835 + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ 1836 + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ 1837 + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ 1838 + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) 1861 1839 1862 1840 TRACE_EVENT(svc_xprt_create_err, 1863 1841 TP_PROTO( ··· 1992 1964 DEFINE_SVC_XPRT_EVENT(close); 1993 1965 DEFINE_SVC_XPRT_EVENT(detach); 1994 1966 DEFINE_SVC_XPRT_EVENT(free); 1967 + 1968 + #define DEFINE_SVC_TLS_EVENT(name) \ 1969 + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ 1970 + TP_PROTO(const struct svc_xprt *xprt), \ 1971 + TP_ARGS(xprt)) 1972 + 1973 + DEFINE_SVC_TLS_EVENT(start); 1974 + DEFINE_SVC_TLS_EVENT(upcall); 1975 + DEFINE_SVC_TLS_EVENT(unavailable); 1976 + DEFINE_SVC_TLS_EVENT(not_started); 1977 + DEFINE_SVC_TLS_EVENT(timed_out); 1995 1978 1996 1979 TRACE_EVENT(svc_xprt_accept, 1997 1980 TP_PROTO(
+13
include/uapi/linux/nfsd/export.h
··· 62 62 | NFSEXP_ALLSQUASH \ 63 63 | NFSEXP_INSECURE_PORT) 64 64 65 + /* 66 + * Transport layer security policies that are permitted to access 67 + * an export 68 + */ 69 + #define NFSEXP_XPRTSEC_NONE 0x0001 70 + #define NFSEXP_XPRTSEC_TLS 0x0002 71 + #define NFSEXP_XPRTSEC_MTLS 0x0004 72 + 73 + #define NFSEXP_XPRTSEC_NUM (3) 74 + 75 + #define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \ 76 + NFSEXP_XPRTSEC_TLS | \ 77 + NFSEXP_XPRTSEC_MTLS) 65 78 66 79 #endif /* _UAPINFSD_EXPORT_H */
+6 -6
net/sunrpc/auth_gss/svcauth_gss.c
··· 257 257 258 258 rsii.h.flags = 0; 259 259 /* expiry */ 260 - expiry = get_expiry(&mesg); 261 - status = -EINVAL; 262 - if (expiry == 0) 260 + status = get_expiry(&mesg, &expiry); 261 + if (status) 263 262 goto out; 264 263 264 + status = -EINVAL; 265 265 /* major/minor */ 266 266 len = qword_get(&mesg, buf, mlen); 267 267 if (len <= 0) ··· 483 483 484 484 rsci.h.flags = 0; 485 485 /* expiry */ 486 - expiry = get_expiry(&mesg); 487 - status = -EINVAL; 488 - if (expiry == 0) 486 + status = get_expiry(&mesg, &expiry); 487 + if (status) 489 488 goto out; 490 489 490 + status = -EINVAL; 491 491 rscp = rsc_lookup(cd, &rsci); 492 492 if (!rscp) 493 493 goto out;
+42 -7
net/sunrpc/svc.c
··· 649 649 if (!rqstp) 650 650 return rqstp; 651 651 652 + pagevec_init(&rqstp->rq_pvec); 653 + 652 654 __set_bit(RQ_BUSY, &rqstp->rq_flags); 653 655 rqstp->rq_server = serv; 654 656 rqstp->rq_pool = pool; ··· 844 842 * 845 843 * When replacing a page in rq_pages, batch the release of the 846 844 * replaced pages to avoid hammering the page allocator. 845 + * 846 + * Return values: 847 + * %true: page replaced 848 + * %false: array bounds checking failed 847 849 */ 848 - void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) 850 + bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) 849 851 { 852 + struct page **begin = rqstp->rq_pages; 853 + struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES]; 854 + 855 + if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { 856 + trace_svc_replace_page_err(rqstp); 857 + return false; 858 + } 859 + 850 860 if (*rqstp->rq_next_page) { 851 861 if (!pagevec_space(&rqstp->rq_pvec)) 852 862 __pagevec_release(&rqstp->rq_pvec); ··· 867 853 868 854 get_page(page); 869 855 *(rqstp->rq_next_page++) = page; 856 + return true; 870 857 } 871 858 EXPORT_SYMBOL_GPL(svc_rqst_replace_page); 859 + 860 + /** 861 + * svc_rqst_release_pages - Release Reply buffer pages 862 + * @rqstp: RPC transaction context 863 + * 864 + * Release response pages that might still be in flight after 865 + * svc_send, and any spliced filesystem-owned pages. 866 + */ 867 + void svc_rqst_release_pages(struct svc_rqst *rqstp) 868 + { 869 + int i, count = rqstp->rq_next_page - rqstp->rq_respages; 870 + 871 + if (count) { 872 + release_pages(rqstp->rq_respages, count); 873 + for (i = 0; i < count; i++) 874 + rqstp->rq_respages[i] = NULL; 875 + } 876 + } 872 877 873 878 /* 874 879 * Called from a server thread as it's exiting. 
Caller must hold the "service ··· 896 863 void 897 864 svc_rqst_free(struct svc_rqst *rqstp) 898 865 { 866 + pagevec_release(&rqstp->rq_pvec); 899 867 svc_release_buffer(rqstp); 900 868 if (rqstp->rq_scratch_page) 901 869 put_page(rqstp->rq_scratch_page); ··· 1465 1431 goto sendit; 1466 1432 } 1467 1433 1468 - /* 1469 - * Process the RPC request. 1434 + /** 1435 + * svc_process - Execute one RPC transaction 1436 + * @rqstp: RPC transaction context 1437 + * 1470 1438 */ 1471 - int 1472 - svc_process(struct svc_rqst *rqstp) 1439 + void svc_process(struct svc_rqst *rqstp) 1473 1440 { 1474 1441 struct kvec *resv = &rqstp->rq_res.head[0]; 1475 1442 __be32 *p; ··· 1506 1471 1507 1472 if (!svc_process_common(rqstp)) 1508 1473 goto out_drop; 1509 - return svc_send(rqstp); 1474 + svc_send(rqstp); 1475 + return; 1510 1476 1511 1477 out_baddir: 1512 1478 svc_printk(rqstp, "bad direction 0x%08x, dropping request\n", ··· 1515 1479 rqstp->rq_server->sv_stats->rpcbadfmt++; 1516 1480 out_drop: 1517 1481 svc_drop(rqstp); 1518 - return 0; 1519 1482 } 1520 1483 EXPORT_SYMBOL_GPL(svc_process); 1521 1484
+16 -17
net/sunrpc/svc_xprt.c
··· 427 427 428 428 if (xpt_flags & BIT(XPT_BUSY)) 429 429 return false; 430 - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) 430 + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) 431 431 return true; 432 432 if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { 433 433 if (xprt->xpt_ops->xpo_has_wspace(xprt) && ··· 541 541 kfree(rqstp->rq_deferred); 542 542 rqstp->rq_deferred = NULL; 543 543 544 - pagevec_release(&rqstp->rq_pvec); 545 - svc_free_res_pages(rqstp); 544 + svc_rqst_release_pages(rqstp); 546 545 rqstp->rq_res.page_len = 0; 547 546 rqstp->rq_res.page_base = 0; 548 547 ··· 666 667 struct xdr_buf *arg = &rqstp->rq_arg; 667 668 unsigned long pages, filled, ret; 668 669 669 - pagevec_init(&rqstp->rq_pvec); 670 - 671 670 pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT; 672 671 if (pages > RPCSVC_MAXPAGES) { 673 672 pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n", ··· 701 704 arg->page_len = (pages-2)*PAGE_SIZE; 702 705 arg->len = (pages-1)*PAGE_SIZE; 703 706 arg->tail[0].iov_len = 0; 707 + 708 + rqstp->rq_xid = xdr_zero; 704 709 return 0; 705 710 } 706 711 ··· 828 829 module_put(xprt->xpt_class->xcl_owner); 829 830 } 830 831 svc_xprt_received(xprt); 832 + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { 833 + xprt->xpt_ops->xpo_handshake(xprt); 834 + svc_xprt_received(xprt); 831 835 } else if (svc_xprt_reserve_slot(rqstp, xprt)) { 832 836 /* XPT_DATA|XPT_DEFERRED case: */ 833 837 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", ··· 911 909 } 912 910 EXPORT_SYMBOL_GPL(svc_drop); 913 911 914 - /* 915 - * Return reply to client. 
912 + /** 913 + * svc_send - Return reply to client 914 + * @rqstp: RPC transaction context 915 + * 916 916 */ 917 - int svc_send(struct svc_rqst *rqstp) 917 + void svc_send(struct svc_rqst *rqstp) 918 918 { 919 919 struct svc_xprt *xprt; 920 - int len = -EFAULT; 921 920 struct xdr_buf *xb; 921 + int status; 922 922 923 923 xprt = rqstp->rq_xprt; 924 924 if (!xprt) 925 - goto out; 925 + return; 926 926 927 927 /* calculate over-all length */ 928 928 xb = &rqstp->rq_res; ··· 934 930 trace_svc_xdr_sendto(rqstp->rq_xid, xb); 935 931 trace_svc_stats_latency(rqstp); 936 932 937 - len = xprt->xpt_ops->xpo_sendto(rqstp); 933 + status = xprt->xpt_ops->xpo_sendto(rqstp); 938 934 939 - trace_svc_send(rqstp, len); 935 + trace_svc_send(rqstp, status); 940 936 svc_xprt_release(rqstp); 941 - 942 - if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) 943 - len = 0; 944 - out: 945 - return len; 946 937 } 947 938 948 939 /*
+15 -8
net/sunrpc/svcauth_unix.c
··· 17 17 #include <net/ipv6.h> 18 18 #include <linux/kernel.h> 19 19 #include <linux/user_namespace.h> 20 - #define RPCDBG_FACILITY RPCDBG_AUTH 20 + #include <trace/events/sunrpc.h> 21 21 22 + #define RPCDBG_FACILITY RPCDBG_AUTH 22 23 23 24 #include "netns.h" 24 25 ··· 226 225 return -EINVAL; 227 226 } 228 227 229 - expiry = get_expiry(&mesg); 230 - if (expiry ==0) 231 - return -EINVAL; 228 + err = get_expiry(&mesg, &expiry); 229 + if (err) 230 + return err; 232 231 233 232 /* domainname, or empty for NEGATIVE */ 234 233 len = qword_get(&mesg, buf, mlen); ··· 507 506 uid = make_kuid(current_user_ns(), id); 508 507 ug.uid = uid; 509 508 510 - expiry = get_expiry(&mesg); 511 - if (expiry == 0) 512 - return -EINVAL; 509 + err = get_expiry(&mesg, &expiry); 510 + if (err) 511 + return err; 513 512 514 513 rv = get_int(&mesg, &gids); 515 514 if (rv || gids < 0 || gids > 8192) ··· 833 832 { 834 833 struct xdr_stream *xdr = &rqstp->rq_arg_stream; 835 834 struct svc_cred *cred = &rqstp->rq_cred; 835 + struct svc_xprt *xprt = rqstp->rq_xprt; 836 836 u32 flavor, len; 837 837 void *body; 838 838 __be32 *p; ··· 867 865 if (cred->cr_group_info == NULL) 868 866 return SVC_CLOSE; 869 867 870 - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { 868 + if (xprt->xpt_ops->xpo_handshake) { 871 869 p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); 872 870 if (!p) 873 871 return SVC_CLOSE; 872 + trace_svc_tls_start(xprt); 874 873 *p++ = rpc_auth_null; 875 874 *p++ = cpu_to_be32(8); 876 875 memcpy(p, "STARTTLS", 8); 876 + 877 + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); 878 + svc_xprt_enqueue(xprt); 877 879 } else { 880 + trace_svc_tls_unavailable(xprt); 878 881 if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, 879 882 RPC_AUTH_NULL, NULL, 0) < 0) 880 883 return SVC_CLOSE;
+162 -12
net/sunrpc/svcsock.c
··· 43 43 #include <net/udp.h> 44 44 #include <net/tcp.h> 45 45 #include <net/tcp_states.h> 46 + #include <net/tls.h> 47 + #include <net/handshake.h> 46 48 #include <linux/uaccess.h> 47 49 #include <linux/highmem.h> 48 50 #include <asm/ioctls.h> 51 + #include <linux/key.h> 49 52 50 53 #include <linux/sunrpc/types.h> 51 54 #include <linux/sunrpc/clnt.h> ··· 66 63 67 64 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 68 65 66 + /* To-do: to avoid tying up an nfsd thread while waiting for a 67 + * handshake request, the request could instead be deferred. 68 + */ 69 + enum { 70 + SVC_HANDSHAKE_TO = 5U * HZ 71 + }; 69 72 70 73 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, 71 74 int flags); ··· 225 216 return len; 226 217 } 227 218 219 + static int 220 + svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg, 221 + struct cmsghdr *cmsg, int ret) 222 + { 223 + if (cmsg->cmsg_level == SOL_TLS && 224 + cmsg->cmsg_type == TLS_GET_RECORD_TYPE) { 225 + u8 content_type = *((u8 *)CMSG_DATA(cmsg)); 226 + 227 + switch (content_type) { 228 + case TLS_RECORD_TYPE_DATA: 229 + /* TLS sets EOR at the end of each application data 230 + * record, even though there might be more frames 231 + * waiting to be decrypted. 
232 + */ 233 + msg->msg_flags &= ~MSG_EOR; 234 + break; 235 + case TLS_RECORD_TYPE_ALERT: 236 + ret = -ENOTCONN; 237 + break; 238 + default: 239 + ret = -EAGAIN; 240 + } 241 + } 242 + return ret; 243 + } 244 + 245 + static int 246 + svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) 247 + { 248 + union { 249 + struct cmsghdr cmsg; 250 + u8 buf[CMSG_SPACE(sizeof(u8))]; 251 + } u; 252 + int ret; 253 + 254 + msg->msg_control = &u; 255 + msg->msg_controllen = sizeof(u); 256 + ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT); 257 + if (unlikely(msg->msg_controllen != sizeof(u))) 258 + ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret); 259 + return ret; 260 + } 261 + 228 262 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 229 263 static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) 230 264 { ··· 315 263 iov_iter_advance(&msg.msg_iter, seek); 316 264 buflen -= seek; 317 265 } 318 - len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); 266 + len = svc_tcp_sock_recv_cmsg(svsk, &msg); 319 267 if (len > 0) 320 268 svc_flush_bvec(bvec, len, seek); 321 269 ··· 367 315 rmb(); 368 316 svsk->sk_odata(sk); 369 317 trace_svcsock_data_ready(&svsk->sk_xprt, 0); 318 + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) 319 + return; 370 320 if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) 371 321 svc_xprt_enqueue(&svsk->sk_xprt); 372 322 } ··· 404 350 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 405 351 406 352 sock_no_linger(svsk->sk_sock->sk); 353 + } 354 + 355 + /** 356 + * svc_tcp_handshake_done - Handshake completion handler 357 + * @data: address of xprt to wake 358 + * @status: status of handshake 359 + * @peerid: serial number of key containing the remote peer's identity 360 + * 361 + * If a security policy is specified as an export option, we don't 362 + * have a specific export here to check. 
So we set a "TLS session 363 + * is present" flag on the xprt and let an upper layer enforce local 364 + * security policy. 365 + */ 366 + static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) 367 + { 368 + struct svc_xprt *xprt = data; 369 + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 370 + 371 + if (!status) { 372 + if (peerid != TLS_NO_PEERID) 373 + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); 374 + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); 375 + } 376 + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); 377 + complete_all(&svsk->sk_handshake_done); 378 + } 379 + 380 + /** 381 + * svc_tcp_handshake - Perform a transport-layer security handshake 382 + * @xprt: connected transport endpoint 383 + * 384 + */ 385 + static void svc_tcp_handshake(struct svc_xprt *xprt) 386 + { 387 + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 388 + struct sock *sk = svsk->sk_sock->sk; 389 + struct tls_handshake_args args = { 390 + .ta_sock = svsk->sk_sock, 391 + .ta_done = svc_tcp_handshake_done, 392 + .ta_data = xprt, 393 + }; 394 + int ret; 395 + 396 + trace_svc_tls_upcall(xprt); 397 + 398 + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); 399 + init_completion(&svsk->sk_handshake_done); 400 + 401 + ret = tls_server_hello_x509(&args, GFP_KERNEL); 402 + if (ret) { 403 + trace_svc_tls_not_started(xprt); 404 + goto out_failed; 405 + } 406 + 407 + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, 408 + SVC_HANDSHAKE_TO); 409 + if (ret <= 0) { 410 + if (tls_handshake_cancel(sk)) { 411 + trace_svc_tls_timed_out(xprt); 412 + goto out_close; 413 + } 414 + } 415 + 416 + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { 417 + trace_svc_tls_unavailable(xprt); 418 + goto out_close; 419 + } 420 + 421 + /* Mark the transport ready in case the remote sent RPC 422 + * traffic before the kernel received the handshake 423 + * completion downcall. 
424 + */ 425 + set_bit(XPT_DATA, &xprt->xpt_flags); 426 + svc_xprt_enqueue(xprt); 427 + return; 428 + 429 + out_close: 430 + set_bit(XPT_CLOSE, &xprt->xpt_flags); 431 + out_failed: 432 + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); 433 + set_bit(XPT_DATA, &xprt->xpt_flags); 434 + svc_xprt_enqueue(xprt); 407 435 } 408 436 409 437 /* ··· 1013 877 iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; 1014 878 iov.iov_len = want; 1015 879 iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); 1016 - len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); 880 + len = svc_tcp_sock_recv_cmsg(svsk, &msg); 1017 881 if (len < 0) 1018 882 return len; 1019 883 svsk->sk_tcplen += len; ··· 1349 1213 .xpo_has_wspace = svc_tcp_has_wspace, 1350 1214 .xpo_accept = svc_tcp_accept, 1351 1215 .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, 1216 + .xpo_handshake = svc_tcp_handshake, 1352 1217 }; 1353 1218 1354 1219 static struct svc_xprt_class svc_tcp_class = { ··· 1430 1293 struct socket *sock, 1431 1294 int flags) 1432 1295 { 1296 + struct file *filp = NULL; 1433 1297 struct svc_sock *svsk; 1434 1298 struct sock *inet; 1435 1299 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1436 - int err = 0; 1437 1300 1438 1301 svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); 1439 1302 if (!svsk) 1440 1303 return ERR_PTR(-ENOMEM); 1441 1304 1305 + if (!sock->file) { 1306 + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); 1307 + if (IS_ERR(filp)) { 1308 + kfree(svsk); 1309 + return ERR_CAST(filp); 1310 + } 1311 + } 1312 + 1442 1313 inet = sock->sk; 1443 1314 1444 - /* Register socket with portmapper */ 1445 - if (pmap_register) 1315 + if (pmap_register) { 1316 + int err; 1317 + 1446 1318 err = svc_register(serv, sock_net(sock->sk), inet->sk_family, 1447 1319 inet->sk_protocol, 1448 1320 ntohs(inet_sk(inet)->inet_sport)); 1449 - 1450 - if (err < 0) { 1451 - kfree(svsk); 1452 - return ERR_PTR(err); 1321 + if (err < 0) { 1322 + if (filp) 1323 + fput(filp); 1324 + kfree(svsk); 1325 + return 
ERR_PTR(err); 1326 + } 1453 1327 } 1454 1328 1455 1329 svsk->sk_sock = sock; ··· 1673 1525 static void svc_sock_free(struct svc_xprt *xprt) 1674 1526 { 1675 1527 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1528 + struct socket *sock = svsk->sk_sock; 1676 1529 1677 - if (svsk->sk_sock->file) 1678 - sockfd_put(svsk->sk_sock); 1530 + tls_handshake_cancel(sock->sk); 1531 + if (sock->file) 1532 + sockfd_put(sock); 1679 1533 else 1680 - sock_release(svsk->sk_sock); 1534 + sock_release(sock); 1681 1535 kfree(svsk); 1682 1536 }
+2 -19
net/sunrpc/xprtrdma/svc_rdma.c
··· 212 212 { }, 213 213 }; 214 214 215 - static struct ctl_table svcrdma_table[] = { 216 - { 217 - .procname = "svc_rdma", 218 - .mode = 0555, 219 - .child = svcrdma_parm_table 220 - }, 221 - { }, 222 - }; 223 - 224 - static struct ctl_table svcrdma_root_table[] = { 225 - { 226 - .procname = "sunrpc", 227 - .mode = 0555, 228 - .child = svcrdma_table 229 - }, 230 - { }, 231 - }; 232 - 233 215 static void svc_rdma_proc_cleanup(void) 234 216 { 235 217 if (!svcrdma_table_header) ··· 245 263 if (rc) 246 264 goto out_err; 247 265 248 - svcrdma_table_header = register_sysctl_table(svcrdma_root_table); 266 + svcrdma_table_header = register_sysctl("sunrpc/svc_rdma", 267 + svcrdma_parm_table); 249 268 return 0; 250 269 251 270 out_err: