Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nfsd-5.9' of git://git.linux-nfs.org/projects/cel/cel-2.6

Pull NFS server updates from Chuck Lever:
"Highlights:
- Support for user extended attributes on NFS (RFC 8276)
- Further reduce unnecessary NFSv4 delegation recalls

Notable fixes:
- Fix recent krb5p regression
- Address a few resource leaks and a rare NULL dereference

Other:
- De-duplicate RPC/RDMA error handling and other utility functions
- Replace storage and display of kernel memory addresses by tracepoints"

* tag 'nfsd-5.9' of git://git.linux-nfs.org/projects/cel/cel-2.6: (38 commits)
svcrdma: CM event handler clean up
svcrdma: Remove transport reference counting
svcrdma: Fix another Receive buffer leak
SUNRPC: Refresh the show_rqstp_flags() macro
nfsd: netns.h: delete a duplicated word
SUNRPC: Fix ("SUNRPC: Add "@len" parameter to gss_unwrap()")
nfsd: avoid a NULL dereference in __cld_pipe_upcall()
nfsd4: a client's own opens needn't prevent delegations
nfsd: Use seq_putc() in two functions
svcrdma: Display chunk completion ID when posting a rw_ctxt
svcrdma: Record send_ctxt completion ID in trace_svcrdma_post_send()
svcrdma: Introduce Send completion IDs
svcrdma: Record Receive completion ID in svc_rdma_decode_rqst
svcrdma: Introduce Receive completion IDs
svcrdma: Introduce infrastructure to support completion IDs
svcrdma: Add common XDR encoders for RDMA and Read segments
svcrdma: Add common XDR decoders for RDMA and Read segments
SUNRPC: Add helpers for decoding list discriminators symbolically
svcrdma: Remove declarations for functions long removed
svcrdma: Clean up trace_svcrdma_send_failed() tracepoint
...

+1816 -475
+3
fs/locks.c
··· 1808 1808 1809 1809 if (flags & FL_LAYOUT) 1810 1810 return 0; 1811 + if (flags & FL_DELEG) 1812 + /* We leave these checks to the caller. */ 1813 + return 0; 1811 1814 1812 1815 if (arg == F_RDLCK) 1813 1816 return inode_is_open_for_write(inode) ? -EAGAIN : 0;
+1 -1
fs/nfsd/netns.h
··· 171 171 unsigned int longest_chain_cachesize; 172 172 173 173 struct shrinker nfsd_reply_cache_shrinker; 174 - /* utsname taken from the the process that starts the server */ 174 + /* utsname taken from the process that starts the server */ 175 175 char nfsd_name[UNX_MAXNODENAME+1]; 176 176 }; 177 177
+2 -2
fs/nfsd/nfs4idmap.c
··· 168 168 ent->id); 169 169 if (test_bit(CACHE_VALID, &h->flags)) 170 170 seq_printf(m, " %s", ent->name); 171 - seq_printf(m, "\n"); 171 + seq_putc(m, '\n'); 172 172 return 0; 173 173 } 174 174 ··· 346 346 ent->name); 347 347 if (test_bit(CACHE_VALID, &h->flags)) 348 348 seq_printf(m, " %u", ent->id); 349 - seq_printf(m, "\n"); 349 + seq_putc(m, '\n'); 350 350 return 0; 351 351 } 352 352
+127 -1
fs/nfsd/nfs4proc.c
··· 566 566 union nfsd4_op_u *u) 567 567 { 568 568 struct nfsd4_access *access = &u->access; 569 + u32 access_full; 569 570 570 - if (access->ac_req_access & ~NFS3_ACCESS_FULL) 571 + access_full = NFS3_ACCESS_FULL; 572 + if (cstate->minorversion >= 2) 573 + access_full |= NFS4_ACCESS_XALIST | NFS4_ACCESS_XAREAD | 574 + NFS4_ACCESS_XAWRITE; 575 + 576 + if (access->ac_req_access & ~access_full) 571 577 return nfserr_inval; 572 578 573 579 access->ac_resp_access = access->ac_req_access; ··· 2097 2091 } 2098 2092 #endif /* CONFIG_NFSD_PNFS */ 2099 2093 2094 + static __be32 2095 + nfsd4_getxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 2096 + union nfsd4_op_u *u) 2097 + { 2098 + struct nfsd4_getxattr *getxattr = &u->getxattr; 2099 + 2100 + return nfsd_getxattr(rqstp, &cstate->current_fh, 2101 + getxattr->getxa_name, &getxattr->getxa_buf, 2102 + &getxattr->getxa_len); 2103 + } 2104 + 2105 + static __be32 2106 + nfsd4_setxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 2107 + union nfsd4_op_u *u) 2108 + { 2109 + struct nfsd4_setxattr *setxattr = &u->setxattr; 2110 + __be32 ret; 2111 + 2112 + if (opens_in_grace(SVC_NET(rqstp))) 2113 + return nfserr_grace; 2114 + 2115 + ret = nfsd_setxattr(rqstp, &cstate->current_fh, setxattr->setxa_name, 2116 + setxattr->setxa_buf, setxattr->setxa_len, 2117 + setxattr->setxa_flags); 2118 + 2119 + if (!ret) 2120 + set_change_info(&setxattr->setxa_cinfo, &cstate->current_fh); 2121 + 2122 + return ret; 2123 + } 2124 + 2125 + static __be32 2126 + nfsd4_listxattrs(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 2127 + union nfsd4_op_u *u) 2128 + { 2129 + /* 2130 + * Get the entire list, then copy out only the user attributes 2131 + * in the encode function. 
2132 + */ 2133 + return nfsd_listxattr(rqstp, &cstate->current_fh, 2134 + &u->listxattrs.lsxa_buf, &u->listxattrs.lsxa_len); 2135 + } 2136 + 2137 + static __be32 2138 + nfsd4_removexattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 2139 + union nfsd4_op_u *u) 2140 + { 2141 + struct nfsd4_removexattr *removexattr = &u->removexattr; 2142 + __be32 ret; 2143 + 2144 + if (opens_in_grace(SVC_NET(rqstp))) 2145 + return nfserr_grace; 2146 + 2147 + ret = nfsd_removexattr(rqstp, &cstate->current_fh, 2148 + removexattr->rmxa_name); 2149 + 2150 + if (!ret) 2151 + set_change_info(&removexattr->rmxa_cinfo, &cstate->current_fh); 2152 + 2153 + return ret; 2154 + } 2155 + 2100 2156 /* 2101 2157 * NULL call. 2102 2158 */ ··· 2768 2700 return (op_encode_hdr_size + 3) * sizeof(__be32); 2769 2701 } 2770 2702 2703 + static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp, 2704 + struct nfsd4_op *op) 2705 + { 2706 + u32 maxcount, rlen; 2707 + 2708 + maxcount = svc_max_payload(rqstp); 2709 + rlen = min_t(u32, XATTR_SIZE_MAX, maxcount); 2710 + 2711 + return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); 2712 + } 2713 + 2714 + static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp, 2715 + struct nfsd4_op *op) 2716 + { 2717 + return (op_encode_hdr_size + op_encode_change_info_maxsz) 2718 + * sizeof(__be32); 2719 + } 2720 + static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp, 2721 + struct nfsd4_op *op) 2722 + { 2723 + u32 maxcount, rlen; 2724 + 2725 + maxcount = svc_max_payload(rqstp); 2726 + rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount); 2727 + 2728 + return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); 2729 + } 2730 + 2731 + static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp, 2732 + struct nfsd4_op *op) 2733 + { 2734 + return (op_encode_hdr_size + op_encode_change_info_maxsz) 2735 + * sizeof(__be32); 2736 + } 2737 + 2738 + 2771 2739 static const struct nfsd4_operation nfsd4_ops[] = { 2772 2740 
[OP_ACCESS] = { 2773 2741 .op_func = nfsd4_access, ··· 3184 3080 .op_flags = OP_MODIFIES_SOMETHING, 3185 3081 .op_name = "OP_COPY_NOTIFY", 3186 3082 .op_rsize_bop = nfsd4_copy_notify_rsize, 3083 + }, 3084 + [OP_GETXATTR] = { 3085 + .op_func = nfsd4_getxattr, 3086 + .op_name = "OP_GETXATTR", 3087 + .op_rsize_bop = nfsd4_getxattr_rsize, 3088 + }, 3089 + [OP_SETXATTR] = { 3090 + .op_func = nfsd4_setxattr, 3091 + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 3092 + .op_name = "OP_SETXATTR", 3093 + .op_rsize_bop = nfsd4_setxattr_rsize, 3094 + }, 3095 + [OP_LISTXATTRS] = { 3096 + .op_func = nfsd4_listxattrs, 3097 + .op_name = "OP_LISTXATTRS", 3098 + .op_rsize_bop = nfsd4_listxattrs_rsize, 3099 + }, 3100 + [OP_REMOVEXATTR] = { 3101 + .op_func = nfsd4_removexattr, 3102 + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 3103 + .op_name = "OP_REMOVEXATTR", 3104 + .op_rsize_bop = nfsd4_removexattr_rsize, 3187 3105 }, 3188 3106 }; 3189 3107
+11 -13
fs/nfsd/nfs4recover.c
··· 747 747 }; 748 748 749 749 static int 750 - __cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) 750 + __cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) 751 751 { 752 752 int ret; 753 753 struct rpc_pipe_msg msg; 754 754 struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u); 755 - struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info, 756 - nfsd_net_id); 757 755 758 756 memset(&msg, 0, sizeof(msg)); 759 757 msg.data = cmsg; ··· 771 773 } 772 774 773 775 static int 774 - cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) 776 + cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) 775 777 { 776 778 int ret; 777 779 ··· 780 782 * upcalls queued. 781 783 */ 782 784 do { 783 - ret = __cld_pipe_upcall(pipe, cmsg); 785 + ret = __cld_pipe_upcall(pipe, cmsg, nn); 784 786 } while (ret == -EAGAIN); 785 787 786 788 return ret; ··· 1113 1115 memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, 1114 1116 clp->cl_name.len); 1115 1117 1116 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1118 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1117 1119 if (!ret) { 1118 1120 ret = cup->cu_u.cu_msg.cm_status; 1119 1121 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); ··· 1178 1180 } else 1179 1181 cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; 1180 1182 1181 - ret = cld_pipe_upcall(cn->cn_pipe, cmsg); 1183 + ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn); 1182 1184 if (!ret) { 1183 1185 ret = cmsg->cm_status; 1184 1186 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); ··· 1216 1218 memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, 1217 1219 clp->cl_name.len); 1218 1220 1219 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1221 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1220 1222 if (!ret) { 1221 1223 ret = cup->cu_u.cu_msg.cm_status; 1222 1224 clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); ··· 1259 1261 memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, 
clp->cl_name.data, 1260 1262 clp->cl_name.len); 1261 1263 1262 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1264 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1263 1265 if (!ret) { 1264 1266 ret = cup->cu_u.cu_msg.cm_status; 1265 1267 set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); ··· 1402 1404 } 1403 1405 1404 1406 cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart; 1405 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1407 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1406 1408 if (!ret) 1407 1409 ret = cup->cu_u.cu_msg.cm_status; 1408 1410 ··· 1430 1432 1431 1433 cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; 1432 1434 cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; 1433 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1435 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1434 1436 if (!ret) 1435 1437 ret = cup->cu_u.cu_msg.cm_status; 1436 1438 ··· 1458 1460 } 1459 1461 1460 1462 cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; 1461 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1463 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1462 1464 if (!ret) 1463 1465 ret = cup->cu_u.cu_msg.cm_status; 1464 1466 ··· 1522 1524 goto out_err; 1523 1525 } 1524 1526 cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion; 1525 - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); 1527 + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); 1526 1528 if (!ret) { 1527 1529 ret = cup->cu_u.cu_msg.cm_status; 1528 1530 if (ret)
+40 -14
fs/nfsd/nfs4state.c
··· 4940 4940 return fl; 4941 4941 } 4942 4942 4943 + static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, 4944 + struct nfs4_file *fp) 4945 + { 4946 + struct nfs4_clnt_odstate *co; 4947 + struct file *f = fp->fi_deleg_file->nf_file; 4948 + struct inode *ino = locks_inode(f); 4949 + int writes = atomic_read(&ino->i_writecount); 4950 + 4951 + if (fp->fi_fds[O_WRONLY]) 4952 + writes--; 4953 + if (fp->fi_fds[O_RDWR]) 4954 + writes--; 4955 + WARN_ON_ONCE(writes < 0); 4956 + if (writes > 0) 4957 + return -EAGAIN; 4958 + spin_lock(&fp->fi_lock); 4959 + list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { 4960 + if (co->co_client != clp) { 4961 + spin_unlock(&fp->fi_lock); 4962 + return -EAGAIN; 4963 + } 4964 + } 4965 + spin_unlock(&fp->fi_lock); 4966 + return 0; 4967 + } 4968 + 4943 4969 static struct nfs4_delegation * 4944 4970 nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, 4945 4971 struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) ··· 4985 4959 4986 4960 nf = find_readable_file(fp); 4987 4961 if (!nf) { 4988 - /* We should always have a readable file here */ 4989 - WARN_ON_ONCE(1); 4990 - return ERR_PTR(-EBADF); 4962 + /* 4963 + * We probably could attempt another open and get a read 4964 + * delegation, but for now, don't bother until the 4965 + * client actually sends us one. 
4966 + */ 4967 + return ERR_PTR(-EAGAIN); 4991 4968 } 4992 4969 spin_lock(&state_lock); 4993 4970 spin_lock(&fp->fi_lock); ··· 5020 4991 if (!fl) 5021 4992 goto out_clnt_odstate; 5022 4993 4994 + status = nfsd4_check_conflicting_opens(clp, fp); 4995 + if (status) { 4996 + locks_free_lock(fl); 4997 + goto out_clnt_odstate; 4998 + } 5023 4999 status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL); 5024 5000 if (fl) 5025 5001 locks_free_lock(fl); 5002 + if (status) 5003 + goto out_clnt_odstate; 5004 + status = nfsd4_check_conflicting_opens(clp, fp); 5026 5005 if (status) 5027 5006 goto out_clnt_odstate; 5028 5007 ··· 5113 5076 if (locks_in_grace(clp->net)) 5114 5077 goto out_no_deleg; 5115 5078 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) 5116 - goto out_no_deleg; 5117 - /* 5118 - * Also, if the file was opened for write or 5119 - * create, there's a good chance the client's 5120 - * about to write to it, resulting in an 5121 - * immediate recall (since we don't support 5122 - * write delegations): 5123 - */ 5124 - if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 5125 - goto out_no_deleg; 5126 - if (open->op_create == NFS4_OPEN_CREATE) 5127 5079 goto out_no_deleg; 5128 5080 break; 5129 5081 default:
+501 -30
fs/nfsd/nfs4xdr.c
··· 41 41 #include <linux/pagemap.h> 42 42 #include <linux/sunrpc/svcauth_gss.h> 43 43 #include <linux/sunrpc/addr.h> 44 + #include <linux/xattr.h> 45 + #include <uapi/linux/xattr.h> 44 46 45 47 #include "idmap.h" 46 48 #include "acl.h" ··· 257 255 memcpy(p, buf, len); 258 256 p[len] = '\0'; 259 257 return p; 258 + } 259 + 260 + static __be32 261 + svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head, 262 + struct page ***pagelist, u32 buflen) 263 + { 264 + int avail; 265 + int len; 266 + int pages; 267 + 268 + /* Sorry .. no magic macros for this.. * 269 + * READ_BUF(write->wr_buflen); 270 + * SAVEMEM(write->wr_buf, write->wr_buflen); 271 + */ 272 + avail = (char *)argp->end - (char *)argp->p; 273 + if (avail + argp->pagelen < buflen) { 274 + dprintk("NFSD: xdr error (%s:%d)\n", 275 + __FILE__, __LINE__); 276 + return nfserr_bad_xdr; 277 + } 278 + head->iov_base = argp->p; 279 + head->iov_len = avail; 280 + *pagelist = argp->pagelist; 281 + 282 + len = XDR_QUADLEN(buflen) << 2; 283 + if (len >= avail) { 284 + len -= avail; 285 + 286 + pages = len >> PAGE_SHIFT; 287 + argp->pagelist += pages; 288 + argp->pagelen -= pages * PAGE_SIZE; 289 + len -= pages * PAGE_SIZE; 290 + 291 + next_decode_page(argp); 292 + } 293 + argp->p += XDR_QUADLEN(len); 294 + 295 + return 0; 260 296 } 261 297 262 298 /** ··· 1305 1265 static __be32 1306 1266 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) 1307 1267 { 1308 - int avail; 1309 - int len; 1310 1268 DECODE_HEAD; 1311 1269 1312 1270 status = nfsd4_decode_stateid(argp, &write->wr_stateid); ··· 1317 1279 goto xdr_error; 1318 1280 write->wr_buflen = be32_to_cpup(p++); 1319 1281 1320 - /* Sorry .. no magic macros for this.. 
* 1321 - * READ_BUF(write->wr_buflen); 1322 - * SAVEMEM(write->wr_buf, write->wr_buflen); 1323 - */ 1324 - avail = (char*)argp->end - (char*)argp->p; 1325 - if (avail + argp->pagelen < write->wr_buflen) { 1326 - dprintk("NFSD: xdr error (%s:%d)\n", 1327 - __FILE__, __LINE__); 1328 - goto xdr_error; 1329 - } 1330 - write->wr_head.iov_base = p; 1331 - write->wr_head.iov_len = avail; 1332 - write->wr_pagelist = argp->pagelist; 1333 - 1334 - len = XDR_QUADLEN(write->wr_buflen) << 2; 1335 - if (len >= avail) { 1336 - int pages; 1337 - 1338 - len -= avail; 1339 - 1340 - pages = len >> PAGE_SHIFT; 1341 - argp->pagelist += pages; 1342 - argp->pagelen -= pages * PAGE_SIZE; 1343 - len -= pages * PAGE_SIZE; 1344 - 1345 - next_decode_page(argp); 1346 - } 1347 - argp->p += XDR_QUADLEN(len); 1282 + status = svcxdr_construct_vector(argp, &write->wr_head, 1283 + &write->wr_pagelist, write->wr_buflen); 1284 + if (status) 1285 + return status; 1348 1286 1349 1287 DECODE_TAIL; 1350 1288 } ··· 1879 1865 DECODE_TAIL; 1880 1866 } 1881 1867 1868 + /* 1869 + * XDR data that is more than PAGE_SIZE in size is normally part of a 1870 + * read or write. However, the size of extended attributes is limited 1871 + * by the maximum request size, and then further limited by the underlying 1872 + * filesystem limits. This can exceed PAGE_SIZE (currently, XATTR_SIZE_MAX 1873 + * is 64k). Since there is no kvec- or page-based interface to xattrs, 1874 + * and we're not dealing with contiguous pages, we need to do some copying. 1875 + */ 1876 + 1877 + /* 1878 + * Decode data into buffer. Uses head and pages constructed by 1879 + * svcxdr_construct_vector. 1880 + */ 1881 + static __be32 1882 + nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head, 1883 + struct page **pages, char **bufp, u32 buflen) 1884 + { 1885 + char *tmp, *dp; 1886 + u32 len; 1887 + 1888 + if (buflen <= head->iov_len) { 1889 + /* 1890 + * We're in luck, the head has enough space. 
Just return 1891 + * the head, no need for copying. 1892 + */ 1893 + *bufp = head->iov_base; 1894 + return 0; 1895 + } 1896 + 1897 + tmp = svcxdr_tmpalloc(argp, buflen); 1898 + if (tmp == NULL) 1899 + return nfserr_jukebox; 1900 + 1901 + dp = tmp; 1902 + memcpy(dp, head->iov_base, head->iov_len); 1903 + buflen -= head->iov_len; 1904 + dp += head->iov_len; 1905 + 1906 + while (buflen > 0) { 1907 + len = min_t(u32, buflen, PAGE_SIZE); 1908 + memcpy(dp, page_address(*pages), len); 1909 + 1910 + buflen -= len; 1911 + dp += len; 1912 + pages++; 1913 + } 1914 + 1915 + *bufp = tmp; 1916 + return 0; 1917 + } 1918 + 1919 + /* 1920 + * Get a user extended attribute name from the XDR buffer. 1921 + * It will not have the "user." prefix, so prepend it. 1922 + * Lastly, check for nul characters in the name. 1923 + */ 1924 + static __be32 1925 + nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) 1926 + { 1927 + DECODE_HEAD; 1928 + char *name, *sp, *dp; 1929 + u32 namelen, cnt; 1930 + 1931 + READ_BUF(4); 1932 + namelen = be32_to_cpup(p++); 1933 + 1934 + if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) 1935 + return nfserr_nametoolong; 1936 + 1937 + if (namelen == 0) 1938 + goto xdr_error; 1939 + 1940 + READ_BUF(namelen); 1941 + 1942 + name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1); 1943 + if (!name) 1944 + return nfserr_jukebox; 1945 + 1946 + memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); 1947 + 1948 + /* 1949 + * Copy the extended attribute name over while checking for 0 1950 + * characters. 1951 + */ 1952 + sp = (char *)p; 1953 + dp = name + XATTR_USER_PREFIX_LEN; 1954 + cnt = namelen; 1955 + 1956 + while (cnt-- > 0) { 1957 + if (*sp == '\0') 1958 + goto xdr_error; 1959 + *dp++ = *sp++; 1960 + } 1961 + *dp = '\0'; 1962 + 1963 + *namep = name; 1964 + 1965 + DECODE_TAIL; 1966 + } 1967 + 1968 + /* 1969 + * A GETXATTR op request comes without a length specifier. 
We just set the 1970 + * maximum length for the reply based on XATTR_SIZE_MAX and the maximum 1971 + * channel reply size. nfsd_getxattr will probe the length of the xattr, 1972 + * check it against getxa_len, and allocate + return the value. 1973 + */ 1974 + static __be32 1975 + nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, 1976 + struct nfsd4_getxattr *getxattr) 1977 + { 1978 + __be32 status; 1979 + u32 maxcount; 1980 + 1981 + status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name); 1982 + if (status) 1983 + return status; 1984 + 1985 + maxcount = svc_max_payload(argp->rqstp); 1986 + maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); 1987 + 1988 + getxattr->getxa_len = maxcount; 1989 + 1990 + return status; 1991 + } 1992 + 1993 + static __be32 1994 + nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, 1995 + struct nfsd4_setxattr *setxattr) 1996 + { 1997 + DECODE_HEAD; 1998 + u32 flags, maxcount, size; 1999 + struct kvec head; 2000 + struct page **pagelist; 2001 + 2002 + READ_BUF(4); 2003 + flags = be32_to_cpup(p++); 2004 + 2005 + if (flags > SETXATTR4_REPLACE) 2006 + return nfserr_inval; 2007 + setxattr->setxa_flags = flags; 2008 + 2009 + status = nfsd4_decode_xattr_name(argp, &setxattr->setxa_name); 2010 + if (status) 2011 + return status; 2012 + 2013 + maxcount = svc_max_payload(argp->rqstp); 2014 + maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); 2015 + 2016 + READ_BUF(4); 2017 + size = be32_to_cpup(p++); 2018 + if (size > maxcount) 2019 + return nfserr_xattr2big; 2020 + 2021 + setxattr->setxa_len = size; 2022 + if (size > 0) { 2023 + status = svcxdr_construct_vector(argp, &head, &pagelist, size); 2024 + if (status) 2025 + return status; 2026 + 2027 + status = nfsd4_vbuf_from_vector(argp, &head, pagelist, 2028 + &setxattr->setxa_buf, size); 2029 + } 2030 + 2031 + DECODE_TAIL; 2032 + } 2033 + 2034 + static __be32 2035 + nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, 2036 + struct nfsd4_listxattrs *listxattrs) 2037 + { 2038 + 
DECODE_HEAD; 2039 + u32 maxcount; 2040 + 2041 + READ_BUF(12); 2042 + p = xdr_decode_hyper(p, &listxattrs->lsxa_cookie); 2043 + 2044 + /* 2045 + * If the cookie is too large to have even one user.x attribute 2046 + * plus trailing '\0' left in a maximum size buffer, it's invalid. 2047 + */ 2048 + if (listxattrs->lsxa_cookie >= 2049 + (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2))) 2050 + return nfserr_badcookie; 2051 + 2052 + maxcount = be32_to_cpup(p++); 2053 + if (maxcount < 8) 2054 + /* Always need at least 2 words (length and one character) */ 2055 + return nfserr_inval; 2056 + 2057 + maxcount = min(maxcount, svc_max_payload(argp->rqstp)); 2058 + listxattrs->lsxa_maxcount = maxcount; 2059 + 2060 + DECODE_TAIL; 2061 + } 2062 + 2063 + static __be32 2064 + nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, 2065 + struct nfsd4_removexattr *removexattr) 2066 + { 2067 + return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); 2068 + } 2069 + 1882 2070 static __be32 1883 2071 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) 1884 2072 { ··· 2177 1961 [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, 2178 1962 [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, 2179 1963 [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, 1964 + /* RFC 8276 extended atributes operations */ 1965 + [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, 1966 + [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, 1967 + [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, 1968 + [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, 2180 1969 }; 2181 1970 2182 1971 static inline bool ··· 3212 2991 goto out; 3213 2992 } 3214 2993 #endif 2994 + 2995 + if (bmval2 & FATTR4_WORD2_XATTR_SUPPORT) { 2996 + p = xdr_reserve_space(xdr, 4); 2997 + if (!p) 2998 + goto out_resource; 2999 + err = xattr_supported_namespace(d_inode(dentry), 3000 + XATTR_USER_PREFIX); 3001 + *p++ = cpu_to_be32(err == 0); 3002 + } 3215 3003 3216 3004 attrlen = htonl(xdr->buf->len - attrlen_offset - 4); 3217 3005 
write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); ··· 4676 4446 return nfserr; 4677 4447 } 4678 4448 4449 + /* 4450 + * Encode kmalloc-ed buffer in to XDR stream. 4451 + */ 4452 + static int 4453 + nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) 4454 + { 4455 + u32 cplen; 4456 + __be32 *p; 4457 + 4458 + cplen = min_t(unsigned long, buflen, 4459 + ((void *)xdr->end - (void *)xdr->p)); 4460 + p = xdr_reserve_space(xdr, cplen); 4461 + if (!p) 4462 + return nfserr_resource; 4463 + 4464 + memcpy(p, buf, cplen); 4465 + buf += cplen; 4466 + buflen -= cplen; 4467 + 4468 + while (buflen) { 4469 + cplen = min_t(u32, buflen, PAGE_SIZE); 4470 + p = xdr_reserve_space(xdr, cplen); 4471 + if (!p) 4472 + return nfserr_resource; 4473 + 4474 + memcpy(p, buf, cplen); 4475 + 4476 + if (cplen < PAGE_SIZE) { 4477 + /* 4478 + * We're done, with a length that wasn't page 4479 + * aligned, so possibly not word aligned. Pad 4480 + * any trailing bytes with 0. 4481 + */ 4482 + xdr_encode_opaque_fixed(p, NULL, cplen); 4483 + break; 4484 + } 4485 + 4486 + buflen -= PAGE_SIZE; 4487 + buf += PAGE_SIZE; 4488 + } 4489 + 4490 + return 0; 4491 + } 4492 + 4493 + static __be32 4494 + nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, 4495 + struct nfsd4_getxattr *getxattr) 4496 + { 4497 + struct xdr_stream *xdr = &resp->xdr; 4498 + __be32 *p, err; 4499 + 4500 + p = xdr_reserve_space(xdr, 4); 4501 + if (!p) 4502 + return nfserr_resource; 4503 + 4504 + *p = cpu_to_be32(getxattr->getxa_len); 4505 + 4506 + if (getxattr->getxa_len == 0) 4507 + return 0; 4508 + 4509 + err = nfsd4_vbuf_to_stream(xdr, getxattr->getxa_buf, 4510 + getxattr->getxa_len); 4511 + 4512 + kvfree(getxattr->getxa_buf); 4513 + 4514 + return err; 4515 + } 4516 + 4517 + static __be32 4518 + nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, 4519 + struct nfsd4_setxattr *setxattr) 4520 + { 4521 + struct xdr_stream *xdr = &resp->xdr; 4522 + __be32 *p; 4523 + 4524 + p = 
xdr_reserve_space(xdr, 20); 4525 + if (!p) 4526 + return nfserr_resource; 4527 + 4528 + encode_cinfo(p, &setxattr->setxa_cinfo); 4529 + 4530 + return 0; 4531 + } 4532 + 4533 + /* 4534 + * See if there are cookie values that can be rejected outright. 4535 + */ 4536 + static __be32 4537 + nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, 4538 + u32 *offsetp) 4539 + { 4540 + u64 cookie = listxattrs->lsxa_cookie; 4541 + 4542 + /* 4543 + * If the cookie is larger than the maximum number we can fit 4544 + * in either the buffer we just got back from vfs_listxattr, or, 4545 + * XDR-encoded, in the return buffer, it's invalid. 4546 + */ 4547 + if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2)) 4548 + return nfserr_badcookie; 4549 + 4550 + if (cookie > (listxattrs->lsxa_maxcount / 4551 + (XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4))) 4552 + return nfserr_badcookie; 4553 + 4554 + *offsetp = (u32)cookie; 4555 + return 0; 4556 + } 4557 + 4558 + static __be32 4559 + nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, 4560 + struct nfsd4_listxattrs *listxattrs) 4561 + { 4562 + struct xdr_stream *xdr = &resp->xdr; 4563 + u32 cookie_offset, count_offset, eof; 4564 + u32 left, xdrleft, slen, count; 4565 + u32 xdrlen, offset; 4566 + u64 cookie; 4567 + char *sp; 4568 + __be32 status; 4569 + __be32 *p; 4570 + u32 nuser; 4571 + 4572 + eof = 1; 4573 + 4574 + status = nfsd4_listxattr_validate_cookie(listxattrs, &offset); 4575 + if (status) 4576 + goto out; 4577 + 4578 + /* 4579 + * Reserve space for the cookie and the name array count. Record 4580 + * the offsets to save them later. 
4581 + */ 4582 + cookie_offset = xdr->buf->len; 4583 + count_offset = cookie_offset + 8; 4584 + p = xdr_reserve_space(xdr, 12); 4585 + if (!p) { 4586 + status = nfserr_resource; 4587 + goto out; 4588 + } 4589 + 4590 + count = 0; 4591 + left = listxattrs->lsxa_len; 4592 + sp = listxattrs->lsxa_buf; 4593 + nuser = 0; 4594 + 4595 + xdrleft = listxattrs->lsxa_maxcount; 4596 + 4597 + while (left > 0 && xdrleft > 0) { 4598 + slen = strlen(sp); 4599 + 4600 + /* 4601 + * Check if this a user. attribute, skip it if not. 4602 + */ 4603 + if (strncmp(sp, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) 4604 + goto contloop; 4605 + 4606 + slen -= XATTR_USER_PREFIX_LEN; 4607 + xdrlen = 4 + ((slen + 3) & ~3); 4608 + if (xdrlen > xdrleft) { 4609 + if (count == 0) { 4610 + /* 4611 + * Can't even fit the first attribute name. 4612 + */ 4613 + status = nfserr_toosmall; 4614 + goto out; 4615 + } 4616 + eof = 0; 4617 + goto wreof; 4618 + } 4619 + 4620 + left -= XATTR_USER_PREFIX_LEN; 4621 + sp += XATTR_USER_PREFIX_LEN; 4622 + if (nuser++ < offset) 4623 + goto contloop; 4624 + 4625 + 4626 + p = xdr_reserve_space(xdr, xdrlen); 4627 + if (!p) { 4628 + status = nfserr_resource; 4629 + goto out; 4630 + } 4631 + 4632 + p = xdr_encode_opaque(p, sp, slen); 4633 + 4634 + xdrleft -= xdrlen; 4635 + count++; 4636 + contloop: 4637 + sp += slen + 1; 4638 + left -= slen + 1; 4639 + } 4640 + 4641 + /* 4642 + * If there were user attributes to copy, but we didn't copy 4643 + * any, the offset was too large (e.g. the cookie was invalid). 
4644 + */ 4645 + if (nuser > 0 && count == 0) { 4646 + status = nfserr_badcookie; 4647 + goto out; 4648 + } 4649 + 4650 + wreof: 4651 + p = xdr_reserve_space(xdr, 4); 4652 + if (!p) { 4653 + status = nfserr_resource; 4654 + goto out; 4655 + } 4656 + *p = cpu_to_be32(eof); 4657 + 4658 + cookie = offset + count; 4659 + 4660 + write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8); 4661 + count = htonl(count); 4662 + write_bytes_to_xdr_buf(xdr->buf, count_offset, &count, 4); 4663 + out: 4664 + if (listxattrs->lsxa_len) 4665 + kvfree(listxattrs->lsxa_buf); 4666 + return status; 4667 + } 4668 + 4669 + static __be32 4670 + nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, 4671 + struct nfsd4_removexattr *removexattr) 4672 + { 4673 + struct xdr_stream *xdr = &resp->xdr; 4674 + __be32 *p; 4675 + 4676 + p = xdr_reserve_space(xdr, 20); 4677 + if (!p) 4678 + return nfserr_resource; 4679 + 4680 + p = encode_cinfo(p, &removexattr->rmxa_cinfo); 4681 + return 0; 4682 + } 4683 + 4679 4684 typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); 4680 4685 4681 4686 /* ··· 5000 4535 [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, 5001 4536 [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, 5002 4537 [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, 4538 + 4539 + /* RFC 8276 extended atributes operations */ 4540 + [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, 4541 + [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, 4542 + [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, 4543 + [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, 5003 4544 }; 5004 4545 5005 4546 /*
+4 -1
fs/nfsd/nfsd.h
··· 286 286 #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) 287 287 #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) 288 288 #define nfserr_file_open cpu_to_be32(NFS4ERR_FILE_OPEN) 289 + #define nfserr_xattr2big cpu_to_be32(NFS4ERR_XATTR2BIG) 290 + #define nfserr_noxattr cpu_to_be32(NFS4ERR_NOXATTR) 289 291 290 292 /* error codes for internal use */ 291 293 /* if a request fails due to kmalloc failure, it gets dropped. ··· 389 387 (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ 390 388 FATTR4_WORD2_CHANGE_ATTR_TYPE | \ 391 389 FATTR4_WORD2_MODE_UMASK | \ 392 - NFSD4_2_SECURITY_ATTRS) 390 + NFSD4_2_SECURITY_ATTRS | \ 391 + FATTR4_WORD2_XATTR_SUPPORT) 393 392 394 393 extern const u32 nfsd_suppattrs[3][3]; 395 394
+239
fs/nfsd/vfs.c
··· 612 612 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, 613 613 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, 614 614 615 + #ifdef CONFIG_NFSD_V4 616 + { NFS4_ACCESS_XAREAD, NFSD_MAY_READ }, 617 + { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE }, 618 + { NFS4_ACCESS_XALIST, NFSD_MAY_READ }, 619 + #endif 620 + 615 621 { 0, 0 } 616 622 }; 617 623 ··· 627 621 { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC}, 628 622 { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, 629 623 { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, 624 + 625 + #ifdef CONFIG_NFSD_V4 626 + { NFS4_ACCESS_XAREAD, NFSD_MAY_READ }, 627 + { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE }, 628 + { NFS4_ACCESS_XALIST, NFSD_MAY_READ }, 629 + #endif 630 630 631 631 { 0, 0 } 632 632 }; ··· 2076 2064 { 2077 2065 return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; 2078 2066 } 2067 + 2068 + #ifdef CONFIG_NFSD_V4 2069 + /* 2070 + * Helper function to translate error numbers. In the case of xattr operations, 2071 + * some error codes need to be translated outside of the standard translations. 2072 + * 2073 + * ENODATA needs to be translated to nfserr_noxattr. 2074 + * E2BIG to nfserr_xattr2big. 2075 + * 2076 + * Additionally, vfs_listxattr can return -ERANGE. This means that the 2077 + * file has too many extended attributes to retrieve inside an 2078 + * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation: 2079 + * filesystems will allow the adding of extended attributes until they hit 2080 + * their own internal limit. This limit may be larger than XATTR_LIST_MAX. 2081 + * So, at that point, the attributes are present and valid, but can't 2082 + * be retrieved using listxattr, since the upper level xattr code enforces 2083 + * the XATTR_LIST_MAX limit. 2084 + * 2085 + * This bug means that we need to deal with listxattr returning -ERANGE. The 2086 + * best mapping is to return TOOSMALL. 
2087 + */ 2088 + static __be32 2089 + nfsd_xattr_errno(int err) 2090 + { 2091 + switch (err) { 2092 + case -ENODATA: 2093 + return nfserr_noxattr; 2094 + case -E2BIG: 2095 + return nfserr_xattr2big; 2096 + case -ERANGE: 2097 + return nfserr_toosmall; 2098 + } 2099 + return nfserrno(err); 2100 + } 2101 + 2102 + /* 2103 + * Retrieve the specified user extended attribute. To avoid always 2104 + * having to allocate the maximum size (since we are not getting 2105 + * a maximum size from the RPC), do a probe + alloc. Hold a reader 2106 + * lock on i_rwsem to prevent the extended attribute from changing 2107 + * size while we're doing this. 2108 + */ 2109 + __be32 2110 + nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, 2111 + void **bufp, int *lenp) 2112 + { 2113 + ssize_t len; 2114 + __be32 err; 2115 + char *buf; 2116 + struct inode *inode; 2117 + struct dentry *dentry; 2118 + 2119 + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ); 2120 + if (err) 2121 + return err; 2122 + 2123 + err = nfs_ok; 2124 + dentry = fhp->fh_dentry; 2125 + inode = d_inode(dentry); 2126 + 2127 + inode_lock_shared(inode); 2128 + 2129 + len = vfs_getxattr(dentry, name, NULL, 0); 2130 + 2131 + /* 2132 + * Zero-length attribute, just return. 
2133 + */ 2134 + if (len == 0) { 2135 + *bufp = NULL; 2136 + *lenp = 0; 2137 + goto out; 2138 + } 2139 + 2140 + if (len < 0) { 2141 + err = nfsd_xattr_errno(len); 2142 + goto out; 2143 + } 2144 + 2145 + if (len > *lenp) { 2146 + err = nfserr_toosmall; 2147 + goto out; 2148 + } 2149 + 2150 + buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); 2151 + if (buf == NULL) { 2152 + err = nfserr_jukebox; 2153 + goto out; 2154 + } 2155 + 2156 + len = vfs_getxattr(dentry, name, buf, len); 2157 + if (len <= 0) { 2158 + kvfree(buf); 2159 + buf = NULL; 2160 + err = nfsd_xattr_errno(len); 2161 + } 2162 + 2163 + *lenp = len; 2164 + *bufp = buf; 2165 + 2166 + out: 2167 + inode_unlock_shared(inode); 2168 + 2169 + return err; 2170 + } 2171 + 2172 + /* 2173 + * Retrieve the xattr names. Since we can't know how many are 2174 + * user extended attributes, we must get all attributes here, 2175 + * and have the XDR encode filter out the "user." ones. 2176 + * 2177 + * While this could always just allocate an XATTR_LIST_MAX 2178 + * buffer, that's a waste, so do a probe + allocate. To 2179 + * avoid any changes between the probe and allocate, wrap 2180 + * this in inode_lock. 2181 + */ 2182 + __be32 2183 + nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp, 2184 + int *lenp) 2185 + { 2186 + ssize_t len; 2187 + __be32 err; 2188 + char *buf; 2189 + struct inode *inode; 2190 + struct dentry *dentry; 2191 + 2192 + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ); 2193 + if (err) 2194 + return err; 2195 + 2196 + dentry = fhp->fh_dentry; 2197 + inode = d_inode(dentry); 2198 + *lenp = 0; 2199 + 2200 + inode_lock_shared(inode); 2201 + 2202 + len = vfs_listxattr(dentry, NULL, 0); 2203 + if (len <= 0) { 2204 + err = nfsd_xattr_errno(len); 2205 + goto out; 2206 + } 2207 + 2208 + if (len > XATTR_LIST_MAX) { 2209 + err = nfserr_xattr2big; 2210 + goto out; 2211 + } 2212 + 2213 + /* 2214 + * We're holding i_rwsem - use GFP_NOFS. 
2215 + */ 2216 + buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); 2217 + if (buf == NULL) { 2218 + err = nfserr_jukebox; 2219 + goto out; 2220 + } 2221 + 2222 + len = vfs_listxattr(dentry, buf, len); 2223 + if (len <= 0) { 2224 + kvfree(buf); 2225 + err = nfsd_xattr_errno(len); 2226 + goto out; 2227 + } 2228 + 2229 + *lenp = len; 2230 + *bufp = buf; 2231 + 2232 + err = nfs_ok; 2233 + out: 2234 + inode_unlock_shared(inode); 2235 + 2236 + return err; 2237 + } 2238 + 2239 + /* 2240 + * Removexattr and setxattr need to call fh_lock to both lock the inode 2241 + * and set the change attribute. Since the top-level vfs_removexattr 2242 + * and vfs_setxattr calls already do their own inode_lock calls, call 2243 + * the _locked variant. Pass in a NULL pointer for delegated_inode, 2244 + * and let the client deal with NFS4ERR_DELAY (same as with e.g. 2245 + * setattr and remove). 2246 + */ 2247 + __be32 2248 + nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) 2249 + { 2250 + int err, ret; 2251 + 2252 + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE); 2253 + if (err) 2254 + return err; 2255 + 2256 + ret = fh_want_write(fhp); 2257 + if (ret) 2258 + return nfserrno(ret); 2259 + 2260 + fh_lock(fhp); 2261 + 2262 + ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL); 2263 + 2264 + fh_unlock(fhp); 2265 + fh_drop_write(fhp); 2266 + 2267 + return nfsd_xattr_errno(ret); 2268 + } 2269 + 2270 + __be32 2271 + nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, 2272 + void *buf, u32 len, u32 flags) 2273 + { 2274 + int err, ret; 2275 + 2276 + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE); 2277 + if (err) 2278 + return err; 2279 + 2280 + ret = fh_want_write(fhp); 2281 + if (ret) 2282 + return nfserrno(ret); 2283 + fh_lock(fhp); 2284 + 2285 + ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags, 2286 + NULL); 2287 + 2288 + fh_unlock(fhp); 2289 + fh_drop_write(fhp); 2290 + 2291 + return nfsd_xattr_errno(ret); 2292 + } 2293 + #endif 2079 
2294 2080 2295 /* 2081 2296 * Check for a user's access permissions to this inode.
+10
fs/nfsd/vfs.h
··· 76 76 __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 77 77 loff_t, unsigned long, __be32 *verf); 78 78 #endif /* CONFIG_NFSD_V3 */ 79 + #ifdef CONFIG_NFSD_V4 80 + __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, 81 + char *name, void **bufp, int *lenp); 82 + __be32 nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, 83 + char **bufp, int *lenp); 84 + __be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, 85 + char *name); 86 + __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, 87 + char *name, void *buf, u32 len, u32 flags); 88 + #endif 79 89 int nfsd_open_break_lease(struct inode *, int); 80 90 __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, 81 91 int, struct file **);
+31
fs/nfsd/xdr4.h
··· 224 224 bool no_verify; /* represents foreigh fh */ 225 225 }; 226 226 227 + struct nfsd4_getxattr { 228 + char *getxa_name; /* request */ 229 + u32 getxa_len; /* request */ 230 + void *getxa_buf; 231 + }; 232 + 233 + struct nfsd4_setxattr { 234 + u32 setxa_flags; /* request */ 235 + char *setxa_name; /* request */ 236 + char *setxa_buf; /* request */ 237 + u32 setxa_len; /* request */ 238 + struct nfsd4_change_info setxa_cinfo; /* response */ 239 + }; 240 + 241 + struct nfsd4_removexattr { 242 + char *rmxa_name; /* request */ 243 + struct nfsd4_change_info rmxa_cinfo; /* response */ 244 + }; 245 + 246 + struct nfsd4_listxattrs { 247 + u64 lsxa_cookie; /* request */ 248 + u32 lsxa_maxcount; /* request */ 249 + char *lsxa_buf; /* unfiltered buffer (reply) */ 250 + u32 lsxa_len; /* unfiltered len (reply) */ 251 + }; 252 + 227 253 struct nfsd4_open { 228 254 u32 op_claim_type; /* request */ 229 255 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ ··· 675 649 struct nfsd4_offload_status offload_status; 676 650 struct nfsd4_copy_notify copy_notify; 677 651 struct nfsd4_seek seek; 652 + 653 + struct nfsd4_getxattr getxattr; 654 + struct nfsd4_setxattr setxattr; 655 + struct nfsd4_listxattrs listxattrs; 656 + struct nfsd4_removexattr removexattr; 678 657 } u; 679 658 struct nfs4_replay * replay; 680 659 };
+104 -7
fs/xattr.c
··· 134 134 return inode_permission(inode, mask); 135 135 } 136 136 137 + /* 138 + * Look for any handler that deals with the specified namespace. 139 + */ 140 + int 141 + xattr_supported_namespace(struct inode *inode, const char *prefix) 142 + { 143 + const struct xattr_handler **handlers = inode->i_sb->s_xattr; 144 + const struct xattr_handler *handler; 145 + size_t preflen; 146 + 147 + if (!(inode->i_opflags & IOP_XATTR)) { 148 + if (unlikely(is_bad_inode(inode))) 149 + return -EIO; 150 + return -EOPNOTSUPP; 151 + } 152 + 153 + preflen = strlen(prefix); 154 + 155 + for_each_xattr_handler(handlers, handler) { 156 + if (!strncmp(xattr_prefix(handler), prefix, preflen)) 157 + return 0; 158 + } 159 + 160 + return -EOPNOTSUPP; 161 + } 162 + EXPORT_SYMBOL(xattr_supported_namespace); 163 + 137 164 int 138 165 __vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name, 139 166 const void *value, size_t size, int flags) ··· 231 204 return error; 232 205 } 233 206 234 - 207 + /** 208 + * __vfs_setxattr_locked: set an extended attribute while holding the inode 209 + * lock 210 + * 211 + * @dentry - object to perform setxattr on 212 + * @name - xattr name to set 213 + * @value - value to set @name to 214 + * @size - size of @value 215 + * @flags - flags to pass into filesystem operations 216 + * @delegated_inode - on return, will contain an inode pointer that 217 + * a delegation was broken on, NULL if none. 
218 + */ 235 219 int 236 - vfs_setxattr(struct dentry *dentry, const char *name, const void *value, 237 - size_t size, int flags) 220 + __vfs_setxattr_locked(struct dentry *dentry, const char *name, 221 + const void *value, size_t size, int flags, 222 + struct inode **delegated_inode) 238 223 { 239 224 struct inode *inode = dentry->d_inode; 240 225 int error; ··· 255 216 if (error) 256 217 return error; 257 218 258 - inode_lock(inode); 259 219 error = security_inode_setxattr(dentry, name, value, size, flags); 220 + if (error) 221 + goto out; 222 + 223 + error = try_break_deleg(inode, delegated_inode); 260 224 if (error) 261 225 goto out; 262 226 263 227 error = __vfs_setxattr_noperm(dentry, name, value, size, flags); 264 228 265 229 out: 230 + return error; 231 + } 232 + EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); 233 + 234 + int 235 + vfs_setxattr(struct dentry *dentry, const char *name, const void *value, 236 + size_t size, int flags) 237 + { 238 + struct inode *inode = dentry->d_inode; 239 + struct inode *delegated_inode = NULL; 240 + int error; 241 + 242 + retry_deleg: 243 + inode_lock(inode); 244 + error = __vfs_setxattr_locked(dentry, name, value, size, flags, 245 + &delegated_inode); 266 246 inode_unlock(inode); 247 + 248 + if (delegated_inode) { 249 + error = break_deleg_wait(&delegated_inode); 250 + if (!error) 251 + goto retry_deleg; 252 + } 267 253 return error; 268 254 } 269 255 EXPORT_SYMBOL_GPL(vfs_setxattr); ··· 442 378 } 443 379 EXPORT_SYMBOL(__vfs_removexattr); 444 380 381 + /** 382 + * __vfs_removexattr_locked: remove an extended attribute while holding the inode 383 + * lock 384 + * 385 + * @dentry - object to perform removexattr on 386 + * @name - name of xattr to remove 387 + * @delegated_inode - on return, will contain an inode pointer that 388 + * a delegation was broken on, NULL if none. 
389 + */ 445 390 int 446 - vfs_removexattr(struct dentry *dentry, const char *name) 391 + __vfs_removexattr_locked(struct dentry *dentry, const char *name, 392 + struct inode **delegated_inode) 447 393 { 448 394 struct inode *inode = dentry->d_inode; 449 395 int error; ··· 462 388 if (error) 463 389 return error; 464 390 465 - inode_lock(inode); 466 391 error = security_inode_removexattr(dentry, name); 392 + if (error) 393 + goto out; 394 + 395 + error = try_break_deleg(inode, delegated_inode); 467 396 if (error) 468 397 goto out; 469 398 ··· 478 401 } 479 402 480 403 out: 404 + return error; 405 + } 406 + EXPORT_SYMBOL_GPL(__vfs_removexattr_locked); 407 + 408 + int 409 + vfs_removexattr(struct dentry *dentry, const char *name) 410 + { 411 + struct inode *inode = dentry->d_inode; 412 + struct inode *delegated_inode = NULL; 413 + int error; 414 + 415 + retry_deleg: 416 + inode_lock(inode); 417 + error = __vfs_removexattr_locked(dentry, name, &delegated_inode); 481 418 inode_unlock(inode); 419 + 420 + if (delegated_inode) { 421 + error = break_deleg_wait(&delegated_inode); 422 + if (!error) 423 + goto retry_deleg; 424 + } 425 + 482 426 return error; 483 427 } 484 428 EXPORT_SYMBOL_GPL(vfs_removexattr); 485 - 486 429 487 430 /* 488 431 * Extended attribute SET operations
+21 -1
include/linux/nfs4.h
··· 150 150 OP_WRITE_SAME = 70, 151 151 OP_CLONE = 71, 152 152 153 + /* xattr support (RFC8276) */ 154 + OP_GETXATTR = 72, 155 + OP_SETXATTR = 73, 156 + OP_LISTXATTRS = 74, 157 + OP_REMOVEXATTR = 75, 158 + 153 159 OP_ILLEGAL = 10044, 154 160 }; 155 161 ··· 165 159 #define FIRST_NFS4_OP OP_ACCESS 166 160 #define LAST_NFS40_OP OP_RELEASE_LOCKOWNER 167 161 #define LAST_NFS41_OP OP_RECLAIM_COMPLETE 168 - #define LAST_NFS42_OP OP_CLONE 162 + #define LAST_NFS42_OP OP_REMOVEXATTR 169 163 #define LAST_NFS4_OP LAST_NFS42_OP 170 164 171 165 enum nfsstat4 { ··· 286 280 NFS4ERR_WRONG_LFS = 10092, 287 281 NFS4ERR_BADLABEL = 10093, 288 282 NFS4ERR_OFFLOAD_NO_REQS = 10094, 283 + 284 + /* xattr (RFC8276) */ 285 + NFS4ERR_NOXATTR = 10095, 286 + NFS4ERR_XATTR2BIG = 10096, 289 287 }; 290 288 291 289 static inline bool seqid_mutating_err(u32 err) ··· 462 452 #define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) 463 453 #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) 464 454 #define FATTR4_WORD2_MODE_UMASK (1UL << 17) 455 + #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) 465 456 466 457 /* MDS threshold bitmap bits */ 467 458 #define THRESHOLD_RD (1UL << 0) ··· 710 699 }; 711 700 struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ 712 701 } u; 702 + }; 703 + 704 + /* 705 + * Options for setxattr. These match the flags for setxattr(2). 706 + */ 707 + enum nfs4_setxattr_options { 708 + SETXATTR4_EITHER = 0, 709 + SETXATTR4_CREATE = 1, 710 + SETXATTR4_REPLACE = 2, 713 711 }; 714 712 #endif
+74
include/linux/sunrpc/rpc_rdma.h
··· 124 124 return ((unsigned int)val + 1) << 10; 125 125 } 126 126 127 + /** 128 + * xdr_encode_rdma_segment - Encode contents of an RDMA segment 129 + * @p: Pointer into a send buffer 130 + * @handle: The RDMA handle to encode 131 + * @length: The RDMA length to encode 132 + * @offset: The RDMA offset to encode 133 + * 134 + * Return value: 135 + * Pointer to the XDR position that follows the encoded RDMA segment 136 + */ 137 + static inline __be32 *xdr_encode_rdma_segment(__be32 *p, u32 handle, 138 + u32 length, u64 offset) 139 + { 140 + *p++ = cpu_to_be32(handle); 141 + *p++ = cpu_to_be32(length); 142 + return xdr_encode_hyper(p, offset); 143 + } 144 + 145 + /** 146 + * xdr_encode_read_segment - Encode contents of a Read segment 147 + * @p: Pointer into a send buffer 148 + * @position: The position to encode 149 + * @handle: The RDMA handle to encode 150 + * @length: The RDMA length to encode 151 + * @offset: The RDMA offset to encode 152 + * 153 + * Return value: 154 + * Pointer to the XDR position that follows the encoded Read segment 155 + */ 156 + static inline __be32 *xdr_encode_read_segment(__be32 *p, u32 position, 157 + u32 handle, u32 length, 158 + u64 offset) 159 + { 160 + *p++ = cpu_to_be32(position); 161 + return xdr_encode_rdma_segment(p, handle, length, offset); 162 + } 163 + 164 + /** 165 + * xdr_decode_rdma_segment - Decode contents of an RDMA segment 166 + * @p: Pointer to the undecoded RDMA segment 167 + * @handle: Upon return, the RDMA handle 168 + * @length: Upon return, the RDMA length 169 + * @offset: Upon return, the RDMA offset 170 + * 171 + * Return value: 172 + * Pointer to the XDR item that follows the RDMA segment 173 + */ 174 + static inline __be32 *xdr_decode_rdma_segment(__be32 *p, u32 *handle, 175 + u32 *length, u64 *offset) 176 + { 177 + *handle = be32_to_cpup(p++); 178 + *length = be32_to_cpup(p++); 179 + return xdr_decode_hyper(p, offset); 180 + } 181 + 182 + /** 183 + * xdr_decode_read_segment - Decode contents of a Read 
segment 184 + * @p: Pointer to the undecoded Read segment 185 + * @position: Upon return, the segment's position 186 + * @handle: Upon return, the RDMA handle 187 + * @length: Upon return, the RDMA length 188 + * @offset: Upon return, the RDMA offset 189 + * 190 + * Return value: 191 + * Pointer to the XDR item that follows the Read segment 192 + */ 193 + static inline __be32 *xdr_decode_read_segment(__be32 *p, u32 *position, 194 + u32 *handle, u32 *length, 195 + u64 *offset) 196 + { 197 + *position = be32_to_cpup(p++); 198 + return xdr_decode_rdma_segment(p, handle, length, offset); 199 + } 200 + 127 201 #endif /* _LINUX_SUNRPC_RPC_RDMA_H */
+24
include/linux/sunrpc/rpc_rdma_cid.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (c) 2020, Oracle and/or its affiliates. 4 + */ 5 + 6 + #ifndef RPC_RDMA_CID_H 7 + #define RPC_RDMA_CID_H 8 + 9 + /* 10 + * The rpc_rdma_cid struct records completion ID information. A 11 + * completion ID matches an incoming Send or Receive completion 12 + * to a Completion Queue and to a previous ib_post_*(). The ID 13 + * can then be displayed in an error message or recorded in a 14 + * trace record. 15 + * 16 + * This struct is shared between the server and client RPC/RDMA 17 + * transport implementations. 18 + */ 19 + struct rpc_rdma_cid { 20 + u32 ci_queue_id; 21 + int ci_completion_id; 22 + }; 23 + 24 + #endif /* RPC_RDMA_CID_H */
+12 -5
include/linux/sunrpc/svc_rdma.h
··· 46 46 #include <linux/sunrpc/xdr.h> 47 47 #include <linux/sunrpc/svcsock.h> 48 48 #include <linux/sunrpc/rpc_rdma.h> 49 + #include <linux/sunrpc/rpc_rdma_cid.h> 49 50 #include <rdma/ib_verbs.h> 50 51 #include <rdma/rdma_cm.h> 51 52 ··· 110 109 struct work_struct sc_work; 111 110 112 111 struct llist_head sc_recv_ctxts; 112 + 113 + atomic_t sc_completion_ids; 113 114 }; 114 115 /* sc_flags */ 115 116 #define RDMAXPRT_CONN_PENDING 3 ··· 132 129 struct list_head rc_list; 133 130 struct ib_recv_wr rc_recv_wr; 134 131 struct ib_cqe rc_cqe; 132 + struct rpc_rdma_cid rc_cid; 135 133 struct ib_sge rc_recv_sge; 136 134 void *rc_recv_buf; 137 135 struct xdr_buf rc_arg; ··· 151 147 152 148 struct svc_rdma_send_ctxt { 153 149 struct list_head sc_list; 150 + struct rpc_rdma_cid sc_cid; 151 + 154 152 struct ib_send_wr sc_send_wr; 155 153 struct ib_cqe sc_cqe; 156 154 struct xdr_buf sc_hdrbuf; ··· 196 190 svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); 197 191 extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, 198 192 struct svc_rdma_send_ctxt *ctxt); 199 - extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); 193 + extern int svc_rdma_send(struct svcxprt_rdma *rdma, 194 + struct svc_rdma_send_ctxt *ctxt); 200 195 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 201 196 struct svc_rdma_send_ctxt *sctxt, 202 197 const struct svc_rdma_recv_ctxt *rctxt, 203 198 struct xdr_buf *xdr); 199 + extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 200 + struct svc_rdma_send_ctxt *sctxt, 201 + struct svc_rdma_recv_ctxt *rctxt, 202 + int status); 204 203 extern int svc_rdma_sendto(struct svc_rqst *); 205 204 extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, 206 205 unsigned int length); 207 206 208 207 /* svc_rdma_transport.c */ 209 - extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 210 - extern void svc_sq_reap(struct svcxprt_rdma *); 211 - extern void svc_rq_reap(struct 
svcxprt_rdma *); 212 - 213 208 extern struct svc_xprt_class svc_rdma_class; 214 209 #ifdef CONFIG_SUNRPC_BACKCHANNEL 215 210 extern struct svc_xprt_class svc_rdma_bc_class;
+26
include/linux/sunrpc/xdr.h
··· 475 475 } 476 476 477 477 /** 478 + * xdr_item_is_absent - symbolically handle XDR discriminators 479 + * @p: pointer to undecoded discriminator 480 + * 481 + * Return values: 482 + * %true if the following XDR item is absent 483 + * %false if the following XDR item is present 484 + */ 485 + static inline bool xdr_item_is_absent(const __be32 *p) 486 + { 487 + return *p == xdr_zero; 488 + } 489 + 490 + /** 491 + * xdr_item_is_present - symbolically handle XDR discriminators 492 + * @p: pointer to undecoded discriminator 493 + * 494 + * Return values: 495 + * %true if the following XDR item is present 496 + * %false if the following XDR item is absent 497 + */ 498 + static inline bool xdr_item_is_present(const __be32 *p) 499 + { 500 + return *p != xdr_zero; 501 + } 502 + 503 + /** 478 504 * xdr_stream_decode_u32 - Decode a 32-bit integer 479 505 * @xdr: pointer to xdr_stream 480 506 * @ptr: location to store integer
+4
include/linux/xattr.h
··· 52 52 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); 53 53 int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int); 54 54 int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); 55 + int __vfs_setxattr_locked(struct dentry *, const char *, const void *, size_t, int, struct inode **); 55 56 int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); 56 57 int __vfs_removexattr(struct dentry *, const char *); 58 + int __vfs_removexattr_locked(struct dentry *, const char *, struct inode **); 57 59 int vfs_removexattr(struct dentry *, const char *); 58 60 59 61 ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); 60 62 ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, 61 63 char **xattr_value, size_t size, gfp_t flags); 64 + 65 + int xattr_supported_namespace(struct inode *inode, const char *prefix); 62 66 63 67 static inline const char *xattr_prefix(const struct xattr_handler *handler) 64 68 {
+142 -26
include/trace/events/rpcgss.h
··· 170 170 DEFINE_CTX_EVENT(init); 171 171 DEFINE_CTX_EVENT(destroy); 172 172 173 + DECLARE_EVENT_CLASS(rpcgss_svc_gssapi_class, 174 + TP_PROTO( 175 + const struct svc_rqst *rqstp, 176 + u32 maj_stat 177 + ), 178 + 179 + TP_ARGS(rqstp, maj_stat), 180 + 181 + TP_STRUCT__entry( 182 + __field(u32, xid) 183 + __field(u32, maj_stat) 184 + __string(addr, rqstp->rq_xprt->xpt_remotebuf) 185 + ), 186 + 187 + TP_fast_assign( 188 + __entry->xid = __be32_to_cpu(rqstp->rq_xid); 189 + __entry->maj_stat = maj_stat; 190 + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); 191 + ), 192 + 193 + TP_printk("addr=%s xid=0x%08x maj_stat=%s", 194 + __get_str(addr), __entry->xid, 195 + __entry->maj_stat == 0 ? 196 + "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) 197 + ); 198 + 199 + #define DEFINE_SVC_GSSAPI_EVENT(name) \ 200 + DEFINE_EVENT(rpcgss_svc_gssapi_class, rpcgss_svc_##name, \ 201 + TP_PROTO( \ 202 + const struct svc_rqst *rqstp, \ 203 + u32 maj_stat \ 204 + ), \ 205 + TP_ARGS(rqstp, maj_stat)) 206 + 207 + DEFINE_SVC_GSSAPI_EVENT(unwrap); 208 + DEFINE_SVC_GSSAPI_EVENT(mic); 209 + 210 + TRACE_EVENT(rpcgss_svc_unwrap_failed, 211 + TP_PROTO( 212 + const struct svc_rqst *rqstp 213 + ), 214 + 215 + TP_ARGS(rqstp), 216 + 217 + TP_STRUCT__entry( 218 + __field(u32, xid) 219 + __string(addr, rqstp->rq_xprt->xpt_remotebuf) 220 + ), 221 + 222 + TP_fast_assign( 223 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 224 + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); 225 + ), 226 + 227 + TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) 228 + ); 229 + 230 + TRACE_EVENT(rpcgss_svc_seqno_bad, 231 + TP_PROTO( 232 + const struct svc_rqst *rqstp, 233 + u32 expected, 234 + u32 received 235 + ), 236 + 237 + TP_ARGS(rqstp, expected, received), 238 + 239 + TP_STRUCT__entry( 240 + __field(u32, expected) 241 + __field(u32, received) 242 + __field(u32, xid) 243 + __string(addr, rqstp->rq_xprt->xpt_remotebuf) 244 + ), 245 + 246 + TP_fast_assign( 247 + __entry->expected = expected; 248 
+ __entry->received = received; 249 + __entry->xid = __be32_to_cpu(rqstp->rq_xid); 250 + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); 251 + ), 252 + 253 + TP_printk("addr=%s xid=0x%08x expected seqno %u, received seqno %u", 254 + __get_str(addr), __entry->xid, 255 + __entry->expected, __entry->received) 256 + ); 257 + 173 258 TRACE_EVENT(rpcgss_svc_accept_upcall, 174 259 TP_PROTO( 175 - __be32 xid, 260 + const struct svc_rqst *rqstp, 176 261 u32 major_status, 177 262 u32 minor_status 178 263 ), 179 264 180 - TP_ARGS(xid, major_status, minor_status), 265 + TP_ARGS(rqstp, major_status, minor_status), 181 266 182 267 TP_STRUCT__entry( 183 - __field(u32, xid) 184 268 __field(u32, minor_status) 185 269 __field(unsigned long, major_status) 270 + __field(u32, xid) 271 + __string(addr, rqstp->rq_xprt->xpt_remotebuf) 186 272 ), 187 273 188 274 TP_fast_assign( 189 - __entry->xid = be32_to_cpu(xid); 190 275 __entry->minor_status = minor_status; 191 276 __entry->major_status = major_status; 277 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 278 + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); 192 279 ), 193 280 194 - TP_printk("xid=0x%08x major_status=%s (0x%08lx) minor_status=%u", 195 - __entry->xid, __entry->major_status == 0 ? "GSS_S_COMPLETE" : 196 - show_gss_status(__entry->major_status), 281 + TP_printk("addr=%s xid=0x%08x major_status=%s (0x%08lx) minor_status=%u", 282 + __get_str(addr), __entry->xid, 283 + (__entry->major_status == 0) ? 
"GSS_S_COMPLETE" : 284 + show_gss_status(__entry->major_status), 197 285 __entry->major_status, __entry->minor_status 198 286 ) 199 287 ); 200 288 201 - TRACE_EVENT(rpcgss_svc_accept, 289 + TRACE_EVENT(rpcgss_svc_authenticate, 202 290 TP_PROTO( 203 - __be32 xid, 204 - size_t len 291 + const struct svc_rqst *rqstp, 292 + const struct rpc_gss_wire_cred *gc 205 293 ), 206 294 207 - TP_ARGS(xid, len), 295 + TP_ARGS(rqstp, gc), 208 296 209 297 TP_STRUCT__entry( 298 + __field(u32, seqno) 210 299 __field(u32, xid) 211 - __field(size_t, len) 300 + __string(addr, rqstp->rq_xprt->xpt_remotebuf) 212 301 ), 213 302 214 303 TP_fast_assign( 215 - __entry->xid = be32_to_cpu(xid); 216 - __entry->len = len; 304 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 305 + __entry->seqno = gc->gc_seq; 306 + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); 217 307 ), 218 308 219 - TP_printk("xid=0x%08x len=%zu", 220 - __entry->xid, __entry->len 221 - ) 309 + TP_printk("addr=%s xid=0x%08x seqno=%u", __get_str(addr), 310 + __entry->xid, __entry->seqno) 222 311 ); 223 312 224 313 ··· 460 371 461 372 DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class, 462 373 TP_PROTO( 463 - __be32 xid, 374 + const struct svc_rqst *rqstp, 464 375 u32 seqno 465 376 ), 466 377 467 - TP_ARGS(xid, seqno), 378 + TP_ARGS(rqstp, seqno), 468 379 469 380 TP_STRUCT__entry( 470 381 __field(u32, xid) ··· 472 383 ), 473 384 474 385 TP_fast_assign( 475 - __entry->xid = be32_to_cpu(xid); 386 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 476 387 __entry->seqno = seqno; 477 388 ), 478 389 479 - TP_printk("xid=0x%08x seqno=%u, request discarded", 390 + TP_printk("xid=0x%08x seqno=%u", 480 391 __entry->xid, __entry->seqno) 481 392 ); 482 393 483 394 #define DEFINE_SVC_SEQNO_EVENT(name) \ 484 - DEFINE_EVENT(rpcgss_svc_seqno_class, rpcgss_svc_##name, \ 395 + DEFINE_EVENT(rpcgss_svc_seqno_class, rpcgss_svc_seqno_##name, \ 485 396 TP_PROTO( \ 486 - __be32 xid, \ 397 + const struct svc_rqst *rqstp, \ 487 398 u32 seqno \ 488 399 ), \ 489 - 
TP_ARGS(xid, seqno)) 400 + TP_ARGS(rqstp, seqno)) 490 401 491 - DEFINE_SVC_SEQNO_EVENT(large_seqno); 492 - DEFINE_SVC_SEQNO_EVENT(old_seqno); 402 + DEFINE_SVC_SEQNO_EVENT(large); 403 + DEFINE_SVC_SEQNO_EVENT(seen); 493 404 405 + TRACE_EVENT(rpcgss_svc_seqno_low, 406 + TP_PROTO( 407 + const struct svc_rqst *rqstp, 408 + u32 seqno, 409 + u32 min, 410 + u32 max 411 + ), 412 + 413 + TP_ARGS(rqstp, seqno, min, max), 414 + 415 + TP_STRUCT__entry( 416 + __field(u32, xid) 417 + __field(u32, seqno) 418 + __field(u32, min) 419 + __field(u32, max) 420 + ), 421 + 422 + TP_fast_assign( 423 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 424 + __entry->seqno = seqno; 425 + __entry->min = min; 426 + __entry->max = max; 427 + ), 428 + 429 + TP_printk("xid=0x%08x seqno=%u window=[%u..%u]", 430 + __entry->xid, __entry->seqno, __entry->min, __entry->max) 431 + ); 494 432 495 433 /** 496 434 ** gssd upcall related trace events
+124 -103
include/trace/events/rpcrdma.h
··· 11 11 #define _TRACE_RPCRDMA_H 12 12 13 13 #include <linux/scatterlist.h> 14 + #include <linux/sunrpc/rpc_rdma_cid.h> 14 15 #include <linux/tracepoint.h> 15 16 #include <trace/events/rdma.h> 16 17 17 18 /** 18 19 ** Event classes 19 20 **/ 21 + 22 + DECLARE_EVENT_CLASS(rpcrdma_completion_class, 23 + TP_PROTO( 24 + const struct ib_wc *wc, 25 + const struct rpc_rdma_cid *cid 26 + ), 27 + 28 + TP_ARGS(wc, cid), 29 + 30 + TP_STRUCT__entry( 31 + __field(u32, cq_id) 32 + __field(int, completion_id) 33 + __field(unsigned long, status) 34 + __field(unsigned int, vendor_err) 35 + ), 36 + 37 + TP_fast_assign( 38 + __entry->cq_id = cid->ci_queue_id; 39 + __entry->completion_id = cid->ci_completion_id; 40 + __entry->status = wc->status; 41 + if (wc->status) 42 + __entry->vendor_err = wc->vendor_err; 43 + else 44 + __entry->vendor_err = 0; 45 + ), 46 + 47 + TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", 48 + __entry->cq_id, __entry->completion_id, 49 + rdma_show_wc_status(__entry->status), 50 + __entry->status, __entry->vendor_err 51 + ) 52 + ); 53 + 54 + #define DEFINE_COMPLETION_EVENT(name) \ 55 + DEFINE_EVENT(rpcrdma_completion_class, name, \ 56 + TP_PROTO( \ 57 + const struct ib_wc *wc, \ 58 + const struct rpc_rdma_cid *cid \ 59 + ), \ 60 + TP_ARGS(wc, cid)) 20 61 21 62 DECLARE_EVENT_CLASS(xprtrdma_reply_event, 22 63 TP_PROTO( ··· 1369 1328 1370 1329 TRACE_EVENT(svcrdma_decode_rqst, 1371 1330 TP_PROTO( 1331 + const struct svc_rdma_recv_ctxt *ctxt, 1372 1332 __be32 *p, 1373 1333 unsigned int hdrlen 1374 1334 ), 1375 1335 1376 - TP_ARGS(p, hdrlen), 1336 + TP_ARGS(ctxt, p, hdrlen), 1377 1337 1378 1338 TP_STRUCT__entry( 1339 + __field(u32, cq_id) 1340 + __field(int, completion_id) 1379 1341 __field(u32, xid) 1380 1342 __field(u32, vers) 1381 1343 __field(u32, proc) ··· 1387 1343 ), 1388 1344 1389 1345 TP_fast_assign( 1346 + __entry->cq_id = ctxt->rc_cid.ci_queue_id; 1347 + __entry->completion_id = ctxt->rc_cid.ci_completion_id; 1390 1348 __entry->xid = be32_to_cpup(p++); 
1391 1349 __entry->vers = be32_to_cpup(p++); 1392 1350 __entry->credits = be32_to_cpup(p++); ··· 1396 1350 __entry->hdrlen = hdrlen; 1397 1351 ), 1398 1352 1399 - TP_printk("xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u", 1353 + TP_printk("cq.id=%u cid=%d xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u", 1354 + __entry->cq_id, __entry->completion_id, 1400 1355 __entry->xid, __entry->vers, __entry->credits, 1401 1356 show_rpcrdma_proc(__entry->proc), __entry->hdrlen) 1402 1357 ); 1403 1358 1404 1359 TRACE_EVENT(svcrdma_decode_short_err, 1405 1360 TP_PROTO( 1361 + const struct svc_rdma_recv_ctxt *ctxt, 1406 1362 unsigned int hdrlen 1407 1363 ), 1408 1364 1409 - TP_ARGS(hdrlen), 1365 + TP_ARGS(ctxt, hdrlen), 1410 1366 1411 1367 TP_STRUCT__entry( 1368 + __field(u32, cq_id) 1369 + __field(int, completion_id) 1412 1370 __field(unsigned int, hdrlen) 1413 1371 ), 1414 1372 1415 1373 TP_fast_assign( 1374 + __entry->cq_id = ctxt->rc_cid.ci_queue_id; 1375 + __entry->completion_id = ctxt->rc_cid.ci_completion_id; 1416 1376 __entry->hdrlen = hdrlen; 1417 1377 ), 1418 1378 1419 - TP_printk("hdrlen=%u", __entry->hdrlen) 1379 + TP_printk("cq.id=%u cid=%d hdrlen=%u", 1380 + __entry->cq_id, __entry->completion_id, 1381 + __entry->hdrlen) 1420 1382 ); 1421 1383 1422 1384 DECLARE_EVENT_CLASS(svcrdma_badreq_event, 1423 1385 TP_PROTO( 1386 + const struct svc_rdma_recv_ctxt *ctxt, 1424 1387 __be32 *p 1425 1388 ), 1426 1389 1427 - TP_ARGS(p), 1390 + TP_ARGS(ctxt, p), 1428 1391 1429 1392 TP_STRUCT__entry( 1393 + __field(u32, cq_id) 1394 + __field(int, completion_id) 1430 1395 __field(u32, xid) 1431 1396 __field(u32, vers) 1432 1397 __field(u32, proc) ··· 1445 1388 ), 1446 1389 1447 1390 TP_fast_assign( 1391 + __entry->cq_id = ctxt->rc_cid.ci_queue_id; 1392 + __entry->completion_id = ctxt->rc_cid.ci_completion_id; 1448 1393 __entry->xid = be32_to_cpup(p++); 1449 1394 __entry->vers = be32_to_cpup(p++); 1450 1395 __entry->credits = be32_to_cpup(p++); 1451 1396 __entry->proc = 
be32_to_cpup(p); 1452 1397 ), 1453 1398 1454 - TP_printk("xid=0x%08x vers=%u credits=%u proc=%u", 1399 + TP_printk("cq.id=%u cid=%d xid=0x%08x vers=%u credits=%u proc=%u", 1400 + __entry->cq_id, __entry->completion_id, 1455 1401 __entry->xid, __entry->vers, __entry->credits, __entry->proc) 1456 1402 ); 1457 1403 ··· 1462 1402 DEFINE_EVENT(svcrdma_badreq_event, \ 1463 1403 svcrdma_decode_##name##_err, \ 1464 1404 TP_PROTO( \ 1405 + const struct svc_rdma_recv_ctxt *ctxt, \ 1465 1406 __be32 *p \ 1466 1407 ), \ 1467 - TP_ARGS(p)) 1408 + TP_ARGS(ctxt, p)) 1468 1409 1469 1410 DEFINE_BADREQ_EVENT(badvers); 1470 1411 DEFINE_BADREQ_EVENT(drop); ··· 1777 1716 TP_printk("len=%u", __entry->len) 1778 1717 ); 1779 1718 1780 - TRACE_EVENT(svcrdma_send_failed, 1719 + TRACE_EVENT(svcrdma_send_err, 1781 1720 TP_PROTO( 1782 1721 const struct svc_rqst *rqst, 1783 1722 int status ··· 1788 1727 TP_STRUCT__entry( 1789 1728 __field(int, status) 1790 1729 __field(u32, xid) 1791 - __field(const void *, xprt) 1792 1730 __string(addr, rqst->rq_xprt->xpt_remotebuf) 1793 1731 ), 1794 1732 1795 1733 TP_fast_assign( 1796 1734 __entry->status = status; 1797 1735 __entry->xid = __be32_to_cpu(rqst->rq_xid); 1798 - __entry->xprt = rqst->rq_xprt; 1799 1736 __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); 1800 1737 ), 1801 1738 1802 - TP_printk("xprt=%p addr=%s xid=0x%08x status=%d", 1803 - __entry->xprt, __get_str(addr), 1739 + TP_printk("addr=%s xid=0x%08x status=%d", __get_str(addr), 1804 1740 __entry->xid, __entry->status 1805 1741 ) 1806 1742 ); 1807 1743 1808 - DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, 1809 - TP_PROTO( 1810 - const struct ib_wc *wc 1811 - ), 1812 - 1813 - TP_ARGS(wc), 1814 - 1815 - TP_STRUCT__entry( 1816 - __field(const void *, cqe) 1817 - __field(unsigned int, status) 1818 - __field(unsigned int, vendor_err) 1819 - ), 1820 - 1821 - TP_fast_assign( 1822 - __entry->cqe = wc->wr_cqe; 1823 - __entry->status = wc->status; 1824 - if (wc->status) 1825 - __entry->vendor_err = 
wc->vendor_err; 1826 - else 1827 - __entry->vendor_err = 0; 1828 - ), 1829 - 1830 - TP_printk("cqe=%p status=%s (%u/0x%x)", 1831 - __entry->cqe, rdma_show_wc_status(__entry->status), 1832 - __entry->status, __entry->vendor_err 1833 - ) 1834 - ); 1835 - 1836 - #define DEFINE_SENDCOMP_EVENT(name) \ 1837 - DEFINE_EVENT(svcrdma_sendcomp_event, svcrdma_wc_##name, \ 1838 - TP_PROTO( \ 1839 - const struct ib_wc *wc \ 1840 - ), \ 1841 - TP_ARGS(wc)) 1842 - 1843 1744 TRACE_EVENT(svcrdma_post_send, 1844 1745 TP_PROTO( 1845 - const struct ib_send_wr *wr 1746 + const struct svc_rdma_send_ctxt *ctxt 1846 1747 ), 1847 1748 1848 - TP_ARGS(wr), 1749 + TP_ARGS(ctxt), 1849 1750 1850 1751 TP_STRUCT__entry( 1851 - __field(const void *, cqe) 1752 + __field(u32, cq_id) 1753 + __field(int, completion_id) 1852 1754 __field(unsigned int, num_sge) 1853 1755 __field(u32, inv_rkey) 1854 1756 ), 1855 1757 1856 1758 TP_fast_assign( 1857 - __entry->cqe = wr->wr_cqe; 1759 + const struct ib_send_wr *wr = &ctxt->sc_send_wr; 1760 + 1761 + __entry->cq_id = ctxt->sc_cid.ci_queue_id; 1762 + __entry->completion_id = ctxt->sc_cid.ci_completion_id; 1858 1763 __entry->num_sge = wr->num_sge; 1859 1764 __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? 
1860 1765 wr->ex.invalidate_rkey : 0; 1861 1766 ), 1862 1767 1863 - TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x", 1864 - __entry->cqe, __entry->num_sge, 1865 - __entry->inv_rkey 1768 + TP_printk("cq_id=%u cid=%d num_sge=%u inv_rkey=0x%08x", 1769 + __entry->cq_id, __entry->completion_id, 1770 + __entry->num_sge, __entry->inv_rkey 1866 1771 ) 1867 1772 ); 1868 1773 1869 - DEFINE_SENDCOMP_EVENT(send); 1774 + DEFINE_COMPLETION_EVENT(svcrdma_wc_send); 1870 1775 1871 1776 TRACE_EVENT(svcrdma_post_recv, 1872 1777 TP_PROTO( 1873 - const struct ib_recv_wr *wr, 1778 + const struct svc_rdma_recv_ctxt *ctxt 1779 + ), 1780 + 1781 + TP_ARGS(ctxt), 1782 + 1783 + TP_STRUCT__entry( 1784 + __field(u32, cq_id) 1785 + __field(int, completion_id) 1786 + ), 1787 + 1788 + TP_fast_assign( 1789 + __entry->cq_id = ctxt->rc_cid.ci_queue_id; 1790 + __entry->completion_id = ctxt->rc_cid.ci_completion_id; 1791 + ), 1792 + 1793 + TP_printk("cq.id=%d cid=%d", 1794 + __entry->cq_id, __entry->completion_id 1795 + ) 1796 + ); 1797 + 1798 + DEFINE_COMPLETION_EVENT(svcrdma_wc_receive); 1799 + 1800 + TRACE_EVENT(svcrdma_rq_post_err, 1801 + TP_PROTO( 1802 + const struct svcxprt_rdma *rdma, 1874 1803 int status 1875 1804 ), 1876 1805 1877 - TP_ARGS(wr, status), 1806 + TP_ARGS(rdma, status), 1878 1807 1879 1808 TP_STRUCT__entry( 1880 - __field(const void *, cqe) 1881 1809 __field(int, status) 1810 + __string(addr, rdma->sc_xprt.xpt_remotebuf) 1882 1811 ), 1883 1812 1884 1813 TP_fast_assign( 1885 - __entry->cqe = wr->wr_cqe; 1886 1814 __entry->status = status; 1815 + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); 1887 1816 ), 1888 1817 1889 - TP_printk("cqe=%p status=%d", 1890 - __entry->cqe, __entry->status 1818 + TP_printk("addr=%s status=%d", 1819 + __get_str(addr), __entry->status 1891 1820 ) 1892 1821 ); 1893 1822 1894 - TRACE_EVENT(svcrdma_wc_receive, 1823 + TRACE_EVENT(svcrdma_post_chunk, 1895 1824 TP_PROTO( 1896 - const struct ib_wc *wc 1897 - ), 1898 - 1899 - TP_ARGS(wc), 1900 - 1901 - 
TP_STRUCT__entry( 1902 - __field(const void *, cqe) 1903 - __field(u32, byte_len) 1904 - __field(unsigned int, status) 1905 - __field(u32, vendor_err) 1906 - ), 1907 - 1908 - TP_fast_assign( 1909 - __entry->cqe = wc->wr_cqe; 1910 - __entry->status = wc->status; 1911 - if (wc->status) { 1912 - __entry->byte_len = 0; 1913 - __entry->vendor_err = wc->vendor_err; 1914 - } else { 1915 - __entry->byte_len = wc->byte_len; 1916 - __entry->vendor_err = 0; 1917 - } 1918 - ), 1919 - 1920 - TP_printk("cqe=%p byte_len=%u status=%s (%u/0x%x)", 1921 - __entry->cqe, __entry->byte_len, 1922 - rdma_show_wc_status(__entry->status), 1923 - __entry->status, __entry->vendor_err 1924 - ) 1925 - ); 1926 - 1927 - TRACE_EVENT(svcrdma_post_rw, 1928 - TP_PROTO( 1929 - const void *cqe, 1825 + const struct rpc_rdma_cid *cid, 1930 1826 int sqecount 1931 1827 ), 1932 1828 1933 - TP_ARGS(cqe, sqecount), 1829 + TP_ARGS(cid, sqecount), 1934 1830 1935 1831 TP_STRUCT__entry( 1936 - __field(const void *, cqe) 1832 + __field(u32, cq_id) 1833 + __field(int, completion_id) 1937 1834 __field(int, sqecount) 1938 1835 ), 1939 1836 1940 1837 TP_fast_assign( 1941 - __entry->cqe = cqe; 1838 + __entry->cq_id = cid->ci_queue_id; 1839 + __entry->completion_id = cid->ci_completion_id; 1942 1840 __entry->sqecount = sqecount; 1943 1841 ), 1944 1842 1945 - TP_printk("cqe=%p sqecount=%d", 1946 - __entry->cqe, __entry->sqecount 1843 + TP_printk("cq.id=%u cid=%d sqecount=%d", 1844 + __entry->cq_id, __entry->completion_id, 1845 + __entry->sqecount 1947 1846 ) 1948 1847 ); 1949 1848 1950 - DEFINE_SENDCOMP_EVENT(read); 1951 - DEFINE_SENDCOMP_EVENT(write); 1849 + DEFINE_COMPLETION_EVENT(svcrdma_wc_read); 1850 + DEFINE_COMPLETION_EVENT(svcrdma_wc_write); 1952 1851 1953 1852 TRACE_EVENT(svcrdma_qp_error, 1954 1853 TP_PROTO(
+27 -8
include/trace/events/sunrpc.h
··· 1250 1250 DEFINE_SVCXDRBUF_EVENT(recvfrom); 1251 1251 DEFINE_SVCXDRBUF_EVENT(sendto); 1252 1252 1253 + /* 1254 + * from include/linux/sunrpc/svc.h 1255 + */ 1256 + #define SVC_RQST_FLAG_LIST \ 1257 + svc_rqst_flag(SECURE) \ 1258 + svc_rqst_flag(LOCAL) \ 1259 + svc_rqst_flag(USEDEFERRAL) \ 1260 + svc_rqst_flag(DROPME) \ 1261 + svc_rqst_flag(SPLICE_OK) \ 1262 + svc_rqst_flag(VICTIM) \ 1263 + svc_rqst_flag(BUSY) \ 1264 + svc_rqst_flag(DATA) \ 1265 + svc_rqst_flag_end(AUTHERR) 1266 + 1267 + #undef svc_rqst_flag 1268 + #undef svc_rqst_flag_end 1269 + #define svc_rqst_flag(x) TRACE_DEFINE_ENUM(RQ_##x); 1270 + #define svc_rqst_flag_end(x) TRACE_DEFINE_ENUM(RQ_##x); 1271 + 1272 + SVC_RQST_FLAG_LIST 1273 + 1274 + #undef svc_rqst_flag 1275 + #undef svc_rqst_flag_end 1276 + #define svc_rqst_flag(x) { BIT(RQ_##x), #x }, 1277 + #define svc_rqst_flag_end(x) { BIT(RQ_##x), #x } 1278 + 1253 1279 #define show_rqstp_flags(flags) \ 1254 - __print_flags(flags, "|", \ 1255 - { (1UL << RQ_SECURE), "RQ_SECURE"}, \ 1256 - { (1UL << RQ_LOCAL), "RQ_LOCAL"}, \ 1257 - { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \ 1258 - { (1UL << RQ_DROPME), "RQ_DROPME"}, \ 1259 - { (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}, \ 1260 - { (1UL << RQ_VICTIM), "RQ_VICTIM"}, \ 1261 - { (1UL << RQ_BUSY), "RQ_BUSY"}) 1280 + __print_flags(flags, "|", SVC_RQST_FLAG_LIST) 1262 1281 1263 1282 TRACE_EVENT(svc_recv, 1264 1283 TP_PROTO(struct svc_rqst *rqst, int len),
+3
include/uapi/linux/nfs4.h
··· 33 33 #define NFS4_ACCESS_EXTEND 0x0008 34 34 #define NFS4_ACCESS_DELETE 0x0010 35 35 #define NFS4_ACCESS_EXECUTE 0x0020 36 + #define NFS4_ACCESS_XAREAD 0x0040 37 + #define NFS4_ACCESS_XAWRITE 0x0080 38 + #define NFS4_ACCESS_XALIST 0x0100 36 39 37 40 #define NFS4_FH_PERSISTENT 0x0000 38 41 #define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001
+1 -1
net/sunrpc/auth_gss/gss_krb5_wrap.c
··· 584 584 buf->head[0].iov_len); 585 585 memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); 586 586 buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; 587 - buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; 587 + buf->len = len - (GSS_KRB5_TOK_HDR_LEN + headskip); 588 588 589 589 /* Trim off the trailing "extra count" and checksum blob */ 590 590 xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+79 -39
net/sunrpc/auth_gss/svcauth_gss.c
··· 332 332 333 333 struct gss_svc_seq_data { 334 334 /* highest seq number seen so far: */ 335 - int sd_max; 335 + u32 sd_max; 336 336 /* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of 337 337 * sd_win is nonzero iff sequence number i has been seen already: */ 338 338 unsigned long sd_win[GSS_SEQ_WIN/BITS_PER_LONG]; ··· 613 613 return found; 614 614 } 615 615 616 - /* Implements sequence number algorithm as specified in RFC 2203. */ 617 - static int 618 - gss_check_seq_num(struct rsc *rsci, int seq_num) 616 + /** 617 + * gss_check_seq_num - GSS sequence number window check 618 + * @rqstp: RPC Call to use when reporting errors 619 + * @rsci: cached GSS context state (updated on return) 620 + * @seq_num: sequence number to check 621 + * 622 + * Implements sequence number algorithm as specified in 623 + * RFC 2203, Section 5.3.3.1. "Context Management". 624 + * 625 + * Return values: 626 + * %true: @rqstp's GSS sequence number is inside the window 627 + * %false: @rqstp's GSS sequence number is outside the window 628 + */ 629 + static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci, 630 + u32 seq_num) 619 631 { 620 632 struct gss_svc_seq_data *sd = &rsci->seqdata; 633 + bool result = false; 621 634 622 635 spin_lock(&sd->sd_lock); 623 636 if (seq_num > sd->sd_max) { 624 637 if (seq_num >= sd->sd_max + GSS_SEQ_WIN) { 625 - memset(sd->sd_win,0,sizeof(sd->sd_win)); 638 + memset(sd->sd_win, 0, sizeof(sd->sd_win)); 626 639 sd->sd_max = seq_num; 627 640 } else while (sd->sd_max < seq_num) { 628 641 sd->sd_max++; ··· 644 631 __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); 645 632 goto ok; 646 633 } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) { 647 - goto drop; 634 + goto toolow; 648 635 } 649 - /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */ 650 636 if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) 651 - goto drop; 637 + goto alreadyseen; 638 + 652 639 ok: 640 + result = true; 641 + out: 653 642 spin_unlock(&sd->sd_lock); 654 
- return 1; 655 - drop: 656 - spin_unlock(&sd->sd_lock); 657 - return 0; 643 + return result; 644 + 645 + toolow: 646 + trace_rpcgss_svc_seqno_low(rqstp, seq_num, 647 + sd->sd_max - GSS_SEQ_WIN, 648 + sd->sd_max); 649 + goto out; 650 + alreadyseen: 651 + trace_rpcgss_svc_seqno_seen(rqstp, seq_num); 652 + goto out; 658 653 } 659 654 660 655 static inline u32 round_up_to_quad(u32 i) ··· 742 721 } 743 722 744 723 if (gc->gc_seq > MAXSEQ) { 745 - trace_rpcgss_svc_large_seqno(rqstp->rq_xid, gc->gc_seq); 724 + trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq); 746 725 *authp = rpcsec_gsserr_ctxproblem; 747 726 return SVC_DENIED; 748 727 } 749 - if (!gss_check_seq_num(rsci, gc->gc_seq)) { 750 - trace_rpcgss_svc_old_seqno(rqstp->rq_xid, gc->gc_seq); 728 + if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq)) 751 729 return SVC_DROP; 752 - } 753 730 return SVC_OK; 754 731 } 755 732 ··· 885 866 static int 886 867 unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) 887 868 { 869 + u32 integ_len, rseqno, maj_stat; 888 870 int stat = -EINVAL; 889 - u32 integ_len, maj_stat; 890 871 struct xdr_netobj mic; 891 872 struct xdr_buf integ_buf; 873 + 874 + mic.data = NULL; 892 875 893 876 /* NFS READ normally uses splice to send data in-place. However 894 877 * the data in cache can change after the reply's MIC is computed ··· 906 885 907 886 integ_len = svc_getnl(&buf->head[0]); 908 887 if (integ_len & 3) 909 - return stat; 888 + goto unwrap_failed; 910 889 if (integ_len > buf->len) 911 - return stat; 912 - if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) { 913 - WARN_ON_ONCE(1); 914 - return stat; 915 - } 890 + goto unwrap_failed; 891 + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) 892 + goto unwrap_failed; 893 + 916 894 /* copy out mic... 
*/ 917 895 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) 918 - return stat; 896 + goto unwrap_failed; 919 897 if (mic.len > RPC_MAX_AUTH_SIZE) 920 - return stat; 898 + goto unwrap_failed; 921 899 mic.data = kmalloc(mic.len, GFP_KERNEL); 922 900 if (!mic.data) 923 - return stat; 901 + goto unwrap_failed; 924 902 if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) 925 - goto out; 903 + goto unwrap_failed; 926 904 maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); 927 905 if (maj_stat != GSS_S_COMPLETE) 928 - goto out; 929 - if (svc_getnl(&buf->head[0]) != seq) 930 - goto out; 906 + goto bad_mic; 907 + rseqno = svc_getnl(&buf->head[0]); 908 + if (rseqno != seq) 909 + goto bad_seqno; 931 910 /* trim off the mic and padding at the end before returning */ 932 911 xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); 933 912 stat = 0; 934 913 out: 935 914 kfree(mic.data); 936 915 return stat; 916 + 917 + unwrap_failed: 918 + trace_rpcgss_svc_unwrap_failed(rqstp); 919 + goto out; 920 + bad_seqno: 921 + trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno); 922 + goto out; 923 + bad_mic: 924 + trace_rpcgss_svc_mic(rqstp, maj_stat); 925 + goto out; 937 926 } 938 927 939 928 static inline int ··· 968 937 { 969 938 u32 priv_len, maj_stat; 970 939 int pad, remaining_len, offset; 940 + u32 rseqno; 971 941 972 942 clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); 973 943 ··· 983 951 * not yet read from the head, so these two values are different: */ 984 952 remaining_len = total_buf_len(buf); 985 953 if (priv_len > remaining_len) 986 - return -EINVAL; 954 + goto unwrap_failed; 987 955 pad = remaining_len - priv_len; 988 956 buf->len -= pad; 989 957 fix_priv_head(buf, pad); 990 958 991 959 maj_stat = gss_unwrap(ctx, 0, priv_len, buf); 992 960 pad = priv_len - buf->len; 993 - buf->len -= pad; 994 961 /* The upper layers assume the buffer is aligned on 4-byte boundaries. 995 962 * In the krb5p case, at least, the data ends up offset, so we need to 996 963 * move it around. 
*/ ··· 1003 972 fix_priv_head(buf, pad); 1004 973 } 1005 974 if (maj_stat != GSS_S_COMPLETE) 1006 - return -EINVAL; 975 + goto bad_unwrap; 1007 976 out_seq: 1008 - if (svc_getnl(&buf->head[0]) != seq) 1009 - return -EINVAL; 977 + rseqno = svc_getnl(&buf->head[0]); 978 + if (rseqno != seq) 979 + goto bad_seqno; 1010 980 return 0; 981 + 982 + unwrap_failed: 983 + trace_rpcgss_svc_unwrap_failed(rqstp); 984 + return -EINVAL; 985 + bad_seqno: 986 + trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno); 987 + return -EINVAL; 988 + bad_unwrap: 989 + trace_rpcgss_svc_unwrap(rqstp, maj_stat); 990 + return -EINVAL; 1011 991 } 1012 992 1013 993 struct gss_svc_data { ··· 1356 1314 if (status) 1357 1315 goto out; 1358 1316 1359 - trace_rpcgss_svc_accept_upcall(rqstp->rq_xid, ud.major_status, 1360 - ud.minor_status); 1317 + trace_rpcgss_svc_accept_upcall(rqstp, ud.major_status, ud.minor_status); 1361 1318 1362 1319 switch (ud.major_status) { 1363 1320 case GSS_S_CONTINUE_NEEDED: ··· 1531 1490 int ret; 1532 1491 struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); 1533 1492 1534 - trace_rpcgss_svc_accept(rqstp->rq_xid, argv->iov_len); 1535 - 1536 1493 *authp = rpc_autherr_badcred; 1537 1494 if (!svcdata) 1538 1495 svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); ··· 1647 1608 GSS_C_QOP_DEFAULT, 1648 1609 gc->gc_svc); 1649 1610 ret = SVC_OK; 1611 + trace_rpcgss_svc_authenticate(rqstp, gc); 1650 1612 goto out; 1651 1613 } 1652 1614 garbage_args:
+3
net/sunrpc/auth_gss/trace.c
··· 5 5 6 6 #include <linux/sunrpc/clnt.h> 7 7 #include <linux/sunrpc/sched.h> 8 + #include <linux/sunrpc/svc.h> 9 + #include <linux/sunrpc/svc_xprt.h> 10 + #include <linux/sunrpc/auth_gss.h> 8 11 #include <linux/sunrpc/gss_err.h> 9 12 #include <linux/sunrpc/auth_gss.h> 10 13
-1
net/sunrpc/xprtrdma/frwr_ops.c
··· 40 40 * New MRs are created on demand. 41 41 */ 42 42 43 - #include <linux/sunrpc/rpc_rdma.h> 44 43 #include <linux/sunrpc/svc_rdma.h> 45 44 46 45 #include "xprt_rdma.h"
+10 -21
net/sunrpc/xprtrdma/rpc_rdma.c
··· 275 275 return n; 276 276 } 277 277 278 - static void 279 - xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) 280 - { 281 - *iptr++ = cpu_to_be32(mr->mr_handle); 282 - *iptr++ = cpu_to_be32(mr->mr_length); 283 - xdr_encode_hyper(iptr, mr->mr_offset); 284 - } 285 - 286 278 static int 287 279 encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) 288 280 { ··· 284 292 if (unlikely(!p)) 285 293 return -EMSGSIZE; 286 294 287 - xdr_encode_rdma_segment(p, mr); 295 + xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset); 288 296 return 0; 289 297 } 290 298 ··· 299 307 return -EMSGSIZE; 300 308 301 309 *p++ = xdr_one; /* Item present */ 302 - *p++ = cpu_to_be32(position); 303 - xdr_encode_rdma_segment(p, mr); 310 + xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length, 311 + mr->mr_offset); 304 312 return 0; 305 313 } 306 314 ··· 1125 1133 p = xdr_inline_decode(xdr, 0); 1126 1134 1127 1135 /* Chunk lists */ 1128 - if (*p++ != xdr_zero) 1136 + if (xdr_item_is_present(p++)) 1129 1137 return false; 1130 - if (*p++ != xdr_zero) 1138 + if (xdr_item_is_present(p++)) 1131 1139 return false; 1132 - if (*p++ != xdr_zero) 1140 + if (xdr_item_is_present(p++)) 1133 1141 return false; 1134 1142 1135 1143 /* RPC header */ ··· 1168 1176 if (unlikely(!p)) 1169 1177 return -EIO; 1170 1178 1171 - handle = be32_to_cpup(p++); 1172 - *length = be32_to_cpup(p++); 1173 - xdr_decode_hyper(p, &offset); 1174 - 1179 + xdr_decode_rdma_segment(p, &handle, length, &offset); 1175 1180 trace_xprtrdma_decode_seg(handle, *length, offset); 1176 1181 return 0; 1177 1182 } ··· 1204 1215 p = xdr_inline_decode(xdr, sizeof(*p)); 1205 1216 if (unlikely(!p)) 1206 1217 return -EIO; 1207 - if (unlikely(*p != xdr_zero)) 1218 + if (unlikely(xdr_item_is_present(p))) 1208 1219 return -EIO; 1209 1220 return 0; 1210 1221 } ··· 1223 1234 p = xdr_inline_decode(xdr, sizeof(*p)); 1224 1235 if (unlikely(!p)) 1225 1236 return -EIO; 1226 - if (*p == xdr_zero) 1237 + if 
(xdr_item_is_absent(p)) 1227 1238 break; 1228 1239 if (!first) 1229 1240 return -EIO; ··· 1245 1256 return -EIO; 1246 1257 1247 1258 *length = 0; 1248 - if (*p != xdr_zero) 1259 + if (xdr_item_is_present(p)) 1249 1260 if (decode_write_chunk(xdr, length)) 1250 1261 return -EIO; 1251 1262 return 0;
+1 -1
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 87 87 */ 88 88 get_page(virt_to_page(rqst->rq_buffer)); 89 89 ctxt->sc_send_wr.opcode = IB_WR_SEND; 90 - return svc_rdma_send(rdma, &ctxt->sc_send_wr); 90 + return svc_rdma_send(rdma, ctxt); 91 91 } 92 92 93 93 /* Server-side transport endpoint wants a whole page for its send
+39 -76
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
··· 117 117 rc_list); 118 118 } 119 119 120 + static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, 121 + struct rpc_rdma_cid *cid) 122 + { 123 + cid->ci_queue_id = rdma->sc_rq_cq->res.id; 124 + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 125 + } 126 + 120 127 static struct svc_rdma_recv_ctxt * 121 128 svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) 122 129 { ··· 141 134 rdma->sc_max_req_size, DMA_FROM_DEVICE); 142 135 if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) 143 136 goto fail2; 137 + 138 + svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid); 144 139 145 140 ctxt->rc_recv_wr.next = NULL; 146 141 ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; ··· 257 248 { 258 249 int ret; 259 250 260 - svc_xprt_get(&rdma->sc_xprt); 251 + trace_svcrdma_post_recv(ctxt); 261 252 ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); 262 - trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret); 263 253 if (ret) 264 254 goto err_post; 265 255 return 0; 266 256 267 257 err_post: 258 + trace_svcrdma_rq_post_err(rdma, ret); 268 259 svc_rdma_recv_ctxt_put(rdma, ctxt); 269 - svc_xprt_put(&rdma->sc_xprt); 270 260 return ret; 271 261 } 272 262 ··· 273 265 { 274 266 struct svc_rdma_recv_ctxt *ctxt; 275 267 268 + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) 269 + return 0; 276 270 ctxt = svc_rdma_recv_ctxt_get(rdma); 277 271 if (!ctxt) 278 272 return -ENOMEM; ··· 319 309 struct ib_cqe *cqe = wc->wr_cqe; 320 310 struct svc_rdma_recv_ctxt *ctxt; 321 311 322 - trace_svcrdma_wc_receive(wc); 323 - 324 312 /* WARNING: Only wc->wr_cqe and wc->status are reliable */ 325 313 ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); 326 314 315 + trace_svcrdma_wc_receive(wc, &ctxt->rc_cid); 327 316 if (wc->status != IB_WC_SUCCESS) 328 317 goto flushed; 329 318 ··· 342 333 spin_unlock(&rdma->sc_rq_dto_lock); 343 334 if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) 344 335 svc_xprt_enqueue(&rdma->sc_xprt); 345 - goto out; 336 + return; 346 337 347 338 flushed: 
348 339 post_err: 349 340 svc_rdma_recv_ctxt_put(rdma, ctxt); 350 341 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 351 342 svc_xprt_enqueue(&rdma->sc_xprt); 352 - out: 353 - svc_xprt_put(&rdma->sc_xprt); 354 343 } 355 344 356 345 /** ··· 426 419 427 420 len = 0; 428 421 first = true; 429 - while (*p != xdr_zero) { 422 + while (xdr_item_is_present(p)) { 430 423 p = xdr_inline_decode(&rctxt->rc_stream, 431 424 rpcrdma_readseg_maxsz * sizeof(*p)); 432 425 if (!p) ··· 473 466 if (!p) 474 467 return false; 475 468 476 - handle = be32_to_cpup(p++); 477 - length = be32_to_cpup(p++); 478 - xdr_decode_hyper(p, &offset); 469 + xdr_decode_rdma_segment(p, &handle, &length, &offset); 479 470 trace_svcrdma_decode_wseg(handle, length, offset); 480 471 481 472 total += length; ··· 505 500 if (!p) 506 501 return false; 507 502 rctxt->rc_write_list = p; 508 - while (*p != xdr_zero) { 503 + while (xdr_item_is_present(p)) { 509 504 if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK)) 510 505 return false; 511 506 ++chcount; ··· 537 532 p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 538 533 if (!p) 539 534 return false; 540 - rctxt->rc_reply_chunk = p; 541 - if (*p != xdr_zero) { 535 + rctxt->rc_reply_chunk = NULL; 536 + if (xdr_item_is_present(p)) { 542 537 if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK)) 543 538 return false; 544 - } else { 545 - rctxt->rc_reply_chunk = NULL; 539 + rctxt->rc_reply_chunk = p; 546 540 } 547 541 return true; 548 542 } ··· 572 568 p += rpcrdma_fixed_maxsz; 573 569 574 570 /* Read list */ 575 - while (*p++ != xdr_zero) { 571 + while (xdr_item_is_present(p++)) { 576 572 p++; /* position */ 577 573 if (inv_rkey == xdr_zero) 578 574 inv_rkey = *p; ··· 582 578 } 583 579 584 580 /* Write list */ 585 - while (*p++ != xdr_zero) { 581 + while (xdr_item_is_present(p++)) { 586 582 segcount = be32_to_cpup(p++); 587 583 for (i = 0; i < segcount; i++) { 588 584 if (inv_rkey == xdr_zero) ··· 594 590 } 595 591 596 592 /* Reply chunk */ 597 - if 
(*p++ != xdr_zero) { 593 + if (xdr_item_is_present(p++)) { 598 594 segcount = be32_to_cpup(p++); 599 595 for (i = 0; i < segcount; i++) { 600 596 if (inv_rkey == xdr_zero) ··· 665 661 hdr_len = xdr_stream_pos(&rctxt->rc_stream); 666 662 rq_arg->head[0].iov_len -= hdr_len; 667 663 rq_arg->len -= hdr_len; 668 - trace_svcrdma_decode_rqst(rdma_argp, hdr_len); 664 + trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len); 669 665 return hdr_len; 670 666 671 667 out_short: 672 - trace_svcrdma_decode_short_err(rq_arg->len); 668 + trace_svcrdma_decode_short_err(rctxt, rq_arg->len); 673 669 return -EINVAL; 674 670 675 671 out_version: 676 - trace_svcrdma_decode_badvers_err(rdma_argp); 672 + trace_svcrdma_decode_badvers_err(rctxt, rdma_argp); 677 673 return -EPROTONOSUPPORT; 678 674 679 675 out_drop: 680 - trace_svcrdma_decode_drop_err(rdma_argp); 676 + trace_svcrdma_decode_drop_err(rctxt, rdma_argp); 681 677 return 0; 682 678 683 679 out_proc: 684 - trace_svcrdma_decode_badproc_err(rdma_argp); 680 + trace_svcrdma_decode_badproc_err(rctxt, rdma_argp); 685 681 return -EINVAL; 686 682 687 683 out_inval: 688 - trace_svcrdma_decode_parse_err(rdma_argp); 684 + trace_svcrdma_decode_parse_err(rctxt, rdma_argp); 689 685 return -EINVAL; 690 686 } 691 687 ··· 718 714 rqstp->rq_arg.buflen = head->rc_arg.buflen; 719 715 } 720 716 721 - static void svc_rdma_send_error(struct svcxprt_rdma *xprt, 722 - __be32 *rdma_argp, int status) 717 + static void svc_rdma_send_error(struct svcxprt_rdma *rdma, 718 + struct svc_rdma_recv_ctxt *rctxt, 719 + int status) 723 720 { 724 - struct svc_rdma_send_ctxt *ctxt; 725 - __be32 *p; 726 - int ret; 721 + struct svc_rdma_send_ctxt *sctxt; 727 722 728 - ctxt = svc_rdma_send_ctxt_get(xprt); 729 - if (!ctxt) 723 + sctxt = svc_rdma_send_ctxt_get(rdma); 724 + if (!sctxt) 730 725 return; 731 - 732 - p = xdr_reserve_space(&ctxt->sc_stream, 733 - rpcrdma_fixed_maxsz * sizeof(*p)); 734 - if (!p) 735 - goto put_ctxt; 736 - 737 - *p++ = *rdma_argp; 738 - *p++ = 
*(rdma_argp + 1); 739 - *p++ = xprt->sc_fc_credits; 740 - *p = rdma_error; 741 - 742 - switch (status) { 743 - case -EPROTONOSUPPORT: 744 - p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p)); 745 - if (!p) 746 - goto put_ctxt; 747 - 748 - *p++ = err_vers; 749 - *p++ = rpcrdma_version; 750 - *p = rpcrdma_version; 751 - trace_svcrdma_err_vers(*rdma_argp); 752 - break; 753 - default: 754 - p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p)); 755 - if (!p) 756 - goto put_ctxt; 757 - 758 - *p = err_chunk; 759 - trace_svcrdma_err_chunk(*rdma_argp); 760 - } 761 - 762 - ctxt->sc_send_wr.num_sge = 1; 763 - ctxt->sc_send_wr.opcode = IB_WR_SEND; 764 - ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len; 765 - ret = svc_rdma_send(xprt, &ctxt->sc_send_wr); 766 - if (ret) 767 - goto put_ctxt; 768 - return; 769 - 770 - put_ctxt: 771 - svc_rdma_send_ctxt_put(xprt, ctxt); 726 + svc_rdma_send_error_msg(rdma, sctxt, rctxt, status); 772 727 } 773 728 774 729 /* By convention, backchannel calls arrive via rdma_msg type ··· 863 900 return 0; 864 901 865 902 out_err: 866 - svc_rdma_send_error(rdma_xprt, p, ret); 903 + svc_rdma_send_error(rdma_xprt, ctxt, ret); 867 904 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 868 905 return 0; 869 906 870 907 out_postfail: 871 908 if (ret == -EINVAL) 872 - svc_rdma_send_error(rdma_xprt, p, ret); 909 + svc_rdma_send_error(rdma_xprt, ctxt, ret); 873 910 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 874 911 return ret; 875 912
+49 -32
net/sunrpc/xprtrdma/svc_rdma_rw.c
··· 7 7 8 8 #include <rdma/rw.h> 9 9 10 + #include <linux/sunrpc/xdr.h> 10 11 #include <linux/sunrpc/rpc_rdma.h> 11 12 #include <linux/sunrpc/svc_rdma.h> 12 13 ··· 145 144 * demand, and not cached. 146 145 */ 147 146 struct svc_rdma_chunk_ctxt { 147 + struct rpc_rdma_cid cc_cid; 148 148 struct ib_cqe cc_cqe; 149 149 struct svcxprt_rdma *cc_rdma; 150 150 struct list_head cc_rwctxts; 151 151 int cc_sqecount; 152 152 }; 153 153 154 + static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma, 155 + struct rpc_rdma_cid *cid) 156 + { 157 + cid->ci_queue_id = rdma->sc_sq_cq->res.id; 158 + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 159 + } 160 + 154 161 static void svc_rdma_cc_init(struct svcxprt_rdma *rdma, 155 162 struct svc_rdma_chunk_ctxt *cc) 156 163 { 164 + svc_rdma_cc_cid_init(rdma, &cc->cc_cid); 157 165 cc->cc_rdma = rdma; 158 - svc_xprt_get(&rdma->sc_xprt); 159 166 160 167 INIT_LIST_HEAD(&cc->cc_rwctxts); 161 168 cc->cc_sqecount = 0; ··· 183 174 ctxt->rw_nents, dir); 184 175 svc_rdma_put_rw_ctxt(rdma, ctxt); 185 176 } 186 - svc_xprt_put(&rdma->sc_xprt); 187 177 } 188 178 189 179 /* State for sending a Write or Reply chunk. 
··· 244 236 struct svc_rdma_write_info *info = 245 237 container_of(cc, struct svc_rdma_write_info, wi_cc); 246 238 247 - trace_svcrdma_wc_write(wc); 239 + trace_svcrdma_wc_write(wc, &cc->cc_cid); 248 240 249 241 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 250 242 wake_up(&rdma->sc_send_wait); ··· 302 294 struct svc_rdma_read_info *info = 303 295 container_of(cc, struct svc_rdma_read_info, ri_cc); 304 296 305 - trace_svcrdma_wc_read(wc); 297 + trace_svcrdma_wc_read(wc, &cc->cc_cid); 306 298 307 299 atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 308 300 wake_up(&rdma->sc_send_wait); ··· 358 350 do { 359 351 if (atomic_sub_return(cc->cc_sqecount, 360 352 &rdma->sc_sq_avail) > 0) { 353 + trace_svcrdma_post_chunk(&cc->cc_cid, cc->cc_sqecount); 361 354 ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); 362 355 if (ret) 363 356 break; ··· 450 441 seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz; 451 442 do { 452 443 unsigned int write_len; 453 - u32 seg_length, seg_handle; 454 - u64 seg_offset; 444 + u32 handle, length; 445 + u64 offset; 455 446 456 447 if (info->wi_seg_no >= info->wi_nsegs) 457 448 goto out_overflow; 458 449 459 - seg_handle = be32_to_cpup(seg); 460 - seg_length = be32_to_cpup(seg + 1); 461 - xdr_decode_hyper(seg + 2, &seg_offset); 462 - seg_offset += info->wi_seg_off; 450 + xdr_decode_rdma_segment(seg, &handle, &length, &offset); 451 + offset += info->wi_seg_off; 463 452 464 - write_len = min(remaining, seg_length - info->wi_seg_off); 453 + write_len = min(remaining, length - info->wi_seg_off); 465 454 ctxt = svc_rdma_get_rw_ctxt(rdma, 466 455 (write_len >> PAGE_SHIFT) + 2); 467 456 if (!ctxt) 468 457 return -ENOMEM; 469 458 470 459 constructor(info, write_len, ctxt); 471 - ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle, 460 + ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, handle, 472 461 DMA_TO_DEVICE); 473 462 if (ret < 0) 474 463 return -EIO; 475 464 476 - trace_svcrdma_send_wseg(seg_handle, write_len, 
seg_offset); 465 + trace_svcrdma_send_wseg(handle, write_len, offset); 477 466 478 467 list_add(&ctxt->rw_list, &cc->cc_rwctxts); 479 468 cc->cc_sqecount += ret; 480 - if (write_len == seg_length - info->wi_seg_off) { 469 + if (write_len == length - info->wi_seg_off) { 481 470 seg += 4; 482 471 info->wi_seg_no++; 483 472 info->wi_seg_off = 0; ··· 691 684 struct svc_rdma_read_info *info, 692 685 __be32 *p) 693 686 { 694 - unsigned int i; 695 687 int ret; 696 688 697 689 ret = -EINVAL; 698 690 info->ri_chunklen = 0; 699 691 while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { 700 - u32 rs_handle, rs_length; 701 - u64 rs_offset; 692 + u32 handle, length; 693 + u64 offset; 702 694 703 - rs_handle = be32_to_cpup(p++); 704 - rs_length = be32_to_cpup(p++); 705 - p = xdr_decode_hyper(p, &rs_offset); 706 - 707 - ret = svc_rdma_build_read_segment(info, rqstp, 708 - rs_handle, rs_length, 709 - rs_offset); 695 + p = xdr_decode_rdma_segment(p, &handle, &length, &offset); 696 + ret = svc_rdma_build_read_segment(info, rqstp, handle, length, 697 + offset); 710 698 if (ret < 0) 711 699 break; 712 700 713 - trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset); 714 - info->ri_chunklen += rs_length; 701 + trace_svcrdma_send_rseg(handle, length, offset); 702 + info->ri_chunklen += length; 715 703 } 716 - 717 - /* Pages under I/O have been copied to head->rc_pages. 718 - * Prevent their premature release by svc_xprt_release() . 719 - */ 720 - for (i = 0; i < info->ri_readctxt->rc_page_count; i++) 721 - rqstp->rq_pages[i] = NULL; 722 704 723 705 return ret; 724 706 } ··· 803 807 return ret; 804 808 } 805 809 810 + /* Pages under I/O have been copied to head->rc_pages. Ensure they 811 + * are not released by svc_xprt_release() until the I/O is complete. 812 + * 813 + * This has to be done after all Read WRs are constructed to properly 814 + * handle a page that is part of I/O on behalf of two different RDMA 815 + * segments. 
816 + * 817 + * Do this only if I/O has been posted. Otherwise, we do indeed want 818 + * svc_xprt_release() to clean things up properly. 819 + */ 820 + static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, 821 + const unsigned int start, 822 + const unsigned int num_pages) 823 + { 824 + unsigned int i; 825 + 826 + for (i = start; i < num_pages + start; i++) 827 + rqstp->rq_pages[i] = NULL; 828 + } 829 + 806 830 /** 807 831 * svc_rdma_recv_read_chunk - Pull a Read chunk from the client 808 832 * @rdma: controlling RDMA transport ··· 876 860 ret = svc_rdma_post_chunk_ctxt(&info->ri_cc); 877 861 if (ret < 0) 878 862 goto out_err; 863 + svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count); 879 864 return 0; 880 865 881 866 out_err:
+78 -44
net/sunrpc/xprtrdma/svc_rdma_sendto.c
··· 106 106 #include <rdma/rdma_cm.h> 107 107 108 108 #include <linux/sunrpc/debug.h> 109 - #include <linux/sunrpc/rpc_rdma.h> 110 109 #include <linux/sunrpc/svc_rdma.h> 111 110 112 111 #include "xprt_rdma.h" ··· 120 121 { 121 122 return list_first_entry_or_null(list, struct svc_rdma_send_ctxt, 122 123 sc_list); 124 + } 125 + 126 + static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, 127 + struct rpc_rdma_cid *cid) 128 + { 129 + cid->ci_queue_id = rdma->sc_sq_cq->res.id; 130 + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 123 131 } 124 132 125 133 static struct svc_rdma_send_ctxt * ··· 150 144 rdma->sc_max_req_size, DMA_TO_DEVICE); 151 145 if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) 152 146 goto fail2; 147 + 148 + svc_rdma_send_cid_init(rdma, &ctxt->sc_cid); 153 149 154 150 ctxt->sc_send_wr.next = NULL; 155 151 ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ··· 277 269 { 278 270 struct svcxprt_rdma *rdma = cq->cq_context; 279 271 struct ib_cqe *cqe = wc->wr_cqe; 280 - struct svc_rdma_send_ctxt *ctxt; 272 + struct svc_rdma_send_ctxt *ctxt = 273 + container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); 281 274 282 - trace_svcrdma_wc_send(wc); 275 + trace_svcrdma_wc_send(wc, &ctxt->sc_cid); 283 276 284 277 atomic_inc(&rdma->sc_sq_avail); 285 278 wake_up(&rdma->sc_send_wait); 286 279 287 - ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); 288 280 svc_rdma_send_ctxt_put(rdma, ctxt); 289 281 290 282 if (unlikely(wc->status != IB_WC_SUCCESS)) { 291 283 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 292 284 svc_xprt_enqueue(&rdma->sc_xprt); 293 285 } 294 - 295 - svc_xprt_put(&rdma->sc_xprt); 296 286 } 297 287 298 288 /** 299 289 * svc_rdma_send - Post a single Send WR 300 290 * @rdma: transport on which to post the WR 301 - * @wr: prepared Send WR to post 291 + * @ctxt: send ctxt with a Send WR ready to post 302 292 * 303 293 * Returns zero the Send WR was posted successfully. Otherwise, a 304 294 * negative errno is returned. 
305 295 */ 306 - int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) 296 + int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) 307 297 { 298 + struct ib_send_wr *wr = &ctxt->sc_send_wr; 308 299 int ret; 309 300 310 301 might_sleep(); ··· 328 321 continue; 329 322 } 330 323 331 - svc_xprt_get(&rdma->sc_xprt); 332 - trace_svcrdma_post_send(wr); 324 + trace_svcrdma_post_send(ctxt); 333 325 ret = ib_post_send(rdma->sc_qp, wr, NULL); 334 326 if (ret) 335 327 break; ··· 337 331 338 332 trace_svcrdma_sq_post_err(rdma, ret); 339 333 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 340 - svc_xprt_put(&rdma->sc_xprt); 341 334 wake_up(&rdma->sc_send_wait); 342 335 return ret; 343 336 } ··· 380 375 if (!p) 381 376 return -EMSGSIZE; 382 377 383 - handle = be32_to_cpup(src++); 384 - length = be32_to_cpup(src++); 385 - xdr_decode_hyper(src, &offset); 378 + xdr_decode_rdma_segment(src, &handle, &length, &offset); 386 379 387 - *p++ = cpu_to_be32(handle); 388 380 if (*remaining < length) { 389 381 /* segment only partly filled */ 390 382 length = *remaining; ··· 390 388 /* entire segment was consumed */ 391 389 *remaining -= length; 392 390 } 393 - *p++ = cpu_to_be32(length); 394 - xdr_encode_hyper(p, offset); 391 + xdr_encode_rdma_segment(p, handle, length, offset); 395 392 396 393 trace_svcrdma_encode_wseg(handle, length, offset); 397 394 return len; ··· 802 801 } else { 803 802 sctxt->sc_send_wr.opcode = IB_WR_SEND; 804 803 } 805 - return svc_rdma_send(rdma, &sctxt->sc_send_wr); 804 + return svc_rdma_send(rdma, sctxt); 806 805 } 807 806 808 - /* Given the client-provided Write and Reply chunks, the server was not 809 - * able to form a complete reply. Return an RDMA_ERROR message so the 810 - * client can retire this RPC transaction. As above, the Send completion 811 - * routine releases payload pages that were part of a previous RDMA Write. 
807 + /** 808 + * svc_rdma_send_error_msg - Send an RPC/RDMA v1 error response 809 + * @rdma: controlling transport context 810 + * @sctxt: Send context for the response 811 + * @rctxt: Receive context for incoming bad message 812 + * @status: negative errno indicating error that occurred 812 813 * 813 - * Remote Invalidation is skipped for simplicity. 814 + * Given the client-provided Read, Write, and Reply chunks, the 815 + * server was not able to parse the Call or form a complete Reply. 816 + * Return an RDMA_ERROR message so the client can retire the RPC 817 + * transaction. 818 + * 819 + * The caller does not have to release @sctxt. It is released by 820 + * Send completion, or by this function on error. 814 821 */ 815 - static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 816 - struct svc_rdma_send_ctxt *ctxt, 817 - struct svc_rqst *rqstp) 822 + void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 823 + struct svc_rdma_send_ctxt *sctxt, 824 + struct svc_rdma_recv_ctxt *rctxt, 825 + int status) 818 826 { 819 - struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; 820 827 __be32 *rdma_argp = rctxt->rc_recv_buf; 821 828 __be32 *p; 822 829 823 - rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0); 824 - xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, 825 - NULL); 830 + rpcrdma_set_xdrlen(&sctxt->sc_hdrbuf, 0); 831 + xdr_init_encode(&sctxt->sc_stream, &sctxt->sc_hdrbuf, 832 + sctxt->sc_xprt_buf, NULL); 826 833 827 - p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_ERR); 834 + p = xdr_reserve_space(&sctxt->sc_stream, 835 + rpcrdma_fixed_maxsz * sizeof(*p)); 828 836 if (!p) 829 - return -ENOMSG; 837 + goto put_ctxt; 830 838 831 839 *p++ = *rdma_argp; 832 840 *p++ = *(rdma_argp + 1); 833 841 *p++ = rdma->sc_fc_credits; 834 - *p++ = rdma_error; 835 - *p = err_chunk; 836 - trace_svcrdma_err_chunk(*rdma_argp); 842 + *p = rdma_error; 837 843 838 - svc_rdma_save_io_pages(rqstp, ctxt); 844 + switch (status) { 845 + case -EPROTONOSUPPORT: 846 
+ p = xdr_reserve_space(&sctxt->sc_stream, 3 * sizeof(*p)); 847 + if (!p) 848 + goto put_ctxt; 839 849 840 - ctxt->sc_send_wr.num_sge = 1; 841 - ctxt->sc_send_wr.opcode = IB_WR_SEND; 842 - ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len; 843 - return svc_rdma_send(rdma, &ctxt->sc_send_wr); 850 + *p++ = err_vers; 851 + *p++ = rpcrdma_version; 852 + *p = rpcrdma_version; 853 + trace_svcrdma_err_vers(*rdma_argp); 854 + break; 855 + default: 856 + p = xdr_reserve_space(&sctxt->sc_stream, sizeof(*p)); 857 + if (!p) 858 + goto put_ctxt; 859 + 860 + *p = err_chunk; 861 + trace_svcrdma_err_chunk(*rdma_argp); 862 + } 863 + 864 + /* Remote Invalidation is skipped for simplicity. */ 865 + sctxt->sc_send_wr.num_sge = 1; 866 + sctxt->sc_send_wr.opcode = IB_WR_SEND; 867 + sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; 868 + if (svc_rdma_send(rdma, sctxt)) 869 + goto put_ctxt; 870 + return; 871 + 872 + put_ctxt: 873 + svc_rdma_send_ctxt_put(rdma, sctxt); 844 874 } 845 875 846 876 /** ··· 962 930 if (ret != -E2BIG && ret != -EINVAL) 963 931 goto err1; 964 932 965 - ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp); 966 - if (ret < 0) 967 - goto err1; 933 + /* Send completion releases payload pages that were part 934 + * of previously posted RDMA Writes. 935 + */ 936 + svc_rdma_save_io_pages(rqstp, sctxt); 937 + svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret); 968 938 return 0; 969 939 970 940 err1: 971 941 svc_rdma_send_ctxt_put(rdma, sctxt); 972 942 err0: 973 - trace_svcrdma_send_failed(rqstp, ret); 943 + trace_svcrdma_send_err(rqstp, ret); 974 944 set_bit(XPT_CLOSE, &xprt->xpt_flags); 975 945 return -ENOTCONN; 976 946 }
+26 -48
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 55 55 56 56 #include <linux/sunrpc/addr.h> 57 57 #include <linux/sunrpc/debug.h> 58 - #include <linux/sunrpc/rpc_rdma.h> 59 58 #include <linux/sunrpc/svc_xprt.h> 60 59 #include <linux/sunrpc/svc_rdma.h> 61 60 ··· 237 238 svc_xprt_enqueue(&listen_xprt->sc_xprt); 238 239 } 239 240 240 - /* 241 - * Handles events generated on the listening endpoint. These events will be 242 - * either be incoming connect requests or adapter removal events. 241 + /** 242 + * svc_rdma_listen_handler - Handle CM events generated on a listening endpoint 243 + * @cma_id: the server's listener rdma_cm_id 244 + * @event: details of the event 245 + * 246 + * Return values: 247 + * %0: Do not destroy @cma_id 248 + * %1: Destroy @cma_id (never returned here) 249 + * 250 + * NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners. 243 251 */ 244 - static int rdma_listen_handler(struct rdma_cm_id *cma_id, 245 - struct rdma_cm_event *event) 252 + static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id, 253 + struct rdma_cm_event *event) 246 254 { 247 255 switch (event->event) { 248 256 case RDMA_CM_EVENT_CONNECT_REQUEST: 249 - dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 250 - "event = %s (%d)\n", cma_id, cma_id->context, 251 - rdma_event_msg(event->event), event->event); 252 257 handle_connect_req(cma_id, &event->param.conn); 253 258 break; 254 259 default: 255 - /* NB: No device removal upcall for INADDR_ANY listeners */ 256 - dprintk("svcrdma: Unexpected event on listening endpoint %p, " 257 - "event = %s (%d)\n", cma_id, 258 - rdma_event_msg(event->event), event->event); 259 260 break; 260 261 } 261 - 262 262 return 0; 263 263 } 264 264 265 - static int rdma_cma_handler(struct rdma_cm_id *cma_id, 266 - struct rdma_cm_event *event) 265 + /** 266 + * svc_rdma_cma_handler - Handle CM events on client connections 267 + * @cma_id: the server's listener rdma_cm_id 268 + * @event: details of the event 269 + * 270 + * Return values: 271 + * %0: Do not destroy 
@cma_id 272 + * %1: Destroy @cma_id (never returned here) 273 + */ 274 + static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id, 275 + struct rdma_cm_event *event) 267 276 { 268 277 struct svcxprt_rdma *rdma = cma_id->context; 269 278 struct svc_xprt *xprt = &rdma->sc_xprt; 270 279 271 280 switch (event->event) { 272 281 case RDMA_CM_EVENT_ESTABLISHED: 273 - /* Accept complete */ 274 - svc_xprt_get(xprt); 275 - dprintk("svcrdma: Connection completed on DTO xprt=%p, " 276 - "cm_id=%p\n", xprt, cma_id); 277 282 clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); 278 283 svc_xprt_enqueue(xprt); 279 284 break; 280 285 case RDMA_CM_EVENT_DISCONNECTED: 281 - dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n", 282 - xprt, cma_id); 283 - set_bit(XPT_CLOSE, &xprt->xpt_flags); 284 - svc_xprt_enqueue(xprt); 285 - svc_xprt_put(xprt); 286 - break; 287 286 case RDMA_CM_EVENT_DEVICE_REMOVAL: 288 - dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " 289 - "event = %s (%d)\n", cma_id, xprt, 290 - rdma_event_msg(event->event), event->event); 291 287 set_bit(XPT_CLOSE, &xprt->xpt_flags); 292 288 svc_xprt_enqueue(xprt); 293 - svc_xprt_put(xprt); 294 289 break; 295 290 default: 296 - dprintk("svcrdma: Unexpected event on DTO endpoint %p, " 297 - "event = %s (%d)\n", cma_id, 298 - rdma_event_msg(event->event), event->event); 299 291 break; 300 292 } 301 293 return 0; ··· 312 322 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 313 323 strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener"); 314 324 315 - listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt, 325 + listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt, 316 326 RDMA_PS_TCP, IB_QPT_RC); 317 327 if (IS_ERR(listen_id)) { 318 328 ret = PTR_ERR(listen_id); ··· 476 486 goto errout; 477 487 478 488 /* Swap out the handler */ 479 - newxprt->sc_cm_id->event_handler = rdma_cma_handler; 489 + newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; 480 490 481 491 /* Construct RDMA-CM private message 
*/ 482 492 pmsg.cp_magic = rpcrdma_cmp_magic; ··· 530 540 return NULL; 531 541 } 532 542 533 - /* 534 - * When connected, an svc_xprt has at least two references: 535 - * 536 - * - A reference held by the cm_id between the ESTABLISHED and 537 - * DISCONNECTED events. If the remote peer disconnected first, this 538 - * reference could be gone. 539 - * 540 - * - A reference held by the svc_recv code that called this function 541 - * as part of close processing. 542 - * 543 - * At a minimum one references should still be held. 544 - */ 545 543 static void svc_rdma_detach(struct svc_xprt *xprt) 546 544 { 547 545 struct svcxprt_rdma *rdma = 548 546 container_of(xprt, struct svcxprt_rdma, sc_xprt); 549 547 550 - /* Disconnect and flush posted WQE */ 551 548 rdma_disconnect(rdma->sc_cm_id); 552 549 } 553 550 ··· 544 567 container_of(work, struct svcxprt_rdma, sc_work); 545 568 struct svc_xprt *xprt = &rdma->sc_xprt; 546 569 570 + /* This blocks until the Completion Queues are empty */ 547 571 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 548 572 ib_drain_qp(rdma->sc_qp); 549 573