Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
"Highlights include:

Stable fixes:

- SUNRPC: Ensure bvecs are re-synced when we re-encode the RPC
request

- Fix an Oops in ff_layout_track_ds_error due to a PTR_ERR()
dereference

- Revert buggy NFS readdirplus optimisation

- NFSv4: Handle the special Linux file open access mode

- pnfs: Fix a problem where we gratuitously start doing I/O through
the MDS

Features:

- Allow NFS client to set up multiple TCP connections to the server
using a new 'nconnect=X' mount option. Queue length is used to
balance load.

- Enhance statistics reporting to report on all transports when using
multiple connections.

- Speed up SUNRPC by removing bh-safe spinlocks

- Add a mechanism to allow NFSv4 to request that containers set a
unique per-host identifier for when the hostname is not set.

- Ensure NFSv4 updates the lease_time after a clientid update

Bugfixes and cleanup:

- Fix use-after-free in rpcrdma_post_recvs

- Fix a memory leak when nfs_match_client() is interrupted

- Fix buggy file access checking in NFSv4 open for execute

- Disable unsupported client-side deduplication

- Fix spurious client disconnections

- Fix occasional RDMA transport deadlock

- Various RDMA cleanups

- Various tracepoint fixes

- Fix the TCP callback channel to guarantee the server can actually
send the number of callback requests that was negotiated at mount
time"

* tag 'nfs-for-5.3-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
pnfs/flexfiles: Add tracepoints for detecting pnfs fallback to MDS
pnfs: Fix a problem where we gratuitously start doing I/O through the MDS
SUNRPC: Optimise transport balancing code
SUNRPC: Ensure the bvecs are reset when we re-encode the RPC request
pnfs/flexfiles: Fix PTR_ERR() dereferences in ff_layout_track_ds_error
NFSv4: Don't use the zero stateid with layoutget
SUNRPC: Fix up backchannel slot table accounting
SUNRPC: Fix initialisation of struct rpc_xprt_switch
SUNRPC: Skip zero-refcount transports
SUNRPC: Replace division by multiplication in calculation of queue length
NFSv4: Validate the stateid before applying it to state recovery
nfs4.0: Refetch lease_time after clientid update
nfs4: Rename nfs41_setup_state_renewal
nfs4: Make nfs4_proc_get_lease_time available for nfs4.0
nfs: Fix copy-and-paste error in debug message
NFS: Replace 16 seq_printf() calls by seq_puts()
NFS: Use seq_putc() in nfs_show_stats()
Revert "NFS: readdirplus optimization by cache mechanism" (memleak)
SUNRPC: Fix transport accounting when caller specifies an rpc_xprt
NFS: Record task, client ID, and XID in xdr_status trace points
...
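
A second editorial sketch, for the per-host identifier feature above: the new
code in fs/nfs/sysfs.c exposes a writable attribute that a container runtime
can fill in before the first NFS mount when the hostname is not unique. The
path below matches the sysfs layout added by this series, but treat it as an
assumption; it may differ in other kernel versions:

    # echo "container-f3a1" > /sys/fs/nfs/net/nfs_client/identifier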

Diffstat: +1838 -877
+2 -1
fs/nfs/Makefile
···
 CFLAGS_nfstrace.o += -I$(src)
 nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
	   io.o direct.o pagelist.o read.o symlink.o unlink.o \
-	   write.o namespace.o mount_clnt.o nfstrace.o export.o
+	   write.o namespace.o mount_clnt.o nfstrace.o \
+	   export.o sysfs.o
 nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
+20 -8
fs/nfs/callback_proc.c
···
 validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
		const struct cb_sequenceargs * args)
 {
+	__be32 ret;
+
+	ret = cpu_to_be32(NFS4ERR_BADSLOT);
 	if (args->csa_slotid > tbl->server_highest_slotid)
-		return htonl(NFS4ERR_BADSLOT);
+		goto out_err;
 
 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
+		ret = cpu_to_be32(NFS4ERR_DELAY);
 		if (nfs4_test_locked_slot(tbl, slot->slot_nr))
-			return htonl(NFS4ERR_DELAY);
+			goto out_err;
+
 		/* Signal process_op to set this error on next op */
+		ret = cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP);
 		if (args->csa_cachethis == 0)
-			return htonl(NFS4ERR_RETRY_UNCACHED_REP);
+			goto out_err;
 
 		/* Liar! We never allowed you to set csa_cachethis != 0 */
-		return htonl(NFS4ERR_SEQ_FALSE_RETRY);
+		ret = cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY);
+		goto out_err;
 	}
 
 	/* Note: wraparound relies on seq_nr being of type u32 */
-	if (likely(args->csa_sequenceid == slot->seq_nr + 1))
-		return htonl(NFS4_OK);
-
 	/* Misordered request */
-	return htonl(NFS4ERR_SEQ_MISORDERED);
+	ret = cpu_to_be32(NFS4ERR_SEQ_MISORDERED);
+	if (args->csa_sequenceid != slot->seq_nr + 1)
+		goto out_err;
+
+	return cpu_to_be32(NFS4_OK);
+
+out_err:
+	trace_nfs4_cb_seqid_err(args, ret);
+	return ret;
 }
 
 /*
+20 -4
fs/nfs/client.c
···
 #include "pnfs.h"
 #include "nfs.h"
 #include "netns.h"
+#include "sysfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_CLIENT
 
···
 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
 	clp->cl_proto = cl_init->proto;
+	clp->cl_nconnect = cl_init->nconnect;
 	clp->cl_net = get_net(cl_init->net);
 
 	clp->cl_principal = "*";
···
 EXPORT_SYMBOL_GPL(nfs_alloc_client);
 
 #if IS_ENABLED(CONFIG_NFS_V4)
-void nfs_cleanup_cb_ident_idr(struct net *net)
+static void nfs_cleanup_cb_ident_idr(struct net *net)
 {
	struct nfs_net *nn = net_generic(net, nfs_net_id);
 
···
 }
 
 #else
-void nfs_cleanup_cb_ident_idr(struct net *net)
+static void nfs_cleanup_cb_ident_idr(struct net *net)
 {
 }
 
···
	clp = nfs_match_client(cl_init);
	if (clp) {
		spin_unlock(&nn->nfs_client_lock);
-		if (IS_ERR(clp))
-			return clp;
		if (new)
			new->rpc_ops->free_client(new);
+		if (IS_ERR(clp))
+			return clp;
		return nfs_found_client(cl_init, clp);
	}
	if (new) {
···
	struct rpc_create_args args = {
		.net		= clp->cl_net,
		.protocol	= clp->cl_proto,
+		.nconnect	= clp->cl_nconnect,
		.address	= (struct sockaddr *)&clp->cl_addr,
		.addrsize	= clp->cl_addrlen,
		.timeout	= cl_init->timeparms,
···
		.net = data->net,
		.timeparms = &timeparms,
		.cred = server->cred,
+		.nconnect = data->nfs_server.nconnect,
	};
	struct nfs_client *clp;
	int error;
···
 #endif
	spin_lock_init(&nn->nfs_client_lock);
	nn->boot_time = ktime_get_real();
+
+	nfs_netns_sysfs_setup(nn, net);
+}
+
+void nfs_clients_exit(struct net *net)
+{
+	struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+	nfs_netns_sysfs_destroy(nn);
+	nfs_cleanup_cb_ident_idr(net);
+	WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
+	WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
 }
 
 #ifdef CONFIG_PROC_FS
+10 -84
fs/nfs/dir.c
···
	ctx->dup_cookie = 0;
	ctx->cred = get_cred(cred);
	spin_lock(&dir->i_lock);
+	if (list_empty(&nfsi->open_files) &&
+	    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA |
+			NFS_INO_REVAL_FORCED;
	list_add(&ctx->list, &nfsi->open_files);
	spin_unlock(&dir->i_lock);
	return ctx;
···
	struct nfs_cache_array_entry array[0];
 };
 
-struct readdirvec {
-	unsigned long nr;
-	unsigned long index;
-	struct page *pages[NFS_MAX_READDIR_RAPAGES];
-};
-
 typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
 typedef struct {
	struct file	*file;
	struct page	*page;
	struct dir_context *ctx;
	unsigned long	page_index;
-	struct readdirvec pvec;
	u64		*dir_cookie;
	u64		last_cookie;
	loff_t		current_index;
···
	struct nfs_cache_array *array;
	unsigned int count = 0;
	int status;
-	int max_rapages = NFS_MAX_READDIR_RAPAGES;
-
-	desc->pvec.index = desc->page_index;
-	desc->pvec.nr = 0;
 
	scratch = alloc_page(GFP_KERNEL);
	if (scratch == NULL)
···
		if (desc->plus)
			nfs_prime_dcache(file_dentry(desc->file), entry);
 
-		status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
-		if (status == -ENOSPC) {
-			desc->pvec.nr++;
-			if (desc->pvec.nr == max_rapages)
-				break;
-			status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
-		}
+		status = nfs_readdir_add_to_array(entry, page);
		if (status != 0)
			break;
	} while (!entry->eof);
 
-	/*
-	 * page and desc->pvec.pages[0] are valid, don't need to check
-	 * whether or not to be NULL.
-	 */
-	copy_highpage(page, desc->pvec.pages[0]);
-
 out_nopages:
	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-		array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
+		array = kmap(page);
		array->eof_index = array->size;
		status = 0;
-		kunmap_atomic(array);
+		kunmap(page);
	}
 
	put_page(scratch);
-
-	/*
-	 * desc->pvec.nr > 0 means at least one page was completely filled,
-	 * we should return -ENOSPC. Otherwise function
-	 * nfs_readdir_xdr_to_array will enter infinite loop.
-	 */
-	if (desc->pvec.nr > 0)
-		return -ENOSPC;
	return status;
 }
···
	return -ENOMEM;
 }
 
-/*
- * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure.
- */
-static
-void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
-{
-	struct nfs_cache_array *array;
-	int max_rapages = NFS_MAX_READDIR_RAPAGES;
-	int index;
-
-	for (index = 0; index < max_rapages; index++) {
-		array = kmap_atomic(desc->pvec.pages[index]);
-		memset(array, 0, sizeof(struct nfs_cache_array));
-		array->eof_index = -1;
-		kunmap_atomic(array);
-	}
-}
-
 static
 int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
 {
···
	struct nfs_cache_array *array;
	int status = -ENOMEM;
	unsigned int array_size = ARRAY_SIZE(pages);
-
-	/*
-	 * This means we hit readdir rdpages miss, the preallocated rdpages
-	 * are useless, the preallocate rdpages should be reinitialized.
-	 */
-	nfs_readdir_rapages_init(desc);
 
	entry.prev_cookie = 0;
	entry.cookie = desc->last_cookie;
···
	struct inode	*inode = file_inode(desc->file);
	int ret;
 
-	/*
-	 * If desc->page_index in range desc->pvec.index and
-	 * desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
-	 */
-	if (desc->page_index >= desc->pvec.index &&
-		desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
-		/*
-		 * page and desc->pvec.pages[x] are valid, don't need to check
-		 * whether or not to be NULL.
-		 */
-		copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
-		ret = 0;
-	} else {
-		ret = nfs_readdir_xdr_to_array(desc, page, inode);
-		if (ret < 0)
-			goto error;
-	}
-
+	ret = nfs_readdir_xdr_to_array(desc, page, inode);
+	if (ret < 0)
+		goto error;
	SetPageUptodate(page);
 
	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
···
			*desc = &my_desc;
	struct nfs_open_dir_context *dir_ctx = file->private_data;
	int res = 0;
-	int max_rapages = NFS_MAX_READDIR_RAPAGES;
 
	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
			file, (long long)ctx->pos);
···
	desc->dir_cookie = &dir_ctx->dir_cookie;
	desc->decode = NFS_PROTO(inode)->decode_dirent;
	desc->plus = nfs_use_readdirplus(inode, ctx);
-
-	res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
-	if (res < 0)
-		return -ENOMEM;
-
-	nfs_readdir_rapages_init(desc);
 
	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
		res = nfs_revalidate_mapping(inode, file->f_mapping);
···
			break;
	} while (!desc->eof);
 out:
-	nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
	if (res > 0)
		res = 0;
	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
+26
fs/nfs/flexfilelayout/flexfilelayout.c
···
	if (pgio->pg_error < 0)
		return;
 out_mds:
+	trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
+			0, NFS4_MAX_UINT64, IOMODE_READ,
+			NFS_I(pgio->pg_inode)->layout,
+			pgio->pg_lseg);
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_read_mds(pgio);
···
		return;
 
 out_mds:
+	trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
+			0, NFS4_MAX_UINT64, IOMODE_RW,
+			NFS_I(pgio->pg_inode)->layout,
+			pgio->pg_lseg);
	pnfs_put_lseg(pgio->pg_lseg);
	pgio->pg_lseg = NULL;
	nfs_pageio_reset_write_mds(pgio);
···
	if (pgio->pg_lseg)
		return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
 
+	trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode,
+			0, NFS4_MAX_UINT64, IOMODE_RW,
+			NFS_I(pgio->pg_inode)->layout,
+			pgio->pg_lseg);
	/* no lseg means that pnfs is not in use, so no mirroring here */
	nfs_pageio_reset_write_mds(pgio);
 out:
···
			hdr->args.count,
			(unsigned long long)hdr->args.offset);
 
+		trace_pnfs_mds_fallback_write_done(hdr->inode,
+				hdr->args.offset, hdr->args.count,
+				IOMODE_RW, NFS_I(hdr->inode)->layout,
+				hdr->lseg);
		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
 }
···
			hdr->args.count,
			(unsigned long long)hdr->args.offset);
 
+		trace_pnfs_mds_fallback_read_done(hdr->inode,
+				hdr->args.offset, hdr->args.count,
+				IOMODE_READ, NFS_I(hdr->inode)->layout,
+				hdr->lseg);
		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
 }
···
 out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
+	trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
+			hdr->args.offset, hdr->args.count,
+			IOMODE_READ, NFS_I(hdr->inode)->layout, lseg);
	return PNFS_NOT_ATTEMPTED;
 }
···
 out_failed:
	if (ff_layout_avoid_mds_available_ds(lseg))
		return PNFS_TRY_AGAIN;
+	trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
+			hdr->args.offset, hdr->args.count,
+			IOMODE_RW, NFS_I(hdr->inode)->layout, lseg);
	return PNFS_NOT_ATTEMPTED;
 }
 
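
(Editorial sketch, not part of the diff: the trace_pnfs_mds_fallback_* calls
above register events in the "nfs4" trace group, so on a kernel carrying this
series the fallback events can be watched from tracefs; the mount point below
is the usual debugfs location, adjust it if tracefs is mounted elsewhere.)

    # echo 1 > /sys/kernel/debug/tracing/events/nfs4/pnfs_mds_fallback_pg_init_read/enable
    # cat /sys/kernel/debug/tracing/trace_pipe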
+1 -1
fs/nfs/flexfilelayout/flexfilelayoutdev.c
···
	if (status == 0)
		return 0;
 
-	if (mirror->mirror_ds == NULL)
+	if (IS_ERR_OR_NULL(mirror->mirror_ds))
		return -EINVAL;
 
	dserr = kmalloc(sizeof(*dserr), gfp_flags);
+21 -9
fs/nfs/inode.c
···
 #include "pnfs.h"
 #include "nfs.h"
 #include "netns.h"
+#include "sysfs.h"
 
 #include "nfstrace.h"
 
···
 }
 
	if (inode->i_mapping->nrpages == 0)
-		flags &= ~NFS_INO_INVALID_DATA;
+		flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
	nfsi->cache_validity |= flags;
	if (flags & NFS_INO_INVALID_DATA)
		nfs_fscache_invalidate(inode);
···
	i_size_write(inode, offset);
	/* Optimisation */
	if (offset == 0)
-		NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
+		NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA |
+				NFS_INO_DATA_INVAL_DEFER);
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
 
	spin_unlock(&inode->i_lock);
···
	struct nfs_inode *nfsi = NFS_I(inode);
 
	spin_lock(&inode->i_lock);
+	if (list_empty(&nfsi->open_files) &&
+	    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA |
+			NFS_INO_REVAL_FORCED;
	list_add_tail_rcu(&ctx->list, &nfsi->open_files);
	spin_unlock(&inode->i_lock);
 }
···
	nfs_fscache_open_file(inode, filp);
	return 0;
 }
+EXPORT_SYMBOL_GPL(nfs_open);
 
 /*
  * This function is called whenever some part of NFS notices that
···
 
	set_bit(NFS_INO_INVALIDATING, bitlock);
	smp_wmb();
-	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
+	nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA|
+			NFS_INO_DATA_INVAL_DEFER);
	spin_unlock(&inode->i_lock);
	trace_nfs_invalidate_mapping_enter(inode);
	ret = nfs_invalidate_mapping(inode, mapping);
···
			dprintk("NFS: change_attr change on server for file %s/%ld\n",
					inode->i_sb->s_id,
					inode->i_ino);
-		}
+		} else if (!have_delegation)
+			nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER;
		inode_set_iversion_raw(inode, fattr->change_attr);
		attr_changed = true;
	}
···
 
 static void nfs_net_exit(struct net *net)
 {
-	struct nfs_net *nn = net_generic(net, nfs_net_id);
-
	nfs_fs_proc_net_exit(net);
-	nfs_cleanup_cb_ident_idr(net);
-	WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
-	WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
+	nfs_clients_exit(net);
 }
 
 static struct pernet_operations nfs_net_ops = {
···
 static int __init init_nfs_fs(void)
 {
	int err;
+
+	err = nfs_sysfs_init();
+	if (err < 0)
+		goto out10;
 
	err = register_pernet_subsys(&nfs_net_ops);
	if (err < 0)
···
 out8:
	unregister_pernet_subsys(&nfs_net_ops);
 out9:
+	nfs_sysfs_exit();
+out10:
	return err;
 }
···
	unregister_nfs_fs();
	nfs_fs_proc_exit();
	nfsiod_stop();
+	nfs_sysfs_exit();
 }
 
 /* Not quite true; I just maintain it */
+4 -3
fs/nfs/internal.h
···
  * Maximum number of pages that readdir can use for creating
  * a vmapped array of pages.
  */
-#define NFS_MAX_READDIR_PAGES 64
-#define NFS_MAX_READDIR_RAPAGES 8
+#define NFS_MAX_READDIR_PAGES 8
 
 struct nfs_client_initdata {
	unsigned long init_flags;
···
	struct nfs_subversion *nfs_mod;
	int proto;
	u32 minorversion;
+	unsigned int nconnect;
	struct net *net;
	const struct rpc_timeout *timeparms;
	const struct cred *cred;
···
		char *export_path;
		int port;
		unsigned short protocol;
+		unsigned short nconnect;
	} nfs_server;
 
	void *lsm_opts;
···
 /* client.c */
 extern const struct rpc_program nfs_program;
 extern void nfs_clients_init(struct net *net);
+extern void nfs_clients_exit(struct net *net);
 extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *);
 int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t);
 struct nfs_client *nfs_get_client(const struct nfs_client_initdata *);
···
 struct nfs_server *nfs_alloc_server(void);
 void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *);
 
-extern void nfs_cleanup_cb_ident_idr(struct net *);
 extern void nfs_put_client(struct nfs_client *);
 extern void nfs_free_client(struct nfs_client *);
 extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
+3
fs/nfs/netns.h
···
	uint32_t major, minor;
 };
 
+struct nfs_netns_client;
+
 struct nfs_net {
	struct cache_detail *nfs_dns_resolve;
	struct rpc_pipe *bl_device_pipe;
···
	unsigned short nfs_callback_tcpport6;
	int cb_users[NFS4_MAX_MINOR_VERSION + 1];
 #endif
+	struct nfs_netns_client *nfs_client;
	spinlock_t nfs_client_lock;
	ktime_t boot_time;
 #ifdef CONFIG_PROC_FS
+1 -1
fs/nfs/nfs2xdr.c
···
	return 0;
 out_status:
	*status = be32_to_cpup(p);
-	trace_nfs_xdr_status((int)*status);
+	trace_nfs_xdr_status(xdr, (int)*status);
	return 0;
 }
 
+3
fs/nfs/nfs3client.c
···
		return ERR_PTR(-EINVAL);
	cl_init.hostname = buf;
 
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+		cl_init.nconnect = mds_clp->cl_nconnect;
+
	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
+1 -1
fs/nfs/nfs3xdr.c
···
	return 0;
 out_status:
	*status = be32_to_cpup(p);
-	trace_nfs_xdr_status((int)*status);
+	trace_nfs_xdr_status(xdr, (int)*status);
	return 0;
 }
 
+2 -2
fs/nfs/nfs4_fs.h
···
		const struct nfs_lock_context *l_ctx,
		fmode_t fmode);
 
+extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+		struct nfs_fsinfo *fsinfo);
 #if defined(CONFIG_NFS_V4_1)
 extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
 extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *);
 extern int nfs4_proc_destroy_session(struct nfs4_session *, const struct cred *);
-extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
-		struct nfs_fsinfo *fsinfo);
 extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
		bool sync);
 extern int nfs4_detect_session_trunking(struct nfs_client *clp,
+12 -2
fs/nfs/nfs4client.c
···
		const size_t addrlen,
		const char *ip_addr,
		int proto, const struct rpc_timeout *timeparms,
-		u32 minorversion, struct net *net)
+		u32 minorversion, unsigned int nconnect,
+		struct net *net)
 {
	struct nfs_client_initdata cl_init = {
		.hostname = hostname,
···
	};
	struct nfs_client *clp;
 
+	if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP)
+		cl_init.nconnect = nconnect;
	if (server->flags & NFS_MOUNT_NORESVPORT)
		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
	if (server->options & NFS_OPTION_MIGRATION)
···
	if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
		return ERR_PTR(-EINVAL);
	cl_init.hostname = buf;
+
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+		cl_init.nconnect = mds_clp->cl_nconnect;
 
	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
···
				data->nfs_server.protocol,
				&timeparms,
				data->minorversion,
+				data->nfs_server.nconnect,
				data->net);
	if (error < 0)
		return error;
···
				XPRT_TRANSPORT_RDMA,
				parent_server->client->cl_timeout,
				parent_client->cl_mvops->minor_version,
+				parent_client->cl_nconnect,
				parent_client->cl_net);
		if (!error)
			goto init_server;
···
				XPRT_TRANSPORT_TCP,
				parent_server->client->cl_timeout,
				parent_client->cl_mvops->minor_version,
+				parent_client->cl_nconnect,
				parent_client->cl_net);
	if (error < 0)
		goto error;
···
	set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
	error = nfs4_set_client(server, hostname, sap, salen, buf,
				clp->cl_proto, clnt->cl_timeout,
-				clp->cl_minorversion, net);
+				clp->cl_minorversion,
+				clp->cl_nconnect, net);
	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
	if (error != 0) {
		nfs_server_insert_lists(server);
+6 -2
fs/nfs/nfs4file.c
···
		return err;
 
	if ((openflags & O_ACCMODE) == 3)
-		openflags--;
+		return nfs_open(inode, filp);
 
	/* We can't create new files here */
	openflags &= ~(O_CREAT|O_EXCL);
···
	bool same_inode = false;
	int ret;
 
-	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+	/* NFS does not support deduplication. */
+	if (remap_flags & REMAP_FILE_DEDUP)
+		return -EOPNOTSUPP;
+
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
		return -EINVAL;
 
	/* check alignment w.r.t. clone_blksize */
+60 -20
fs/nfs/nfs4proc.c
···
	return nfs4_delay_killable(timeout);
 }
 
+static const nfs4_stateid *
+nfs4_recoverable_stateid(const nfs4_stateid *stateid)
+{
+	if (!stateid)
+		return NULL;
+	switch (stateid->type) {
+	case NFS4_OPEN_STATEID_TYPE:
+	case NFS4_LOCK_STATEID_TYPE:
+	case NFS4_DELEGATION_STATEID_TYPE:
+		return stateid;
+	default:
+		break;
+	}
+	return NULL;
+}
+
 /* This is the error handling routine for processes that are allowed
  * to sleep.
  */
···
 {
	struct nfs_client *clp = server->nfs_client;
	struct nfs4_state *state = exception->state;
-	const nfs4_stateid *stateid = exception->stateid;
+	const nfs4_stateid *stateid;
	struct inode *inode = exception->inode;
	int ret = errorcode;
···
	exception->recovering = 0;
	exception->retry = 0;
 
+	stateid = nfs4_recoverable_stateid(exception->stateid);
	if (stateid == NULL && state != NULL)
-		stateid = &state->stateid;
+		stateid = nfs4_recoverable_stateid(&state->stateid);
 
	switch(errorcode) {
	case 0:
···
	server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1;
	exception->retry = 1;
	return true;
 }
+
+static fmode_t _nfs4_ctx_to_accessmode(const struct nfs_open_context *ctx)
+{
+	return ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
+}
+
+static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx)
+{
+	fmode_t ret = ctx->mode & (FMODE_READ|FMODE_WRITE);
+
+	return (ctx->mode & FMODE_EXEC) ? FMODE_READ | ret : ret;
+}
 
 static u32
···
 }
 
 static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
-		fmode_t fmode,
-		int flags,
-		struct nfs_open_context *ctx)
+		int flags, struct nfs_open_context *ctx)
 {
	struct nfs4_state_owner *sp = opendata->owner;
	struct nfs_server *server = sp->so_server;
	struct dentry *dentry;
	struct nfs4_state *state;
+	fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx);
	unsigned int seq;
	int ret;
···
	/* Parse layoutget results before we check for access */
	pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
 
-	ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
+	ret = nfs4_opendata_access(sp->so_cred, opendata, state,
+			acc_mode, flags);
	if (ret != 0)
		goto out;
 
···
	struct dentry *dentry = ctx->dentry;
	const struct cred *cred = ctx->cred;
	struct nfs4_threshold **ctx_th = &ctx->mdsthreshold;
-	fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
+	fmode_t fmode = _nfs4_ctx_to_openmode(ctx);
	enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
	struct iattr *sattr = c->sattr;
	struct nfs4_label *label = c->label;
···
	if (d_really_is_positive(dentry))
		opendata->state = nfs4_get_open_state(d_inode(dentry), sp);
 
-	status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx);
+	status = _nfs4_open_and_get_state(opendata, flags, ctx);
	if (status != 0)
		goto err_free_label;
	state = ctx->state;
···
	if (ctx->state == NULL)
		return;
	if (is_sync)
-		nfs4_close_sync(ctx->state, ctx->mode);
+		nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx));
	else
-		nfs4_close_state(ctx->state, ctx->mode);
+		nfs4_close_state(ctx->state, _nfs4_ctx_to_openmode(ctx));
 }
 
 #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
···
		.rpc_message = &msg,
		.callback_ops = &nfs4_setclientid_ops,
		.callback_data = &setclientid,
-		.flags = RPC_TASK_TIMEOUT,
+		.flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN,
	};
	int status;
 
···
	dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n",
		clp->cl_rpcclient->cl_auth->au_ops->au_name,
		clp->cl_clientid);
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	status = rpc_call_sync(clp->cl_rpcclient, &msg,
+			RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN);
	trace_nfs4_setclientid_confirm(clp, status);
	dprintk("NFS reply setclientid_confirm: %d\n", status);
	return status;
···
			NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
 
	status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args,
-				&res.seq_res, 0);
+				&res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
	dprintk("NFS reply secinfo: %d\n", status);
 
	put_cred(cred);
···
		.rpc_client = clp->cl_rpcclient,
		.callback_ops = &nfs4_exchange_id_call_ops,
		.rpc_message = &msg,
-		.flags = RPC_TASK_TIMEOUT,
+		.flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN,
	};
	struct nfs41_exchange_id_data *calldata;
	int status;
···
	};
	int status;
 
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	status = rpc_call_sync(clp->cl_rpcclient, &msg,
+			RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN);
	trace_nfs4_destroy_clientid(clp, status);
	if (status)
		dprintk("NFS: Got error %d from the server %s on "
···
	return ret;
 }
 
+#endif /* CONFIG_NFS_V4_1 */
+
 struct nfs4_get_lease_time_data {
	struct nfs4_get_lease_time_args *args;
	struct nfs4_get_lease_time_res *res;
···
		(struct nfs4_get_lease_time_data *)calldata;
 
	dprintk("--> %s\n", __func__);
-	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
+	if (!nfs4_sequence_done(task, &data->res->lr_seq_res))
		return;
	switch (task->tk_status) {
	case -NFS4ERR_DELAY:
···
	return status;
 }
 
+#ifdef CONFIG_NFS_V4_1
+
 /*
  * Initialize the values to be used by the client in CREATE_SESSION
  * If nfs4_init_session set the fore channel request and response sizes,
···
 {
	unsigned int max_rqst_sz, max_resp_sz;
	unsigned int max_bc_payload = rpc_max_bc_payload(clnt);
+	unsigned int max_bc_slots = rpc_num_bc_slots(clnt);
 
	max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead;
	max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead;
···
	args->bc_attrs.max_resp_sz_cached = 0;
	args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
	args->bc_attrs.max_reqs = max_t(unsigned short, max_session_cb_slots, 1);
+	if (args->bc_attrs.max_reqs > max_bc_slots)
+		args->bc_attrs.max_reqs = max_bc_slots;
 
	dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
···
	nfs4_init_channel_attrs(&args, clp->cl_rpcclient);
	args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
 
-	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg,
+			RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN);
	trace_nfs4_create_session(clp, status);
 
	switch (status) {
···
	if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state))
		return 0;
 
-	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg,
+			RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN);
	trace_nfs4_destroy_session(session->clp, status);
 
	if (status)
···
		.rpc_client = clp->cl_rpcclient,
		.rpc_message = &msg,
		.callback_ops = &nfs4_reclaim_complete_call_ops,
-		.flags = RPC_TASK_ASYNC,
+		.flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN,
	};
	int status = -ENOMEM;
···
 
	dprintk("--> %s\n", __func__);
	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
-				&res.seq_res, 0);
+				&res.seq_res, RPC_TASK_NO_ROUND_ROBIN);
	dprintk("<-- %s status=%d\n", __func__, status);
 
	put_cred(cred);
+24 -25
fs/nfs/nfs4state.c
···
 
 static DEFINE_MUTEX(nfs_clid_init_mutex);
 
+static int nfs4_setup_state_renewal(struct nfs_client *clp)
+{
+	int status;
+	struct nfs_fsinfo fsinfo;
+	unsigned long now;
+
+	if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
+		nfs4_schedule_state_renewal(clp);
+		return 0;
+	}
+
+	now = jiffies;
+	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+	if (status == 0) {
+		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
+		nfs4_schedule_state_renewal(clp);
+	}
+
+	return status;
+}
+
 int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred)
 {
	struct nfs4_setclientid_res clid = {
···
	if (status != 0)
		goto out;
	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
-	nfs4_schedule_state_renewal(clp);
+	nfs4_setup_state_renewal(clp);
 out:
	return status;
 }
···
 
 #if defined(CONFIG_NFS_V4_1)
 
-static int nfs41_setup_state_renewal(struct nfs_client *clp)
-{
-	int status;
-	struct nfs_fsinfo fsinfo;
-	unsigned long now;
-
-	if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
-		nfs4_schedule_state_renewal(clp);
-		return 0;
-	}
-
-	now = jiffies;
-	status = nfs4_proc_get_lease_time(clp, &fsinfo);
-	if (status == 0) {
-		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
-		nfs4_schedule_state_renewal(clp);
-	}
-
-	return status;
-}
-
 static void nfs41_finish_session_reset(struct nfs_client *clp)
 {
	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
	/* create_session negotiated new slot table */
	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
-	nfs41_setup_state_renewal(clp);
+	nfs4_setup_state_renewal(clp);
 }
 
 int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred)
···
	 * choose to use.
	 */
		goto out;
-	nfs4_copy_open_stateid(dst, state);
-	ret = 0;
+	ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN;
 out:
	if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
		dst->seqid = 0;
+8
fs/nfs/nfs4trace.c
···
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_init_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_init_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_get_mirror_count);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
 #endif
+203 -80
fs/nfs/nfs4trace.h
···
 TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
 
 #define show_nfsv4_errors(error) \
-	__print_symbolic(-(error), \
+	__print_symbolic(error, \
		{ NFS4_OK, "OK" }, \
		/* Mapped by nfs4_stat_to_errno() */ \
		{ EPERM, "EPERM" }, \
···
 
	TP_STRUCT__entry(
		__string(dstaddr, clp->cl_hostname)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
···
	),
 
	TP_printk(
-		"error=%d (%s) dstaddr=%s",
-		__entry->error,
+		"error=%ld (%s) dstaddr=%s",
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		__get_str(dstaddr)
	)
···
		__field(unsigned int, highest_slotid)
		__field(unsigned int, target_highest_slotid)
		__field(unsigned int, status_flags)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
···
		__entry->error = res->sr_status;
	),
	TP_printk(
-		"error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+		"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
		"highest_slotid=%u target_highest_slotid=%u "
		"status_flags=%u (%s)",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		__entry->session,
		__entry->slot_nr,
···
		__field(unsigned int, seq_nr)
		__field(unsigned int, highest_slotid)
		__field(unsigned int, cachethis)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
···
		__entry->seq_nr = args->csa_sequenceid;
		__entry->highest_slotid = args->csa_highestslotid;
		__entry->cachethis = args->csa_cachethis;
-		__entry->error = -be32_to_cpu(status);
+		__entry->error = be32_to_cpu(status);
	),
 
	TP_printk(
-		"error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+		"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
		"highest_slotid=%u",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		__entry->session,
		__entry->slot_nr,
···
		__entry->highest_slotid
	)
 );
+
+TRACE_EVENT(nfs4_cb_seqid_err,
+		TP_PROTO(
+			const struct cb_sequenceargs *args,
+			__be32 status
+		),
+		TP_ARGS(args, status),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, session)
+			__field(unsigned int, slot_nr)
+			__field(unsigned int, seq_nr)
+			__field(unsigned int, highest_slotid)
+			__field(unsigned int, cachethis)
+			__field(unsigned long, error)
+		),
+
+		TP_fast_assign(
+			__entry->session = nfs_session_id_hash(&args->csa_sessionid);
+			__entry->slot_nr = args->csa_slotid;
+			__entry->seq_nr = args->csa_sequenceid;
+			__entry->highest_slotid = args->csa_highestslotid;
+			__entry->cachethis = args->csa_cachethis;
+			__entry->error = be32_to_cpu(status);
+		),
+
+		TP_printk(
+			"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+			"highest_slotid=%u",
+			-__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__entry->session,
+			__entry->slot_nr,
+			__entry->seq_nr,
+			__entry->highest_slotid
+		)
+);
+
 #endif /* CONFIG_NFS_V4_1 */
 
 TRACE_EVENT(nfs4_setup_sequence,
···
 
 TRACE_EVENT(nfs4_xdr_status,
		TP_PROTO(
+			const struct xdr_stream *xdr,
			u32 op,
			int error
		),
 
-		TP_ARGS(op, error),
+		TP_ARGS(xdr, op, error),
 
		TP_STRUCT__entry(
+			__field(unsigned int, task_id)
+			__field(unsigned int, client_id)
+			__field(u32, xid)
			__field(u32, op)
-			__field(int, error)
+			__field(unsigned long, error)
		),
 
		TP_fast_assign(
+			const struct rpc_rqst *rqstp = xdr->rqst;
+			const struct rpc_task *task = rqstp->rq_task;
+
+			__entry->task_id = task->tk_pid;
+			__entry->client_id = task->tk_client->cl_clid;
+			__entry->xid = be32_to_cpu(rqstp->rq_xid);
			__entry->op = op;
-			__entry->error = -error;
+			__entry->error = error;
		),
 
		TP_printk(
-			"operation %d: nfs status %d (%s)",
-			__entry->op,
-			__entry->error, show_nfsv4_errors(__entry->error)
+			"task:%u@%d xid=0x%08x error=%ld (%s) operation=%u",
+			__entry->task_id, __entry->client_id, __entry->xid,
+			-__entry->error, show_nfsv4_errors(__entry->error),
+			__entry->op
		)
 );
 
···
	TP_ARGS(ctx, flags, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(unsigned int, flags)
		__field(unsigned int, fmode)
		__field(dev_t, dev)
···
		const struct nfs4_state *state = ctx->state;
		const struct inode *inode = NULL;
 
-		__entry->error = error;
+		__entry->error = -error;
		__entry->flags = flags;
		__entry->fmode = (__force unsigned int)ctx->mode;
		__entry->dev = ctx->dentry->d_sb->s_dev;
···
	),
 
	TP_printk(
-		"error=%d (%s) flags=%d (%s) fmode=%s "
+		"error=%ld (%s) flags=%d (%s) fmode=%s "
		"fileid=%02x:%02x:%llu fhandle=0x%08x "
		"name=%02x:%02x:%llu/%s stateid=%d:0x%08x "
		"openstateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		__entry->flags,
		show_open_flags(__entry->flags),
···
		__field(u32, fhandle)
		__field(u64, fileid)
		__field(unsigned int, fmode)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, stateid_seq)
		__field(u32, stateid_hash)
	),
···
	),
 
	TP_printk(
-		"error=%d (%s) fmode=%s fileid=%02x:%02x:%llu "
+		"error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu "
		"fhandle=0x%08x openstateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		__entry->fmode ? show_fmode_flags(__entry->fmode) :
				 "closed",
···
	TP_ARGS(request, state, cmd, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, cmd)
		__field(char, type)
		__field(loff_t, start)
···
	),
 
	TP_printk(
-		"error=%d (%s) cmd=%s:%s range=%lld:%lld "
+		"error=%ld (%s) cmd=%s:%s range=%lld:%lld "
		"fileid=%02x:%02x:%llu fhandle=0x%08x "
		"stateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		show_lock_cmd(__entry->cmd),
		show_lock_type(__entry->type),
···
	TP_ARGS(request, state, lockstateid, cmd, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, cmd)
		__field(char, type)
		__field(loff_t, start)
···
	),
 
	TP_printk(
-		"error=%d (%s) cmd=%s:%s range=%lld:%lld "
+		"error=%ld (%s) cmd=%s:%s range=%lld:%lld "
		"fileid=%02x:%02x:%llu fhandle=0x%08x "
		"stateid=%d:0x%08x lockstateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		show_lock_cmd(__entry->cmd),
		show_lock_type(__entry->type),
···
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(u32, fhandle)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, stateid_seq)
		__field(u32, stateid_hash)
	),
···
	),
 
	TP_printk(
-		"error=%d (%s) dev=%02x:%02x fhandle=0x%08x "
+		"error=%ld (%s) dev=%02x:%02x fhandle=0x%08x "
		"stateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		__entry->fhandle,
···
	TP_ARGS(state, lsp, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(dev_t, dev)
		__field(u32, fhandle)
		__field(u64, fileid)
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"stateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
 
	TP_STRUCT__entry(
		__field(dev_t, dev)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(u64, dir)
		__string(name, name->name)
	),
···
	TP_fast_assign(
		__entry->dev = dir->i_sb->s_dev;
		__entry->dir = NFS_FILEID(dir);
-		__entry->error = error;
+		__entry->error = -error;
		__assign_str(name, name->name);
	),
 
	TP_printk(
-		"error=%d (%s) name=%02x:%02x:%llu/%s",
-		__entry->error,
+		"error=%ld (%s) name=%02x:%02x:%llu/%s",
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->dir,
···
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(u64, ino)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
···
	),
 
	TP_printk(
-		"error=%d (%s) inode=%02x:%02x:%llu",
-		__entry->error,
+		"error=%ld (%s) inode=%02x:%02x:%llu",
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->ino
···
 
	TP_STRUCT__entry(
		__field(dev_t, dev)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(u64, olddir)
		__string(oldname, oldname->name)
		__field(u64, newdir)
···
	),
 
	TP_printk(
-		"error=%d (%s) oldname=%02x:%02x:%llu/%s "
+		"error=%ld (%s) oldname=%02x:%02x:%llu/%s "
		"newname=%02x:%02x:%llu/%s",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->olddir,
···
		__field(dev_t, dev)
		__field(u32, fhandle)
		__field(u64, fileid)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
		__entry->dev = inode->i_sb->s_dev;
		__entry->fileid = NFS_FILEID(inode);
		__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
-		__entry->error = error;
+		__entry->error = error < 0 ? -error : 0;
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
-		__entry->error,
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
		__field(dev_t, dev)
		__field(u32, fhandle)
		__field(u64, fileid)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, stateid_seq)
		__field(u32, stateid_hash)
	),
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"stateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
		__field(u32, fhandle)
		__field(u64, fileid)
		__field(unsigned int, valid)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"valid=%s",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
	TP_ARGS(clp, fhandle, inode, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(dev_t, dev)
		__field(u32, fhandle)
		__field(u64, fileid)
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"dstaddr=%s",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
	TP_ARGS(clp, fhandle, inode, stateid, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(dev_t, dev)
		__field(u32, fhandle)
		__field(u64, fileid)
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"stateid=%d:0x%08x dstaddr=%s",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
	TP_ARGS(name, len, id, error),
 
	TP_STRUCT__entry(
-		__field(int, error)
+		__field(unsigned long, error)
		__field(u32, id)
		__dynamic_array(char, name, len > 0 ? len + 1 : 1)
	),
···
	),
 
	TP_printk(
-		"error=%d id=%u name=%s",
-		__entry->error,
+		"error=%ld (%s) id=%u name=%s",
+		-__entry->error, show_nfsv4_errors(__entry->error),
		__entry->id,
		__get_str(name)
	)
···
		__field(u64, fileid)
		__field(loff_t, offset)
		__field(size_t, count)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, stateid_seq)
		__field(u32, stateid_hash)
	),
···
		__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
		__entry->offset = hdr->args.offset;
		__entry->count = hdr->args.count;
-		__entry->error = error;
+		__entry->error = error < 0 ? -error : 0;
		__entry->stateid_seq =
			be32_to_cpu(state->stateid.seqid);
		__entry->stateid_hash =
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"offset=%lld count=%zu stateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
		__field(u64, fileid)
		__field(loff_t, offset)
		__field(size_t, count)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, stateid_seq)
		__field(u32, stateid_hash)
	),
···
		__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
		__entry->offset = hdr->args.offset;
		__entry->count = hdr->args.count;
-		__entry->error = error;
+		__entry->error = error < 0 ? -error : 0;
		__entry->stateid_seq =
			be32_to_cpu(state->stateid.seqid);
		__entry->stateid_hash =
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"offset=%lld count=%zu stateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
		__field(u64, fileid)
		__field(loff_t, offset)
		__field(size_t, count)
-		__field(int, error)
+		__field(unsigned long, error)
	),
 
	TP_fast_assign(
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"offset=%lld count=%zu",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
		__field(u32, iomode)
		__field(u64, offset)
		__field(u64, count)
-		__field(int, error)
+		__field(unsigned long, error)
		__field(int, stateid_seq)
		__field(u32, stateid_hash)
		__field(int, layoutstateid_seq)
···
	),
 
	TP_printk(
-		"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+		"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
		"iomode=%s offset=%llu count=%llu stateid=%d:0x%08x "
		"layoutstateid=%d:0x%08x",
-		__entry->error,
+		-__entry->error,
		show_nfsv4_errors(__entry->error),
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->fileid,
···
 TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_INVALID_OPEN);
 TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETRY);
 TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
+TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_EXIT);
 
 #define show_pnfs_update_layout_reason(reason) \
	__print_symbolic(reason, \
···
		{ PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \
		{ PNFS_UPDATE_LAYOUT_INVALID_OPEN, "invalid open" }, \
		{ PNFS_UPDATE_LAYOUT_RETRY, "retrying" }, \
-		{ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" })
+		{ PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }, \
+		{ PNFS_UPDATE_LAYOUT_EXIT, "exit" })
 
 TRACE_EVENT(pnfs_update_layout,
	TP_PROTO(struct inode *inode,
···
		show_pnfs_update_layout_reason(__entry->reason)
	)
 );
+
+DECLARE_EVENT_CLASS(pnfs_layout_event,
+		TP_PROTO(struct inode *inode,
+			loff_t pos,
+			u64 count,
+			enum pnfs_iomode iomode,
+			struct pnfs_layout_hdr *lo,
+			struct pnfs_layout_segment *lseg
+		),
+		TP_ARGS(inode, pos, count, iomode, lo, lseg),
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, fileid)
+			__field(u32, fhandle)
+			__field(loff_t, pos)
+			__field(u64, count)
+			__field(enum pnfs_iomode, iomode)
+			__field(int, layoutstateid_seq)
+			__field(u32, layoutstateid_hash)
+			__field(long, lseg)
+		),
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->pos = pos;
+			__entry->count = count;
+			__entry->iomode = iomode;
+			if (lo != NULL) {
+				__entry->layoutstateid_seq =
+				be32_to_cpu(lo->plh_stateid.seqid);
+				__entry->layoutstateid_hash =
+				nfs_stateid_hash(&lo->plh_stateid);
+			} else {
+				__entry->layoutstateid_seq = 0;
+				__entry->layoutstateid_hash = 0;
+			}
+			__entry->lseg = (long)lseg;
+		),
+		TP_printk(
+			"fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"iomode=%s pos=%llu count=%llu "
+			"layoutstateid=%d:0x%08x lseg=0x%lx",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			show_pnfs_iomode(__entry->iomode),
+			(unsigned long long)__entry->pos,
+			(unsigned long long)__entry->count,
+			__entry->layoutstateid_seq, __entry->layoutstateid_hash,
+			__entry->lseg
+		)
+);
+
+#define DEFINE_PNFS_LAYOUT_EVENT(name) \
+	DEFINE_EVENT(pnfs_layout_event, name, \
+		TP_PROTO(struct inode *inode, \
+			loff_t pos, \
+			u64 count, \
+			enum pnfs_iomode iomode, \
+			struct pnfs_layout_hdr *lo, \
+			struct pnfs_layout_segment *lseg \
+		), \
+		TP_ARGS(inode, pos, count, iomode, lo, lseg))
+
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_init_read);
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_init_write);
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_get_mirror_count);
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_done);
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done);
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
+DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
 
 #endif /* CONFIG_NFS_V4_1 */
 
+13 -3
fs/nfs/nfs4xdr.c
··· 837 837 #define NFS4_dec_sequence_sz \ 838 838 (compound_decode_hdr_maxsz + \ 839 839 decode_sequence_maxsz) 840 + #endif 840 841 #define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \ 841 842 encode_sequence_maxsz + \ 842 843 encode_putrootfh_maxsz + \ ··· 846 845 decode_sequence_maxsz + \ 847 846 decode_putrootfh_maxsz + \ 848 847 decode_fsinfo_maxsz) 848 + #if defined(CONFIG_NFS_V4_1) 849 849 #define NFS4_enc_reclaim_complete_sz (compound_encode_hdr_maxsz + \ 850 850 encode_sequence_maxsz + \ 851 851 encode_reclaim_complete_maxsz) ··· 2959 2957 encode_nops(&hdr); 2960 2958 } 2961 2959 2960 + #endif 2961 + 2962 2962 /* 2963 2963 * a GET_LEASE_TIME request 2964 2964 */ ··· 2980 2976 encode_fsinfo(xdr, lease_bitmap, &hdr); 2981 2977 encode_nops(&hdr); 2982 2978 } 2979 + 2980 + #ifdef CONFIG_NFS_V4_1 2983 2981 2984 2982 /* 2985 2983 * a RECLAIM_COMPLETE request ··· 3193 3187 return true; 3194 3188 out_status: 3195 3189 nfserr = be32_to_cpup(p); 3196 - trace_nfs4_xdr_status(opnum, nfserr); 3190 + trace_nfs4_xdr_status(xdr, opnum, nfserr); 3197 3191 *nfs_retval = nfs4_stat_to_errno(nfserr); 3198 3192 return true; 3199 3193 out_bad_operation: ··· 3433 3427 *res = be32_to_cpup(p); 3434 3428 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; 3435 3429 } 3436 - dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); 3430 + dprintk("%s: lease time=%u\n", __func__, (unsigned int)*res); 3437 3431 return 0; 3438 3432 } 3439 3433 ··· 7128 7122 return status; 7129 7123 } 7130 7124 7125 + #endif 7126 + 7131 7127 /* 7132 7128 * Decode GET_LEASE_TIME response 7133 7129 */ ··· 7150 7142 status = decode_fsinfo(xdr, res->lr_fsinfo); 7151 7143 return status; 7152 7144 } 7145 + 7146 + #ifdef CONFIG_NFS_V4_1 7153 7147 7154 7148 /* 7155 7149 * Decode RECLAIM_COMPLETE response ··· 7561 7551 PROC41(CREATE_SESSION, enc_create_session, dec_create_session), 7562 7552 PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), 7563 7553 PROC41(SEQUENCE, enc_sequence, dec_sequence), 7564 - PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 7554 + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 7565 7555 PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete), 7566 7556 PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 7567 7557 PROC41(LAYOUTGET, enc_layoutget, dec_layoutget),
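Two separate fixes share this hunk. First, the GET_LEASE_TIME size macros, encoder, and decoder move outside the CONFIG_NFS_V4_1 guards and the proc-table entry changes from PROC41() to PROC(), so the operation is compiled and registered for NFSv4.0 mounts as well; encode_sequence() presumably degenerates to a no-op when no v4.1 session exists, so the shared encoder stays safe. Second, the shared op-header decode path now hands the xdr stream to trace_nfs4_xdr_status(), which (per the nfstrace.h hunk below) can then report the owning task, client ID, and XID; a record would read something like task:24@1 xid=0x0e40f21c error=-13 (ACCES), where the values are invented for illustration. The dprintk in the lease-time decoder also stops mislabelling the value as a file size.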
+174 -59
fs/nfs/nfstrace.h
··· 11 11 #include <linux/tracepoint.h> 12 12 #include <linux/iversion.h> 13 13 14 + TRACE_DEFINE_ENUM(DT_UNKNOWN); 15 + TRACE_DEFINE_ENUM(DT_FIFO); 16 + TRACE_DEFINE_ENUM(DT_CHR); 17 + TRACE_DEFINE_ENUM(DT_DIR); 18 + TRACE_DEFINE_ENUM(DT_BLK); 19 + TRACE_DEFINE_ENUM(DT_REG); 20 + TRACE_DEFINE_ENUM(DT_LNK); 21 + TRACE_DEFINE_ENUM(DT_SOCK); 22 + TRACE_DEFINE_ENUM(DT_WHT); 23 + 14 24 #define nfs_show_file_type(ftype) \ 15 25 __print_symbolic(ftype, \ 16 26 { DT_UNKNOWN, "UNKNOWN" }, \ ··· 33 23 { DT_SOCK, "SOCK" }, \ 34 24 { DT_WHT, "WHT" }) 35 25 26 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA); 27 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME); 28 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS); 29 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL); 30 + TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE); 31 + TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED); 32 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL); 33 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE); 34 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME); 35 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME); 36 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE); 37 + TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); 38 + 36 39 #define nfs_show_cache_validity(v) \ 37 40 __print_flags(v, "|", \ 38 - { NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \ 39 41 { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ 40 42 { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ 41 43 { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ 42 44 { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ 43 45 { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \ 44 46 { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ 45 - { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }) 47 + { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \ 48 + { NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \ 49 + { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ 50 + { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ 51 + { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ 52 + { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }) 53 + 54 + TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); 55 + TRACE_DEFINE_ENUM(NFS_INO_STALE); 56 + TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET); 57 + TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING); 58 + TRACE_DEFINE_ENUM(NFS_INO_FSCACHE); 59 + TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK); 60 + TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT); 61 + TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING); 62 + TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS); 63 + TRACE_DEFINE_ENUM(NFS_INO_ODIRECT); 46 64 47 65 #define nfs_show_nfsi_flags(v) \ 48 66 __print_flags(v, "|", \ 49 - { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \ 50 - { 1 << NFS_INO_STALE, "STALE" }, \ 51 - { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ 52 - { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ 53 - { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ 54 - { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) 67 + { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ 68 + { BIT(NFS_INO_STALE), "STALE" }, \ 69 + { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ 70 + { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ 71 + { BIT(NFS_INO_FSCACHE), "FSCACHE" }, \ 72 + { BIT(NFS_INO_FSCACHE_LOCK), "FSCACHE_LOCK" }, \ 73 + { BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \ 74 + { BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \ 75 + { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ 76 + { BIT(NFS_INO_ODIRECT), "ODIRECT" }) 55 77 56 78 DECLARE_EVENT_CLASS(nfs_inode_event, 57 79 TP_PROTO( ··· 125 83 TP_ARGS(inode, error), 126 84 127 85 TP_STRUCT__entry( 128 - __field(int, error) 86 + __field(unsigned long, error) 129 87 __field(dev_t, dev) 130 88 __field(u32, fhandle) 131 89 __field(unsigned char, type) ··· 138 96 139 97 
TP_fast_assign( 140 98 const struct nfs_inode *nfsi = NFS_I(inode); 141 - __entry->error = error; 99 + __entry->error = error < 0 ? -error : 0; 142 100 __entry->dev = inode->i_sb->s_dev; 143 101 __entry->fileid = nfsi->fileid; 144 102 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); ··· 150 108 ), 151 109 152 110 TP_printk( 153 - "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " 111 + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 154 112 "type=%u (%s) version=%llu size=%lld " 155 - "cache_validity=%lu (%s) nfs_flags=%ld (%s)", 156 - __entry->error, 113 + "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", 114 + -__entry->error, nfs_show_status(__entry->error), 157 115 MAJOR(__entry->dev), MINOR(__entry->dev), 158 116 (unsigned long long)__entry->fileid, 159 117 __entry->fhandle, ··· 200 158 DEFINE_NFS_INODE_EVENT(nfs_access_enter); 201 159 DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); 202 160 161 + TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); 162 + TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); 163 + TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); 164 + TRACE_DEFINE_ENUM(LOOKUP_PARENT); 165 + TRACE_DEFINE_ENUM(LOOKUP_REVAL); 166 + TRACE_DEFINE_ENUM(LOOKUP_RCU); 167 + TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL); 168 + TRACE_DEFINE_ENUM(LOOKUP_NO_EVAL); 169 + TRACE_DEFINE_ENUM(LOOKUP_OPEN); 170 + TRACE_DEFINE_ENUM(LOOKUP_CREATE); 171 + TRACE_DEFINE_ENUM(LOOKUP_EXCL); 172 + TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET); 173 + TRACE_DEFINE_ENUM(LOOKUP_JUMPED); 174 + TRACE_DEFINE_ENUM(LOOKUP_ROOT); 175 + TRACE_DEFINE_ENUM(LOOKUP_EMPTY); 176 + TRACE_DEFINE_ENUM(LOOKUP_DOWN); 177 + 203 178 #define show_lookup_flags(flags) \ 204 - __print_flags((unsigned long)flags, "|", \ 205 - { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ 179 + __print_flags(flags, "|", \ 180 + { LOOKUP_FOLLOW, "FOLLOW" }, \ 206 181 { LOOKUP_DIRECTORY, "DIRECTORY" }, \ 182 + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ 183 + { LOOKUP_PARENT, "PARENT" }, \ 184 + { LOOKUP_REVAL, "REVAL" }, \ 185 + { LOOKUP_RCU, "RCU" }, \ 186 + { LOOKUP_NO_REVAL, "NO_REVAL" }, \ 187 + { LOOKUP_NO_EVAL, "NO_EVAL" }, \ 207 188 { LOOKUP_OPEN, "OPEN" }, \ 208 189 { LOOKUP_CREATE, "CREATE" }, \ 209 - { LOOKUP_EXCL, "EXCL" }) 190 + { LOOKUP_EXCL, "EXCL" }, \ 191 + { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ 192 + { LOOKUP_JUMPED, "JUMPED" }, \ 193 + { LOOKUP_ROOT, "ROOT" }, \ 194 + { LOOKUP_EMPTY, "EMPTY" }, \ 195 + { LOOKUP_DOWN, "DOWN" }) 210 196 211 197 DECLARE_EVENT_CLASS(nfs_lookup_event, 212 198 TP_PROTO( ··· 246 176 TP_ARGS(dir, dentry, flags), 247 177 248 178 TP_STRUCT__entry( 249 - __field(unsigned int, flags) 179 + __field(unsigned long, flags) 250 180 __field(dev_t, dev) 251 181 __field(u64, dir) 252 182 __string(name, dentry->d_name.name) ··· 260 190 ), 261 191 262 192 TP_printk( 263 - "flags=%u (%s) name=%02x:%02x:%llu/%s", 193 + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", 264 194 __entry->flags, 265 195 show_lookup_flags(__entry->flags), 266 196 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 289 219 TP_ARGS(dir, dentry, flags, error), 290 220 291 221 TP_STRUCT__entry( 292 - __field(int, error) 293 - __field(unsigned int, flags) 222 + __field(unsigned long, error) 223 + __field(unsigned long, flags) 294 224 __field(dev_t, dev) 295 225 __field(u64, dir) 296 226 __string(name, dentry->d_name.name) ··· 299 229 TP_fast_assign( 300 230 __entry->dev = dir->i_sb->s_dev; 301 231 __entry->dir = NFS_FILEID(dir); 302 - __entry->error = error; 232 + __entry->error = error < 0 ? 
-error : 0; 303 233 __entry->flags = flags; 304 234 __assign_str(name, dentry->d_name.name); 305 235 ), 306 236 307 237 TP_printk( 308 - "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", 309 - __entry->error, 238 + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", 239 + -__entry->error, nfs_show_status(__entry->error), 310 240 __entry->flags, 311 241 show_lookup_flags(__entry->flags), 312 242 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 330 260 DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); 331 261 DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); 332 262 263 + TRACE_DEFINE_ENUM(O_WRONLY); 264 + TRACE_DEFINE_ENUM(O_RDWR); 265 + TRACE_DEFINE_ENUM(O_CREAT); 266 + TRACE_DEFINE_ENUM(O_EXCL); 267 + TRACE_DEFINE_ENUM(O_NOCTTY); 268 + TRACE_DEFINE_ENUM(O_TRUNC); 269 + TRACE_DEFINE_ENUM(O_APPEND); 270 + TRACE_DEFINE_ENUM(O_NONBLOCK); 271 + TRACE_DEFINE_ENUM(O_DSYNC); 272 + TRACE_DEFINE_ENUM(O_DIRECT); 273 + TRACE_DEFINE_ENUM(O_LARGEFILE); 274 + TRACE_DEFINE_ENUM(O_DIRECTORY); 275 + TRACE_DEFINE_ENUM(O_NOFOLLOW); 276 + TRACE_DEFINE_ENUM(O_NOATIME); 277 + TRACE_DEFINE_ENUM(O_CLOEXEC); 278 + 333 279 #define show_open_flags(flags) \ 334 - __print_flags((unsigned long)flags, "|", \ 280 + __print_flags(flags, "|", \ 281 + { O_WRONLY, "O_WRONLY" }, \ 282 + { O_RDWR, "O_RDWR" }, \ 335 283 { O_CREAT, "O_CREAT" }, \ 336 284 { O_EXCL, "O_EXCL" }, \ 285 + { O_NOCTTY, "O_NOCTTY" }, \ 337 286 { O_TRUNC, "O_TRUNC" }, \ 338 287 { O_APPEND, "O_APPEND" }, \ 288 + { O_NONBLOCK, "O_NONBLOCK" }, \ 339 289 { O_DSYNC, "O_DSYNC" }, \ 340 290 { O_DIRECT, "O_DIRECT" }, \ 341 - { O_DIRECTORY, "O_DIRECTORY" }) 291 + { O_LARGEFILE, "O_LARGEFILE" }, \ 292 + { O_DIRECTORY, "O_DIRECTORY" }, \ 293 + { O_NOFOLLOW, "O_NOFOLLOW" }, \ 294 + { O_NOATIME, "O_NOATIME" }, \ 295 + { O_CLOEXEC, "O_CLOEXEC" }) 296 + 297 + TRACE_DEFINE_ENUM(FMODE_READ); 298 + TRACE_DEFINE_ENUM(FMODE_WRITE); 299 + TRACE_DEFINE_ENUM(FMODE_EXEC); 342 300 343 301 #define show_fmode_flags(mode) \ 344 302 __print_flags(mode, "|", \ ··· 384 286 TP_ARGS(dir, ctx, flags), 385 287 386 288 TP_STRUCT__entry( 387 - __field(unsigned int, flags) 289 + __field(unsigned long, flags) 388 290 __field(unsigned int, fmode) 389 291 __field(dev_t, dev) 390 292 __field(u64, dir) ··· 400 302 ), 401 303 402 304 TP_printk( 403 - "flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s", 305 + "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", 404 306 __entry->flags, 405 307 show_open_flags(__entry->flags), 406 308 show_fmode_flags(__entry->fmode), ··· 421 323 TP_ARGS(dir, ctx, flags, error), 422 324 423 325 TP_STRUCT__entry( 424 - __field(int, error) 425 - __field(unsigned int, flags) 326 + __field(unsigned long, error) 327 + __field(unsigned long, flags) 426 328 __field(unsigned int, fmode) 427 329 __field(dev_t, dev) 428 330 __field(u64, dir) ··· 430 332 ), 431 333 432 334 TP_fast_assign( 433 - __entry->error = error; 335 + __entry->error = -error; 434 336 __entry->dev = dir->i_sb->s_dev; 435 337 __entry->dir = NFS_FILEID(dir); 436 338 __entry->flags = flags; ··· 439 341 ), 440 342 441 343 TP_printk( 442 - "error=%d flags=%u (%s) fmode=%s " 344 + "error=%ld (%s) flags=0x%lx (%s) fmode=%s " 443 345 "name=%02x:%02x:%llu/%s", 444 - __entry->error, 346 + -__entry->error, nfs_show_status(__entry->error), 445 347 __entry->flags, 446 348 show_open_flags(__entry->flags), 447 349 show_fmode_flags(__entry->fmode), ··· 461 363 TP_ARGS(dir, dentry, flags), 462 364 463 365 TP_STRUCT__entry( 464 - __field(unsigned int, flags) 366 + __field(unsigned long, flags) 465 367 
__field(dev_t, dev) 466 368 __field(u64, dir) 467 369 __string(name, dentry->d_name.name) ··· 475 377 ), 476 378 477 379 TP_printk( 478 - "flags=%u (%s) name=%02x:%02x:%llu/%s", 380 + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", 479 381 __entry->flags, 480 382 show_open_flags(__entry->flags), 481 383 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 495 397 TP_ARGS(dir, dentry, flags, error), 496 398 497 399 TP_STRUCT__entry( 498 - __field(int, error) 499 - __field(unsigned int, flags) 400 + __field(unsigned long, error) 401 + __field(unsigned long, flags) 500 402 __field(dev_t, dev) 501 403 __field(u64, dir) 502 404 __string(name, dentry->d_name.name) 503 405 ), 504 406 505 407 TP_fast_assign( 506 - __entry->error = error; 408 + __entry->error = -error; 507 409 __entry->dev = dir->i_sb->s_dev; 508 410 __entry->dir = NFS_FILEID(dir); 509 411 __entry->flags = flags; ··· 511 413 ), 512 414 513 415 TP_printk( 514 - "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", 515 - __entry->error, 416 + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", 417 + -__entry->error, nfs_show_status(__entry->error), 516 418 __entry->flags, 517 419 show_open_flags(__entry->flags), 518 420 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 567 469 TP_ARGS(dir, dentry, error), 568 470 569 471 TP_STRUCT__entry( 570 - __field(int, error) 472 + __field(unsigned long, error) 571 473 __field(dev_t, dev) 572 474 __field(u64, dir) 573 475 __string(name, dentry->d_name.name) ··· 576 478 TP_fast_assign( 577 479 __entry->dev = dir->i_sb->s_dev; 578 480 __entry->dir = NFS_FILEID(dir); 579 - __entry->error = error; 481 + __entry->error = error < 0 ? -error : 0; 580 482 __assign_str(name, dentry->d_name.name); 581 483 ), 582 484 583 485 TP_printk( 584 - "error=%d name=%02x:%02x:%llu/%s", 585 - __entry->error, 486 + "error=%ld (%s) name=%02x:%02x:%llu/%s", 487 + -__entry->error, nfs_show_status(__entry->error), 586 488 MAJOR(__entry->dev), MINOR(__entry->dev), 587 489 (unsigned long long)__entry->dir, 588 490 __get_str(name) ··· 655 557 TP_ARGS(inode, dir, dentry, error), 656 558 657 559 TP_STRUCT__entry( 658 - __field(int, error) 560 + __field(unsigned long, error) 659 561 __field(dev_t, dev) 660 562 __field(u64, fileid) 661 563 __field(u64, dir) ··· 666 568 __entry->dev = inode->i_sb->s_dev; 667 569 __entry->fileid = NFS_FILEID(inode); 668 570 __entry->dir = NFS_FILEID(dir); 669 - __entry->error = error; 571 + __entry->error = error < 0 ? 
-error : 0; 670 572 __assign_str(name, dentry->d_name.name); 671 573 ), 672 574 673 575 TP_printk( 674 - "error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", 675 - __entry->error, 576 + "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", 577 + -__entry->error, nfs_show_status(__entry->error), 676 578 MAJOR(__entry->dev), MINOR(__entry->dev), 677 579 __entry->fileid, 678 580 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 740 642 741 643 TP_STRUCT__entry( 742 644 __field(dev_t, dev) 743 - __field(int, error) 645 + __field(unsigned long, error) 744 646 __field(u64, old_dir) 745 647 __string(old_name, old_dentry->d_name.name) 746 648 __field(u64, new_dir) ··· 749 651 750 652 TP_fast_assign( 751 653 __entry->dev = old_dir->i_sb->s_dev; 654 + __entry->error = -error; 752 655 __entry->old_dir = NFS_FILEID(old_dir); 753 656 __entry->new_dir = NFS_FILEID(new_dir); 754 - __entry->error = error; 755 657 __assign_str(old_name, old_dentry->d_name.name); 756 658 __assign_str(new_name, new_dentry->d_name.name); 757 659 ), 758 660 759 661 TP_printk( 760 - "error=%d old_name=%02x:%02x:%llu/%s " 662 + "error=%ld (%s) old_name=%02x:%02x:%llu/%s " 761 663 "new_name=%02x:%02x:%llu/%s", 762 - __entry->error, 664 + -__entry->error, nfs_show_status(__entry->error), 763 665 MAJOR(__entry->dev), MINOR(__entry->dev), 764 666 (unsigned long long)__entry->old_dir, 765 667 __get_str(old_name), ··· 795 697 796 698 TP_STRUCT__entry( 797 699 __field(dev_t, dev) 798 - __field(int, error) 700 + __field(unsigned long, error) 799 701 __field(u64, dir) 800 702 __dynamic_array(char, name, data->args.name.len + 1) 801 703 ), ··· 805 707 size_t len = data->args.name.len; 806 708 __entry->dev = dir->i_sb->s_dev; 807 709 __entry->dir = NFS_FILEID(dir); 808 - __entry->error = error; 710 + __entry->error = -error; 809 711 memcpy(__get_str(name), 810 712 data->args.name.name, len); 811 713 __get_str(name)[len] = 0; 812 714 ), 813 715 814 716 TP_printk( 815 - "error=%d name=%02x:%02x:%llu/%s", 816 - __entry->error, 717 + "error=%ld (%s) name=%02x:%02x:%llu/%s", 718 + -__entry->error, nfs_show_status(__entry->error), 817 719 MAJOR(__entry->dev), MINOR(__entry->dev), 818 720 (unsigned long long)__entry->dir, 819 721 __get_str(name) ··· 1072 974 TRACE_DEFINE_ENUM(NFSERR_NOENT); 1073 975 TRACE_DEFINE_ENUM(NFSERR_IO); 1074 976 TRACE_DEFINE_ENUM(NFSERR_NXIO); 977 + TRACE_DEFINE_ENUM(ECHILD); 978 + TRACE_DEFINE_ENUM(NFSERR_EAGAIN); 1075 979 TRACE_DEFINE_ENUM(NFSERR_ACCES); 1076 980 TRACE_DEFINE_ENUM(NFSERR_EXIST); 1077 981 TRACE_DEFINE_ENUM(NFSERR_XDEV); ··· 1085 985 TRACE_DEFINE_ENUM(NFSERR_NOSPC); 1086 986 TRACE_DEFINE_ENUM(NFSERR_ROFS); 1087 987 TRACE_DEFINE_ENUM(NFSERR_MLINK); 988 + TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); 1088 989 TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); 1089 990 TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); 1090 991 TRACE_DEFINE_ENUM(NFSERR_DQUOT); ··· 1108 1007 { NFSERR_NOENT, "NOENT" }, \ 1109 1008 { NFSERR_IO, "IO" }, \ 1110 1009 { NFSERR_NXIO, "NXIO" }, \ 1010 + { ECHILD, "CHILD" }, \ 1011 + { NFSERR_EAGAIN, "AGAIN" }, \ 1111 1012 { NFSERR_ACCES, "ACCES" }, \ 1112 1013 { NFSERR_EXIST, "EXIST" }, \ 1113 1014 { NFSERR_XDEV, "XDEV" }, \ ··· 1121 1018 { NFSERR_NOSPC, "NOSPC" }, \ 1122 1019 { NFSERR_ROFS, "ROFS" }, \ 1123 1020 { NFSERR_MLINK, "MLINK" }, \ 1021 + { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ 1124 1022 { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ 1125 1023 { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ 1126 1024 { NFSERR_DQUOT, "DQUOT" }, \ ··· 1139 1035 1140 1036 TRACE_EVENT(nfs_xdr_status, 1141 1037 TP_PROTO( 1038 + const struct 
xdr_stream *xdr, 1142 1039 int error 1143 1040 ), 1144 1041 1145 - TP_ARGS(error), 1042 + TP_ARGS(xdr, error), 1146 1043 1147 1044 TP_STRUCT__entry( 1148 - __field(int, error) 1045 + __field(unsigned int, task_id) 1046 + __field(unsigned int, client_id) 1047 + __field(u32, xid) 1048 + __field(unsigned long, error) 1149 1049 ), 1150 1050 1151 1051 TP_fast_assign( 1052 + const struct rpc_rqst *rqstp = xdr->rqst; 1053 + const struct rpc_task *task = rqstp->rq_task; 1054 + 1055 + __entry->task_id = task->tk_pid; 1056 + __entry->client_id = task->tk_client->cl_clid; 1057 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 1152 1058 __entry->error = error; 1153 1059 ), 1154 1060 1155 1061 TP_printk( 1156 - "error=%d (%s)", 1157 - __entry->error, nfs_show_status(__entry->error) 1062 + "task:%u@%d xid=0x%08x error=%ld (%s)", 1063 + __entry->task_id, __entry->client_id, __entry->xid, 1064 + -__entry->error, nfs_show_status(__entry->error) 1158 1065 ) 1159 1066 ); 1160 1067
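Note the sign convention the reworked trace classes adopt: the entry stores the error as a positive unsigned long magnitude, and TP_printk() negates it again, so the rendered record shows the conventional negative errno while __print_symbolic() can still match the positive NFSERR_* codes. Condensed from the hunks above:

    /* store the magnitude only... */
    __entry->error = error < 0 ? -error : 0;
    /* ...render the usual negative errno plus its symbolic name */
    TP_printk("error=%ld (%s)",
              -__entry->error, nfs_show_status(__entry->error))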
+2 -4
fs/nfs/pagelist.c
··· 77 77 static inline struct nfs_page * 78 78 nfs_page_alloc(void) 79 79 { 80 - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); 80 + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); 81 81 if (p) 82 82 INIT_LIST_HEAD(&p->wb_list); 83 83 return p; ··· 775 775 if (pagecount <= ARRAY_SIZE(pg_array->page_array)) 776 776 pg_array->pagevec = pg_array->page_array; 777 777 else { 778 - if (hdr->rw_mode == FMODE_WRITE) 779 - gfp_flags = GFP_NOIO; 780 778 pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags); 781 779 if (!pg_array->pagevec) { 782 780 pg_array->npages = 0; ··· 849 851 desc->pg_mirrors_dynamic = NULL; 850 852 if (mirror_count == 1) 851 853 return desc->pg_mirrors_static; 852 - ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_NOFS); 854 + ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); 853 855 if (ret != NULL) { 854 856 for (i = 0; i < mirror_count; i++) 855 857 nfs_pageio_mirror_init(&ret[i], desc->pg_bsize);
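On the allocation flags: GFP_NOIO may sleep but forbids reclaim from starting any I/O at all, while GFP_KERNEL allows full reclaim, including recursing into filesystems. Switching these allocations to GFP_KERNEL presumably relies on the callers no longer running in a context where such recursion could deadlock; the modern idiom for any remaining dangerous section is scoped marking rather than hard-coded flags, roughly (illustrative only, not taken from this diff):

    unsigned int flags = memalloc_nofs_save(); /* GFP_KERNEL now acts as GFP_NOFS */
    p = kmalloc(size, GFP_KERNEL);
    memalloc_nofs_restore(flags);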
+15 -5
fs/nfs/pnfs.c
··· 1890 1890 spin_unlock(&ino->i_lock); 1891 1891 lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, 1892 1892 !atomic_read(&lo->plh_outstanding))); 1893 - if (IS_ERR(lseg) || !list_empty(&lo->plh_segs)) 1893 + if (IS_ERR(lseg)) 1894 1894 goto out_put_layout_hdr; 1895 1895 pnfs_put_layout_hdr(lo); 1896 1896 goto lookup_again; ··· 1915 1915 * stateid. 1916 1916 */ 1917 1917 if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { 1918 + int status; 1918 1919 1919 1920 /* 1920 1921 * The first layoutget for the file. Need to serialize per ··· 1935 1934 } 1936 1935 1937 1936 first = true; 1938 - if (nfs4_select_rw_stateid(ctx->state, 1937 + status = nfs4_select_rw_stateid(ctx->state, 1939 1938 iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, 1940 - NULL, &stateid, NULL) != 0) { 1939 + NULL, &stateid, NULL); 1940 + if (status != 0) { 1941 1941 trace_pnfs_update_layout(ino, pos, count, 1942 1942 iomode, lo, lseg, 1943 1943 PNFS_UPDATE_LAYOUT_INVALID_OPEN); 1944 - goto out_unlock; 1944 + if (status != -EAGAIN) 1945 + goto out_unlock; 1946 + spin_unlock(&ino->i_lock); 1947 + nfs4_schedule_stateid_recovery(server, ctx->state); 1948 + pnfs_clear_first_layoutget(lo); 1949 + pnfs_put_layout_hdr(lo); 1950 + goto lookup_again; 1945 1951 } 1946 1952 } else { 1947 1953 nfs4_stateid_copy(&stateid, &lo->plh_stateid); ··· 2037 2029 out_put_layout_hdr: 2038 2030 if (first) 2039 2031 pnfs_clear_first_layoutget(lo); 2032 + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 2033 + PNFS_UPDATE_LAYOUT_EXIT); 2040 2034 pnfs_put_layout_hdr(lo); 2041 2035 out: 2042 2036 dprintk("%s: inode %s/%llu pNFS layout segment %s for " ··· 2478 2468 wb_size, 2479 2469 IOMODE_RW, 2480 2470 false, 2481 - GFP_NOFS); 2471 + GFP_KERNEL); 2482 2472 if (IS_ERR(pgio->pg_lseg)) { 2483 2473 pgio->pg_error = PTR_ERR(pgio->pg_lseg); 2484 2474 pgio->pg_lseg = NULL;
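The behavioural change here: when nfs4_select_rw_stateid() cannot yet supply a usable stateid, an -EAGAIN no longer fails the layoutget outright; the client schedules stateid recovery and loops back to lookup_again, and the new PNFS_UPDATE_LAYOUT_EXIT reason makes every exit from the function visible to the pnfs_update_layout tracepoint. The new control flow, condensed from the hunk:

    status = nfs4_select_rw_stateid(ctx->state,
                    iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
                    NULL, &stateid, NULL);
    if (status != 0) {
            if (status != -EAGAIN)
                    goto out_unlock;        /* hard failure */
            spin_unlock(&ino->i_lock);
            nfs4_schedule_stateid_recovery(server, ctx->state);
            pnfs_clear_first_layoutget(lo);
            pnfs_put_layout_hdr(lo);
            goto lookup_again;              /* retry after recovery */
    }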
+40 -17
fs/nfs/super.c
··· 77 77 #define NFS_DEFAULT_VERSION 2 78 78 #endif 79 79 80 + #define NFS_MAX_CONNECTIONS 16 81 + 80 82 enum { 81 83 /* Mount options that take no arguments */ 82 84 Opt_soft, Opt_softerr, Opt_hard, ··· 110 108 Opt_nfsvers, 111 109 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, 112 110 Opt_addr, Opt_mountaddr, Opt_clientaddr, 111 + Opt_nconnect, 113 112 Opt_lookupcache, 114 113 Opt_fscache_uniq, 115 114 Opt_local_lock, ··· 183 180 { Opt_clientaddr, "clientaddr=%s" }, 184 181 { Opt_mounthost, "mounthost=%s" }, 185 182 { Opt_mountaddr, "mountaddr=%s" }, 183 + 184 + { Opt_nconnect, "nconnect=%s" }, 186 185 187 186 { Opt_lookupcache, "lookupcache=%s" }, 188 187 { Opt_fscache_uniq, "fsc=%s" }, ··· 587 582 } 588 583 default: 589 584 if (showdefaults) 590 - seq_printf(m, ",mountaddr=unspecified"); 585 + seq_puts(m, ",mountaddr=unspecified"); 591 586 } 592 587 593 588 if (nfss->mountd_version || showdefaults) ··· 678 673 seq_printf(m, ",proto=%s", 679 674 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); 680 675 rcu_read_unlock(); 676 + if (clp->cl_nconnect > 0) 677 + seq_printf(m, ",nconnect=%u", clp->cl_nconnect); 681 678 if (version == 4) { 682 679 if (nfss->port != NFS_PORT) 683 680 seq_printf(m, ",port=%u", nfss->port); ··· 697 690 nfs_show_nfsv4_options(m, nfss, showdefaults); 698 691 699 692 if (nfss->options & NFS_OPTION_FSCACHE) 700 - seq_printf(m, ",fsc"); 693 + seq_puts(m, ",fsc"); 701 694 702 695 if (nfss->options & NFS_OPTION_MIGRATION) 703 - seq_printf(m, ",migration"); 696 + seq_puts(m, ",migration"); 704 697 705 698 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { 706 699 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) 707 - seq_printf(m, ",lookupcache=none"); 700 + seq_puts(m, ",lookupcache=none"); 708 701 else 709 - seq_printf(m, ",lookupcache=pos"); 702 + seq_puts(m, ",lookupcache=pos"); 710 703 } 711 704 712 705 local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK; 713 706 local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL; 714 707 715 708 if (!local_flock && !local_fcntl) 716 - seq_printf(m, ",local_lock=none"); 709 + seq_puts(m, ",local_lock=none"); 717 710 else if (local_flock && local_fcntl) 718 - seq_printf(m, ",local_lock=all"); 711 + seq_puts(m, ",local_lock=all"); 719 712 else if (local_flock) 720 - seq_printf(m, ",local_lock=flock"); 713 + seq_puts(m, ",local_lock=flock"); 721 714 else 722 - seq_printf(m, ",local_lock=posix"); 715 + seq_puts(m, ",local_lock=posix"); 723 716 } 724 717 725 718 /* ··· 742 735 EXPORT_SYMBOL_GPL(nfs_show_options); 743 736 744 737 #if IS_ENABLED(CONFIG_NFS_V4) 738 + static void show_lease(struct seq_file *m, struct nfs_server *server) 739 + { 740 + struct nfs_client *clp = server->nfs_client; 741 + unsigned long expire; 742 + 743 + seq_printf(m, ",lease_time=%ld", clp->cl_lease_time / HZ); 744 + expire = clp->cl_last_renewal + clp->cl_lease_time; 745 + seq_printf(m, ",lease_expired=%ld", 746 + time_after(expire, jiffies) ? 0 : (jiffies - expire) / HZ); 747 + } 745 748 #ifdef CONFIG_NFS_V4_1 746 749 static void show_sessions(struct seq_file *m, struct nfs_server *server) 747 750 { 748 751 if (nfs4_has_session(server->nfs_client)) 749 - seq_printf(m, ",sessions"); 752 + seq_puts(m, ",sessions"); 750 753 } 751 754 #else 752 755 static void show_sessions(struct seq_file *m, struct nfs_server *server) {} ··· 833 816 /* 834 817 * Display all mount option settings 835 818 */ 836 - seq_printf(m, "\n\topts:\t"); 819 + seq_puts(m, "\n\topts:\t"); 837 820 seq_puts(m, sb_rdonly(root->d_sb) ? 
"ro" : "rw"); 838 821 seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? ",sync" : ""); 839 822 seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : ""); ··· 844 827 845 828 show_implementation_id(m, nfss); 846 829 847 - seq_printf(m, "\n\tcaps:\t"); 830 + seq_puts(m, "\n\tcaps:\t"); 848 831 seq_printf(m, "caps=0x%x", nfss->caps); 849 832 seq_printf(m, ",wtmult=%u", nfss->wtmult); 850 833 seq_printf(m, ",dtsize=%u", nfss->dtsize); ··· 853 836 854 837 #if IS_ENABLED(CONFIG_NFS_V4) 855 838 if (nfss->nfs_client->rpc_ops->version == 4) { 856 - seq_printf(m, "\n\tnfsv4:\t"); 839 + seq_puts(m, "\n\tnfsv4:\t"); 857 840 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); 858 841 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); 859 842 seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]); 860 843 seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); 861 844 show_sessions(m, nfss); 862 845 show_pnfs(m, nfss); 846 + show_lease(m, nfss); 863 847 } 864 848 #endif 865 849 ··· 892 874 preempt_enable(); 893 875 } 894 876 895 - seq_printf(m, "\n\tevents:\t"); 877 + seq_puts(m, "\n\tevents:\t"); 896 878 for (i = 0; i < __NFSIOS_COUNTSMAX; i++) 897 879 seq_printf(m, "%lu ", totals.events[i]); 898 - seq_printf(m, "\n\tbytes:\t"); 880 + seq_puts(m, "\n\tbytes:\t"); 899 881 for (i = 0; i < __NFSIOS_BYTESMAX; i++) 900 882 seq_printf(m, "%Lu ", totals.bytes[i]); 901 883 #ifdef CONFIG_NFS_FSCACHE 902 884 if (nfss->options & NFS_OPTION_FSCACHE) { 903 - seq_printf(m, "\n\tfsc:\t"); 885 + seq_puts(m, "\n\tfsc:\t"); 904 886 for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) 905 887 seq_printf(m, "%Lu ", totals.fscache[i]); 906 888 } 907 889 #endif 908 - seq_printf(m, "\n"); 890 + seq_putc(m, '\n'); 909 891 910 892 rpc_clnt_show_stats(m, nfss->client); 911 893 ··· 1566 1548 kfree(string); 1567 1549 if (mnt->mount_server.addrlen == 0) 1568 1550 goto out_invalid_address; 1551 + break; 1552 + case Opt_nconnect: 1553 + if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) 1554 + goto out_invalid_value; 1555 + mnt->nfs_server.nconnect = option; 1569 1556 break; 1570 1557 case Opt_lookupcache: 1571 1558 string = match_strdup(args);
+187
fs/nfs/sysfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2019 Hammerspace Inc 4 + */ 5 + 6 + #include <linux/module.h> 7 + #include <linux/kobject.h> 8 + #include <linux/sysfs.h> 9 + #include <linux/fs.h> 10 + #include <linux/slab.h> 11 + #include <linux/netdevice.h> 12 + #include <linux/string.h> 13 + #include <linux/nfs_fs.h> 14 + #include <linux/rcupdate.h> 15 + 16 + #include "nfs4_fs.h" 17 + #include "netns.h" 18 + #include "sysfs.h" 19 + 20 + struct kobject *nfs_client_kobj; 21 + static struct kset *nfs_client_kset; 22 + 23 + static void nfs_netns_object_release(struct kobject *kobj) 24 + { 25 + kfree(kobj); 26 + } 27 + 28 + static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( 29 + struct kobject *kobj) 30 + { 31 + return &net_ns_type_operations; 32 + } 33 + 34 + static struct kobj_type nfs_netns_object_type = { 35 + .release = nfs_netns_object_release, 36 + .sysfs_ops = &kobj_sysfs_ops, 37 + .child_ns_type = nfs_netns_object_child_ns_type, 38 + }; 39 + 40 + static struct kobject *nfs_netns_object_alloc(const char *name, 41 + struct kset *kset, struct kobject *parent) 42 + { 43 + struct kobject *kobj; 44 + 45 + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); 46 + if (kobj) { 47 + kobj->kset = kset; 48 + if (kobject_init_and_add(kobj, &nfs_netns_object_type, 49 + parent, "%s", name) == 0) 50 + return kobj; 51 + kobject_put(kobj); 52 + } 53 + return NULL; 54 + } 55 + 56 + int nfs_sysfs_init(void) 57 + { 58 + nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); 59 + if (!nfs_client_kset) 60 + return -ENOMEM; 61 + nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); 62 + if (!nfs_client_kobj) { 63 + kset_unregister(nfs_client_kset); 64 + nfs_client_kset = NULL; 65 + return -ENOMEM; 66 + } 67 + return 0; 68 + } 69 + 70 + void nfs_sysfs_exit(void) 71 + { 72 + kobject_put(nfs_client_kobj); 73 + kset_unregister(nfs_client_kset); 74 + } 75 + 76 + static ssize_t nfs_netns_identifier_show(struct kobject *kobj, 77 + struct kobj_attribute *attr, char *buf) 78 + { 79 + struct nfs_netns_client *c = container_of(kobj, 80 + struct nfs_netns_client, 81 + kobject); 82 + return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier); 83 + } 84 + 85 + /* Strip trailing '\n' */ 86 + static size_t nfs_string_strip(const char *c, size_t len) 87 + { 88 + while (len > 0 && c[len-1] == '\n') 89 + --len; 90 + return len; 91 + } 92 + 93 + static ssize_t nfs_netns_identifier_store(struct kobject *kobj, 94 + struct kobj_attribute *attr, 95 + const char *buf, size_t count) 96 + { 97 + struct nfs_netns_client *c = container_of(kobj, 98 + struct nfs_netns_client, 99 + kobject); 100 + const char *old; 101 + char *p; 102 + size_t len; 103 + 104 + len = nfs_string_strip(buf, min_t(size_t, count, CONTAINER_ID_MAXLEN)); 105 + if (!len) 106 + return 0; 107 + p = kmemdup_nul(buf, len, GFP_KERNEL); 108 + if (!p) 109 + return -ENOMEM; 110 + old = xchg(&c->identifier, p); 111 + if (old) { 112 + synchronize_rcu(); 113 + kfree(old); 114 + } 115 + return count; 116 + } 117 + 118 + static void nfs_netns_client_release(struct kobject *kobj) 119 + { 120 + struct nfs_netns_client *c = container_of(kobj, 121 + struct nfs_netns_client, 122 + kobject); 123 + 124 + if (c->identifier) 125 + kfree(c->identifier); 126 + kfree(c); 127 + } 128 + 129 + static const void *nfs_netns_client_namespace(struct kobject *kobj) 130 + { 131 + return container_of(kobj, struct nfs_netns_client, kobject)->net; 132 + } 133 + 134 + static struct kobj_attribute nfs_netns_client_id = __ATTR(identifier, 135 + 
0644, nfs_netns_identifier_show, nfs_netns_identifier_store); 136 + 137 + static struct attribute *nfs_netns_client_attrs[] = { 138 + &nfs_netns_client_id.attr, 139 + NULL, 140 + }; 141 + 142 + static struct kobj_type nfs_netns_client_type = { 143 + .release = nfs_netns_client_release, 144 + .default_attrs = nfs_netns_client_attrs, 145 + .sysfs_ops = &kobj_sysfs_ops, 146 + .namespace = nfs_netns_client_namespace, 147 + }; 148 + 149 + static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, 150 + struct net *net) 151 + { 152 + struct nfs_netns_client *p; 153 + 154 + p = kzalloc(sizeof(*p), GFP_KERNEL); 155 + if (p) { 156 + p->net = net; 157 + p->kobject.kset = nfs_client_kset; 158 + if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, 159 + parent, "nfs_client") == 0) 160 + return p; 161 + kobject_put(&p->kobject); 162 + } 163 + return NULL; 164 + } 165 + 166 + void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) 167 + { 168 + struct nfs_netns_client *clp; 169 + 170 + clp = nfs_netns_client_alloc(nfs_client_kobj, net); 171 + if (clp) { 172 + netns->nfs_client = clp; 173 + kobject_uevent(&clp->kobject, KOBJ_ADD); 174 + } 175 + } 176 + 177 + void nfs_netns_sysfs_destroy(struct nfs_net *netns) 178 + { 179 + struct nfs_netns_client *clp = netns->nfs_client; 180 + 181 + if (clp) { 182 + kobject_uevent(&clp->kobject, KOBJ_REMOVE); 183 + kobject_del(&clp->kobject); 184 + kobject_put(&clp->kobject); 185 + netns->nfs_client = NULL; 186 + } 187 + }
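nfs_netns_identifier_store() replaces the identifier with the classic RCU publish-then-free pattern: xchg() atomically installs the new string, and the old one is only freed after synchronize_rcu(), so any reader that picked up the previous pointer under rcu_read_lock() can finish with it safely. The pattern in isolation, with names taken from the code above:

    p = kmemdup_nul(buf, len, GFP_KERNEL);  /* private copy of the new id */
    old = xchg(&c->identifier, p);          /* atomically publish it */
    if (old) {
            synchronize_rcu();              /* wait out current readers */
            kfree(old);
    }

The resulting file lands at /sys/fs/nfs/net/nfs_client/identifier (kset "nfs" under fs_kobj, object "net", per-netns kobject "nfs_client"), so a container manager can write a per-host identifier there before the first NFSv4 mount.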
+25
fs/nfs/sysfs.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2019 Hammerspace Inc 4 + */ 5 + 6 + #ifndef __NFS_SYSFS_H 7 + #define __NFS_SYSFS_H 8 + 9 + #define CONTAINER_ID_MAXLEN (64) 10 + 11 + struct nfs_netns_client { 12 + struct kobject kobject; 13 + struct net *net; 14 + const char *identifier; 15 + }; 16 + 17 + extern struct kobject *nfs_client_kobj; 18 + 19 + extern int nfs_sysfs_init(void); 20 + extern void nfs_sysfs_exit(void); 21 + 22 + void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net); 23 + void nfs_netns_sysfs_destroy(struct nfs_net *netns); 24 + 25 + #endif
+2 -5
fs/nfs/write.c
··· 103 103 104 104 static struct nfs_pgio_header *nfs_writehdr_alloc(void) 105 105 { 106 - struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 106 + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); 107 107 108 108 memset(p, 0, sizeof(*p)); 109 109 p->rw_mode = FMODE_WRITE; ··· 721 721 struct inode *inode = mapping->host; 722 722 struct nfs_pageio_descriptor pgio; 723 723 struct nfs_io_completion *ioc; 724 - unsigned int pflags = memalloc_nofs_save(); 725 724 int err; 726 725 727 726 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 728 727 729 - ioc = nfs_io_completion_alloc(GFP_NOFS); 728 + ioc = nfs_io_completion_alloc(GFP_KERNEL); 730 729 if (ioc) 731 730 nfs_io_completion_init(ioc, nfs_io_completion_commit, inode); 732 731 ··· 735 736 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 736 737 nfs_pageio_complete(&pgio); 737 738 nfs_io_completion_put(ioc); 738 - 739 - memalloc_nofs_restore(pflags); 740 739 741 740 if (err < 0) 742 741 goto out_err;
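With the write-path allocations converted to GFP_KERNEL, nfs_writepages() also drops its memalloc_nofs_save()/memalloc_nofs_restore() bracket, whose only effect was to make GFP_KERNEL allocations inside the section behave as GFP_NOFS (see the note after the pagelist.c hunk above for that idiom). Removing the flags and the bracket together is presumably a deliberate relaxation: the writeback entry points are trusted not to be re-entered from filesystem reclaim.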
+1
include/linux/nfs4.h
··· 660 660 PNFS_UPDATE_LAYOUT_BLOCKED, 661 661 PNFS_UPDATE_LAYOUT_INVALID_OPEN, 662 662 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, 663 + PNFS_UPDATE_LAYOUT_EXIT, 663 664 }; 664 665 665 666 #define NFS4_OP_MAP_NUM_LONGS \
+2
include/linux/nfs_fs.h
··· 223 223 #define NFS_INO_INVALID_MTIME BIT(10) /* cached mtime is invalid */ 224 224 #define NFS_INO_INVALID_SIZE BIT(11) /* cached size is invalid */ 225 225 #define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ 226 + #define NFS_INO_DATA_INVAL_DEFER \ 227 + BIT(13) /* Deferred cache invalidation */ 226 228 227 229 #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ 228 230 | NFS_INO_INVALID_CTIME \
+1
include/linux/nfs_fs_sb.h
··· 58 58 struct nfs_subversion * cl_nfs_mod; /* pointer to nfs version module */ 59 59 60 60 u32 cl_minorversion;/* NFSv4 minorversion */ 61 + unsigned int cl_nconnect; /* Number of connections */ 61 62 const char * cl_principal; /* used for machine cred */ 62 63 63 64 #if IS_ENABLED(CONFIG_NFS_V4)
+1
include/linux/sunrpc/bc_xprt.h
··· 43 43 int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); 44 44 void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); 45 45 void xprt_free_bc_rqst(struct rpc_rqst *req); 46 + unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt); 46 47 47 48 /* 48 49 * Determine if a shared backchannel is in use
+4
include/linux/sunrpc/clnt.h
··· 124 124 u32 prognumber; /* overrides program->number */ 125 125 u32 version; 126 126 rpc_authflavor_t authflavor; 127 + u32 nconnect; 127 128 unsigned long flags; 128 129 char *client_name; 129 130 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ ··· 164 163 void rpc_release_client(struct rpc_clnt *); 165 164 void rpc_task_release_transport(struct rpc_task *); 166 165 void rpc_task_release_client(struct rpc_task *); 166 + struct rpc_xprt *rpc_task_get_xprt(struct rpc_clnt *clnt, 167 + struct rpc_xprt *xprt); 167 168 168 169 int rpcb_create_local(struct net *); 169 170 void rpcb_put_local(struct net *); ··· 194 191 struct net * rpc_net_ns(struct rpc_clnt *); 195 192 size_t rpc_max_payload(struct rpc_clnt *); 196 193 size_t rpc_max_bc_payload(struct rpc_clnt *); 194 + unsigned int rpc_num_bc_slots(struct rpc_clnt *); 197 195 void rpc_force_rebind(struct rpc_clnt *); 198 196 size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); 199 197 const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+6 -1
include/linux/sunrpc/metrics.h
··· 30 30 #include <linux/ktime.h> 31 31 #include <linux/spinlock.h> 32 32 33 - #define RPC_IOSTATS_VERS "1.0" 33 + #define RPC_IOSTATS_VERS "1.1" 34 34 35 35 struct rpc_iostats { 36 36 spinlock_t om_lock; ··· 66 66 ktime_t om_queue, /* queued for xmit */ 67 67 om_rtt, /* RPC RTT */ 68 68 om_execute; /* RPC execution */ 69 + /* 70 + * The count of operations that complete with tk_status < 0. 71 + * These statuses usually indicate error conditions. 72 + */ 73 + unsigned long om_error_status; 69 74 } ____cacheline_aligned; 70 75 71 76 struct rpc_task;
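The version string bump to "1.1" signals the extra per-op field to userspace parsers of /proc/self/mountstats. The counter itself tracks operations completing with a negative tk_status; per the sched.c hunk below, rpc_exit_task() now calls rpc_count_iostats() (or the program's rpc_count_stats callback) directly at task exit, so a plausible accounting site, sketched here rather than quoted, is:

    /* hedged sketch: inside the per-op accounting, at task completion */
    if (task->tk_status < 0)
            op_metrics->om_error_status++;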
+3 -1
include/linux/sunrpc/sched.h
··· 126 126 #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ 127 127 #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ 128 128 #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ 129 + #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ 129 130 #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ 130 131 #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ 131 132 #define RPC_TASK_SENT 0x0800 /* message was sent */ ··· 184 183 #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW) 185 184 186 185 struct rpc_timer { 187 - struct timer_list timer; 188 186 struct list_head list; 187 + unsigned long expires; 188 + struct delayed_work dwork; 189 189 }; 190 190 191 191 /*
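struct rpc_timer trades its softirq-context timer_list for a deferrable delayed_work plus a cached expires value. Queue timeouts then run entirely in process context, which is exactly what lets the sched.c hunk below downgrade every spin_lock_bh() on the wait-queue lock to a plain spin_lock(). Before and after, condensed from that hunk:

    /* before: softirq timer, so the queue lock had to be bh-safe */
    timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
    /* after: deferrable work on rpciod; plain spinlocks suffice */
    INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
    mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires - now);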
+8 -2
include/linux/sunrpc/xprt.h
··· 158 158 int (*bc_setup)(struct rpc_xprt *xprt, 159 159 unsigned int min_reqs); 160 160 size_t (*bc_maxpayload)(struct rpc_xprt *xprt); 161 + unsigned int (*bc_num_slots)(struct rpc_xprt *xprt); 161 162 void (*bc_free_rqst)(struct rpc_rqst *rqst); 162 163 void (*bc_destroy)(struct rpc_xprt *xprt, 163 164 unsigned int max_reqs); ··· 239 238 /* 240 239 * Send stuff 241 240 */ 241 + atomic_long_t queuelen; 242 242 spinlock_t transport_lock; /* lock transport info */ 243 243 spinlock_t reserve_lock; /* lock slot table */ 244 244 spinlock_t queue_lock; /* send/receive queue lock */ ··· 252 250 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 253 251 struct svc_serv *bc_serv; /* The RPC service which will */ 254 252 /* process the callback */ 255 - int bc_alloc_count; /* Total number of preallocs */ 256 - atomic_t bc_free_slots; 253 + unsigned int bc_alloc_max; 254 + unsigned int bc_alloc_count; /* Total number of preallocs */ 255 + atomic_t bc_slot_count; /* Number of allocated slots */ 257 256 spinlock_t bc_pa_lock; /* Protects the preallocated 258 257 * items */ 259 258 struct list_head bc_pa_list; /* List of preallocated ··· 337 334 */ 338 335 struct rpc_xprt *xprt_create_transport(struct xprt_create *args); 339 336 void xprt_connect(struct rpc_task *task); 337 + unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt); 338 + void xprt_reconnect_backoff(struct rpc_xprt *xprt, 339 + unsigned long init_to); 340 340 void xprt_reserve(struct rpc_task *task); 341 341 void xprt_retry_reserve(struct rpc_task *task); 342 342 int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
+2
include/linux/sunrpc/xprtmultipath.h
··· 15 15 struct kref xps_kref; 16 16 17 17 unsigned int xps_nxprts; 18 + unsigned int xps_nactive; 19 + atomic_long_t xps_queuelen; 18 20 struct list_head xps_xprt_list; 19 21 20 22 struct net * xps_net;
+5
include/linux/sunrpc/xprtsock.h
··· 56 56 */ 57 57 unsigned long sock_state; 58 58 struct delayed_work connect_worker; 59 + struct work_struct error_worker; 59 60 struct work_struct recv_worker; 60 61 struct mutex recv_mutex; 61 62 struct sockaddr_storage srcaddr; ··· 85 84 #define XPRT_SOCK_CONNECTING 1U 86 85 #define XPRT_SOCK_DATA_READY (2) 87 86 #define XPRT_SOCK_UPD_TIMEOUT (3) 87 + #define XPRT_SOCK_WAKE_ERROR (4) 88 + #define XPRT_SOCK_WAKE_WRITE (5) 89 + #define XPRT_SOCK_WAKE_PENDING (6) 90 + #define XPRT_SOCK_WAKE_DISCONNECT (7) 88 91 89 92 #endif /* __KERNEL__ */ 90 93
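The four new XPRT_SOCK_WAKE_* bits plus error_worker suggest that the sk_* callbacks, which run in bh context, now merely latch what happened and defer the real work; a plausible producer side, sketched under that assumption (the actual consumer lives in xprtsock.c, not shown here):

    /* hedged sketch: latch the event in bh context... */
    set_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
    /* ...and let the worker act on it in process context */
    queue_work(xprtiod_workqueue, &transport->error_worker);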
+68 -22
include/trace/events/rpcrdma.h
··· 181 181 ), \ 182 182 TP_ARGS(task, mr, nsegs)) 183 183 184 - TRACE_DEFINE_ENUM(FRWR_IS_INVALID); 185 - TRACE_DEFINE_ENUM(FRWR_IS_VALID); 186 - TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR); 187 - TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI); 188 - 189 - #define xprtrdma_show_frwr_state(x) \ 190 - __print_symbolic(x, \ 191 - { FRWR_IS_INVALID, "INVALID" }, \ 192 - { FRWR_IS_VALID, "VALID" }, \ 193 - { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \ 194 - { FRWR_FLUSHED_LI, "FLUSHED_LI" }) 195 - 196 184 DECLARE_EVENT_CLASS(xprtrdma_frwr_done, 197 185 TP_PROTO( 198 186 const struct ib_wc *wc, ··· 191 203 192 204 TP_STRUCT__entry( 193 205 __field(const void *, mr) 194 - __field(unsigned int, state) 195 206 __field(unsigned int, status) 196 207 __field(unsigned int, vendor_err) 197 208 ), 198 209 199 210 TP_fast_assign( 200 211 __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr); 201 - __entry->state = frwr->fr_state; 202 212 __entry->status = wc->status; 203 213 __entry->vendor_err = __entry->status ? wc->vendor_err : 0; 204 214 ), 205 215 206 216 TP_printk( 207 - "mr=%p state=%s: %s (%u/0x%x)", 208 - __entry->mr, xprtrdma_show_frwr_state(__entry->state), 209 - rdma_show_wc_status(__entry->status), 217 + "mr=%p: %s (%u/0x%x)", 218 + __entry->mr, rdma_show_wc_status(__entry->status), 210 219 __entry->status, __entry->vendor_err 211 220 ) 212 221 ); ··· 375 390 DEFINE_RXPRT_EVENT(xprtrdma_op_close); 376 391 DEFINE_RXPRT_EVENT(xprtrdma_op_connect); 377 392 393 + TRACE_EVENT(xprtrdma_op_set_cto, 394 + TP_PROTO( 395 + const struct rpcrdma_xprt *r_xprt, 396 + unsigned long connect, 397 + unsigned long reconnect 398 + ), 399 + 400 + TP_ARGS(r_xprt, connect, reconnect), 401 + 402 + TP_STRUCT__entry( 403 + __field(const void *, r_xprt) 404 + __field(unsigned long, connect) 405 + __field(unsigned long, reconnect) 406 + __string(addr, rpcrdma_addrstr(r_xprt)) 407 + __string(port, rpcrdma_portstr(r_xprt)) 408 + ), 409 + 410 + TP_fast_assign( 411 + __entry->r_xprt = r_xprt; 412 + __entry->connect = connect; 413 + __entry->reconnect = reconnect; 414 + __assign_str(addr, rpcrdma_addrstr(r_xprt)); 415 + __assign_str(port, rpcrdma_portstr(r_xprt)); 416 + ), 417 + 418 + TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu", 419 + __get_str(addr), __get_str(port), __entry->r_xprt, 420 + __entry->connect / HZ, __entry->reconnect / HZ 421 + ) 422 + ); 423 + 378 424 TRACE_EVENT(xprtrdma_qp_event, 379 425 TP_PROTO( 380 426 const struct rpcrdma_xprt *r_xprt, ··· 486 470 487 471 TRACE_EVENT(xprtrdma_marshal, 488 472 TP_PROTO( 489 - const struct rpc_rqst *rqst, 490 - unsigned int hdrlen, 473 + const struct rpcrdma_req *req, 491 474 unsigned int rtype, 492 475 unsigned int wtype 493 476 ), 494 477 495 - TP_ARGS(rqst, hdrlen, rtype, wtype), 478 + TP_ARGS(req, rtype, wtype), 496 479 497 480 TP_STRUCT__entry( 498 481 __field(unsigned int, task_id) ··· 506 491 ), 507 492 508 493 TP_fast_assign( 494 + const struct rpc_rqst *rqst = &req->rl_slot; 495 + 509 496 __entry->task_id = rqst->rq_task->tk_pid; 510 497 __entry->client_id = rqst->rq_task->tk_client->cl_clid; 511 498 __entry->xid = be32_to_cpu(rqst->rq_xid); 512 - __entry->hdrlen = hdrlen; 499 + __entry->hdrlen = req->rl_hdrbuf.len; 513 500 __entry->headlen = rqst->rq_snd_buf.head[0].iov_len; 514 501 __entry->pagelen = rqst->rq_snd_buf.page_len; 515 502 __entry->taillen = rqst->rq_snd_buf.tail[0].iov_len; ··· 529 512 ); 530 513 531 514 TRACE_EVENT(xprtrdma_marshal_failed, 515 + TP_PROTO(const struct rpc_rqst *rqst, 516 + int ret 517 + ), 518 + 519 + TP_ARGS(rqst, ret), 520 + 521 + 
TP_STRUCT__entry( 522 + __field(unsigned int, task_id) 523 + __field(unsigned int, client_id) 524 + __field(u32, xid) 525 + __field(int, ret) 526 + ), 527 + 528 + TP_fast_assign( 529 + __entry->task_id = rqst->rq_task->tk_pid; 530 + __entry->client_id = rqst->rq_task->tk_client->cl_clid; 531 + __entry->xid = be32_to_cpu(rqst->rq_xid); 532 + __entry->ret = ret; 533 + ), 534 + 535 + TP_printk("task:%u@%u xid=0x%08x: ret=%d", 536 + __entry->task_id, __entry->client_id, __entry->xid, 537 + __entry->ret 538 + ) 539 + ); 540 + 541 + TRACE_EVENT(xprtrdma_prepsend_failed, 532 542 TP_PROTO(const struct rpc_rqst *rqst, 533 543 int ret 534 544 ), ··· 603 559 const struct rpc_rqst *rqst = &req->rl_slot; 604 560 605 561 __entry->task_id = rqst->rq_task->tk_pid; 606 - __entry->client_id = rqst->rq_task->tk_client->cl_clid; 562 + __entry->client_id = rqst->rq_task->tk_client ? 563 + rqst->rq_task->tk_client->cl_clid : -1; 607 564 __entry->req = req; 608 565 __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; 609 566 __entry->signaled = req->rl_sendctx->sc_wr.send_flags & ··· 743 698 DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg); 744 699 DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); 745 700 DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); 701 + DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_done); 746 702 747 703 TRACE_EVENT(xprtrdma_frwr_alloc, 748 704 TP_PROTO(
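Worth noting in this hunk: the per-MR fr_state field and its xprtrdma_show_frwr_state() pretty-printer disappear from the completion events, consistent with FRWR validity now being inferred from the completions themselves (the new xprtrdma_wc_li_done event covers local invalidation signalled through the done path); xprtrdma_marshal now takes the rpcrdma_req and reads the header length from req->rl_hdrbuf.len instead of having it passed in; and the post_send event guards against a NULL tk_client, presumably for backchannel replies, which have no rpc_clnt. The new xprtrdma_op_set_cto event reports the connect and reconnect timeouts in whole seconds, hence the division by HZ.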
+1 -1
net/sunrpc/Kconfig
··· 35 35 36 36 If unsure, say Y. 37 37 38 - config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES 38 + config SUNRPC_DISABLE_INSECURE_ENCTYPES 39 39 bool "Secure RPC: Disable insecure Kerberos encryption types" 40 40 depends on RPCSEC_GSS_KRB5 41 41 default n
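Kconfig prepends CONFIG_ itself, so the old spelling actually defined CONFIG_CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES; no #ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES test could ever match it, which made the option silently inert. Dropping the redundant prefix is the whole fix.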
+22 -18
net/sunrpc/backchannel_rqst.c
··· 31 31 #define RPCDBG_FACILITY RPCDBG_TRANS 32 32 #endif 33 33 34 + #define BC_MAX_SLOTS 64U 35 + 36 + unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt) 37 + { 38 + return BC_MAX_SLOTS; 39 + } 40 + 34 41 /* 35 42 * Helper routines that track the number of preallocation elements 36 43 * on the transport. 37 44 */ 38 45 static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) 39 46 { 40 - return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); 41 - } 42 - 43 - static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) 44 - { 45 - atomic_add(n, &xprt->bc_free_slots); 46 - xprt->bc_alloc_count += n; 47 - } 48 - 49 - static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) 50 - { 51 - atomic_sub(n, &xprt->bc_free_slots); 52 - return xprt->bc_alloc_count -= n; 47 + return xprt->bc_alloc_count < xprt->bc_alloc_max; 53 48 } 54 49 55 50 /* ··· 140 145 141 146 dprintk("RPC: setup backchannel transport\n"); 142 147 148 + if (min_reqs > BC_MAX_SLOTS) 149 + min_reqs = BC_MAX_SLOTS; 150 + 143 151 /* 144 152 * We use a temporary list to keep track of the preallocated 145 153 * buffers. Once we're done building the list we splice it ··· 170 172 */ 171 173 spin_lock(&xprt->bc_pa_lock); 172 174 list_splice(&tmp_list, &xprt->bc_pa_list); 173 - xprt_inc_alloc_count(xprt, min_reqs); 175 + xprt->bc_alloc_count += min_reqs; 176 + xprt->bc_alloc_max += min_reqs; 177 + atomic_add(min_reqs, &xprt->bc_slot_count); 174 178 spin_unlock(&xprt->bc_pa_lock); 175 179 176 180 dprintk("RPC: setup backchannel transport done\n"); ··· 220 220 goto out; 221 221 222 222 spin_lock_bh(&xprt->bc_pa_lock); 223 - xprt_dec_alloc_count(xprt, max_reqs); 223 + xprt->bc_alloc_max -= max_reqs; 224 224 list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { 225 225 dprintk("RPC: req=%p\n", req); 226 226 list_del(&req->rq_bc_pa_list); 227 227 xprt_free_allocation(req); 228 + xprt->bc_alloc_count--; 229 + atomic_dec(&xprt->bc_slot_count); 228 230 if (--max_reqs == 0) 229 231 break; 230 232 } ··· 243 241 struct rpc_rqst *req = NULL; 244 242 245 243 dprintk("RPC: allocate a backchannel request\n"); 246 - if (atomic_read(&xprt->bc_free_slots) <= 0) 247 - goto not_found; 248 244 if (list_empty(&xprt->bc_pa_list)) { 249 245 if (!new) 250 246 goto not_found; 247 + if (atomic_read(&xprt->bc_slot_count) >= BC_MAX_SLOTS) 248 + goto not_found; 251 249 list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list); 252 250 xprt->bc_alloc_count++; 251 + atomic_inc(&xprt->bc_slot_count); 253 252 } 254 253 req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, 255 254 rq_bc_pa_list); ··· 294 291 if (xprt_need_to_requeue(xprt)) { 295 292 list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); 296 293 xprt->bc_alloc_count++; 294 + atomic_inc(&xprt->bc_slot_count); 297 295 req = NULL; 298 296 } 299 297 spin_unlock_bh(&xprt->bc_pa_lock); ··· 361 357 362 358 spin_lock(&xprt->bc_pa_lock); 363 359 list_del(&req->rq_bc_pa_list); 364 - xprt_dec_alloc_count(xprt, 1); 360 + xprt->bc_alloc_count--; 365 361 spin_unlock(&xprt->bc_pa_lock); 366 362 367 363 req->rq_private_buf.len = copied;
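Backchannel accounting is split into two meaningful numbers: bc_alloc_max, the ceiling raised as sessions are set up, and bc_slot_count, the slots actually allocated, now hard-capped at BC_MAX_SLOTS (64). The new xprt_bc_max_slots()/rpc_num_bc_slots() pair lets an NFSv4.1 client discover that cap before advertising a callback slot table; a hedged sketch of such a consumer (the args struct and field names here are assumptions, not taken from this diff):

    /* clamp the advertised backchannel session slots to what the
     * transport can actually service */
    unsigned int max_slots = rpc_num_bc_slots(clnt);
    if (args->bc_attrs.max_reqs > max_slots)
            args->bc_attrs.max_reqs = max_slots;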
+88 -7
net/sunrpc/clnt.c
··· 528 528 .bc_xprt = args->bc_xprt, 529 529 }; 530 530 char servername[48]; 531 + struct rpc_clnt *clnt; 532 + int i; 531 533 532 534 if (args->bc_xprt) { 533 535 WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC)); ··· 592 590 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) 593 591 xprt->resvport = 0; 594 592 595 - return rpc_create_xprt(args, xprt); 593 + clnt = rpc_create_xprt(args, xprt); 594 + if (IS_ERR(clnt) || args->nconnect <= 1) 595 + return clnt; 596 + 597 + for (i = 0; i < args->nconnect - 1; i++) { 598 + if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0) 599 + break; 600 + } 601 + return clnt; 596 602 } 597 603 EXPORT_SYMBOL_GPL(rpc_create); 598 604 ··· 978 968 } 979 969 EXPORT_SYMBOL_GPL(rpc_bind_new_program); 980 970 971 + struct rpc_xprt * 972 + rpc_task_get_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) 973 + { 974 + struct rpc_xprt_switch *xps; 975 + 976 + if (!xprt) 977 + return NULL; 978 + rcu_read_lock(); 979 + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); 980 + atomic_long_inc(&xps->xps_queuelen); 981 + rcu_read_unlock(); 982 + atomic_long_inc(&xprt->queuelen); 983 + 984 + return xprt; 985 + } 986 + 987 + static void 988 + rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) 989 + { 990 + struct rpc_xprt_switch *xps; 991 + 992 + atomic_long_dec(&xprt->queuelen); 993 + rcu_read_lock(); 994 + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); 995 + atomic_long_dec(&xps->xps_queuelen); 996 + rcu_read_unlock(); 997 + 998 + xprt_put(xprt); 999 + } 1000 + 981 1001 void rpc_task_release_transport(struct rpc_task *task) 982 1002 { 983 1003 struct rpc_xprt *xprt = task->tk_xprt; 984 1004 985 1005 if (xprt) { 986 1006 task->tk_xprt = NULL; 987 - xprt_put(xprt); 1007 + if (task->tk_client) 1008 + rpc_task_release_xprt(task->tk_client, xprt); 1009 + else 1010 + xprt_put(xprt); 988 1011 } 989 1012 } 990 1013 EXPORT_SYMBOL_GPL(rpc_task_release_transport); ··· 1026 983 { 1027 984 struct rpc_clnt *clnt = task->tk_client; 1028 985 986 + rpc_task_release_transport(task); 1029 987 if (clnt != NULL) { 1030 988 /* Remove from client task list */ 1031 989 spin_lock(&clnt->cl_lock); ··· 1036 992 1037 993 rpc_release_client(clnt); 1038 994 } 1039 - rpc_task_release_transport(task); 995 + } 996 + 997 + static struct rpc_xprt * 998 + rpc_task_get_first_xprt(struct rpc_clnt *clnt) 999 + { 1000 + struct rpc_xprt *xprt; 1001 + 1002 + rcu_read_lock(); 1003 + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); 1004 + rcu_read_unlock(); 1005 + return rpc_task_get_xprt(clnt, xprt); 1006 + } 1007 + 1008 + static struct rpc_xprt * 1009 + rpc_task_get_next_xprt(struct rpc_clnt *clnt) 1010 + { 1011 + return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi)); 1040 1012 } 1041 1013 1042 1014 static 1043 1015 void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) 1044 1016 { 1045 - if (!task->tk_xprt) 1046 - task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); 1017 + if (task->tk_xprt) 1018 + return; 1019 + if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) 1020 + task->tk_xprt = rpc_task_get_first_xprt(clnt); 1021 + else 1022 + task->tk_xprt = rpc_task_get_next_xprt(clnt); 1047 1023 } 1048 1024 1049 1025 static ··· 1526 1462 } 1527 1463 EXPORT_SYMBOL_GPL(rpc_max_bc_payload); 1528 1464 1465 + unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt) 1466 + { 1467 + struct rpc_xprt *xprt; 1468 + unsigned int ret; 1469 + 1470 + rcu_read_lock(); 1471 + xprt = rcu_dereference(clnt->cl_xprt); 1472 + ret = xprt->ops->bc_num_slots(xprt); 1473 + rcu_read_unlock(); 1474 + 
return ret; 1475 + } 1476 + EXPORT_SYMBOL_GPL(rpc_num_bc_slots); 1477 + 1529 1478 /** 1530 1479 * rpc_force_rebind - force transport to check that remote port is unchanged 1531 1480 * @clnt: client to rebind ··· 1865 1788 req->rq_snd_buf.head[0].iov_len = 0; 1866 1789 xdr_init_encode(&xdr, &req->rq_snd_buf, 1867 1790 req->rq_snd_buf.head[0].iov_base, req); 1791 + xdr_free_bvec(&req->rq_snd_buf); 1868 1792 if (rpc_encode_header(task, &xdr)) 1869 1793 return; 1870 1794 ··· 1905 1827 rpc_call_rpcerror(task, task->tk_status); 1906 1828 } 1907 1829 return; 1908 - } else { 1909 - xprt_request_prepare(task->tk_rqstp); 1910 1830 } 1911 1831 1912 1832 /* Add task to reply queue before transmission to avoid races */ ··· 2772 2696 return -ENOMEM; 2773 2697 data->xps = xprt_switch_get(xps); 2774 2698 data->xprt = xprt_get(xprt); 2699 + if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) { 2700 + rpc_cb_add_xprt_release(data); 2701 + goto success; 2702 + } 2775 2703 2776 2704 task = rpc_call_null_helper(clnt, xprt, NULL, 2777 2705 RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, ··· 2783 2703 if (IS_ERR(task)) 2784 2704 return PTR_ERR(task); 2785 2705 rpc_put_task(task); 2706 + success: 2786 2707 return 1; 2787 2708 } 2788 2709 EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
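This is the client half of multiple-connection support: rpc_create() calls rpc_clnt_add_xprt() nconnect-1 times to stand up the extra transports, and each task then obtains one via rpc_task_get_next_xprt(), with the paired queuelen counters on the rpc_xprt and the rpc_xprt_switch tracking in-flight load. Tasks flagged RPC_TASK_NO_ROUND_ROBIN (presumably lease renewal and similar state management) pin themselves to the first transport instead. The iterator that consumes the counters lives in xprtmultipath.c, not shown here; its acceptance test plausibly looks like:

    /* hedged sketch: take a transport whose queue length is at or
     * below the per-active-transport average across the switch
     * (multiplication avoids a division in the hot path) */
    if (atomic_long_read(&xprt->queuelen) * xps->xps_nactive <=
        atomic_long_read(&xps->xps_queuelen))
            return xprt;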
+29 -23
net/sunrpc/debugfs.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - /** 2 + /* 3 3 * debugfs interface for sunrpc 4 4 * 5 5 * (c) 2014 Jeff Layton <jlayton@primarydata.com> ··· 117 117 .release = tasks_release, 118 118 }; 119 119 120 + static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *numv) 121 + { 122 + int len; 123 + char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ 124 + char link[9]; /* enough for 8 hex digits + NULL */ 125 + int *nump = numv; 126 + 127 + if (IS_ERR_OR_NULL(xprt->debugfs)) 128 + return 0; 129 + len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", 130 + xprt->debugfs->d_name.name); 131 + if (len > sizeof(name)) 132 + return -1; 133 + if (*nump == 0) 134 + strcpy(link, "xprt"); 135 + else { 136 + len = snprintf(link, sizeof(link), "xprt%d", *nump); 137 + if (len > sizeof(link)) 138 + return -1; 139 + } 140 + debugfs_create_symlink(link, clnt->cl_debugfs, name); 141 + (*nump)++; 142 + return 0; 143 + } 144 + 120 145 void 121 146 rpc_clnt_debugfs_register(struct rpc_clnt *clnt) 122 147 { 123 148 int len; 124 - char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ 125 - struct rpc_xprt *xprt; 149 + char name[9]; /* enough for 8 hex digits + NULL */ 150 + int xprtnum = 0; 126 151 127 152 len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); 128 153 if (len >= sizeof(name)) ··· 160 135 debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt, 161 136 &tasks_fops); 162 137 163 - rcu_read_lock(); 164 - xprt = rcu_dereference(clnt->cl_xprt); 165 - /* no "debugfs" dentry? Don't bother with the symlink. */ 166 - if (IS_ERR_OR_NULL(xprt->debugfs)) { 167 - rcu_read_unlock(); 168 - return; 169 - } 170 - len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", 171 - xprt->debugfs->d_name.name); 172 - rcu_read_unlock(); 173 - 174 - if (len >= sizeof(name)) 175 - goto out_err; 176 - 177 - debugfs_create_symlink("xprt", clnt->cl_debugfs, name); 178 - 179 - return; 180 - out_err: 181 - debugfs_remove_recursive(clnt->cl_debugfs); 182 - clnt->cl_debugfs = NULL; 138 + rpc_clnt_iterate_for_each_xprt(clnt, do_xprt_debugfs, &xprtnum); 183 139 } 184 140 185 141 void
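Instead of a single xprt symlink pointing at cl_xprt, the client's debugfs directory now gets one link per transport, named xprt, xprt1, xprt2, ... in iteration order, created by walking the transport switch with rpc_clnt_iterate_for_each_xprt(). Under the usual layout that means /sys/kernel/debug/sunrpc/rpc_clnt/<clid>/xprtN, each resolving to ../../rpc_xprt/<xprt-id>, which keeps per-transport debugging usable on nconnect mounts.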
+49 -32
net/sunrpc/sched.c
··· 23 23 #include <linux/sched/mm.h> 24 24 25 25 #include <linux/sunrpc/clnt.h> 26 + #include <linux/sunrpc/metrics.h> 26 27 27 28 #include "sunrpc.h" 28 29 ··· 47 46 48 47 static void rpc_async_schedule(struct work_struct *); 49 48 static void rpc_release_task(struct rpc_task *task); 50 - static void __rpc_queue_timer_fn(struct timer_list *t); 49 + static void __rpc_queue_timer_fn(struct work_struct *); 51 50 52 51 /* 53 52 * RPC tasks sit here while waiting for conditions to improve. ··· 59 58 */ 60 59 struct workqueue_struct *rpciod_workqueue __read_mostly; 61 60 struct workqueue_struct *xprtiod_workqueue __read_mostly; 61 + EXPORT_SYMBOL_GPL(xprtiod_workqueue); 62 62 63 63 unsigned long 64 64 rpc_task_timeout(const struct rpc_task *task) ··· 89 87 task->tk_timeout = 0; 90 88 list_del(&task->u.tk_wait.timer_list); 91 89 if (list_empty(&queue->timer_list.list)) 92 - del_timer(&queue->timer_list.timer); 90 + cancel_delayed_work(&queue->timer_list.dwork); 93 91 } 94 92 95 93 static void 96 94 rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) 97 95 { 98 - timer_reduce(&queue->timer_list.timer, expires); 96 + unsigned long now = jiffies; 97 + queue->timer_list.expires = expires; 98 + if (time_before_eq(expires, now)) 99 + expires = 0; 100 + else 101 + expires -= now; 102 + mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires); 99 103 } 100 104 101 105 /* ··· 115 107 task->tk_pid, jiffies_to_msecs(timeout - jiffies)); 116 108 117 109 task->tk_timeout = timeout; 118 - rpc_set_queue_timer(queue, timeout); 110 + if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires)) 111 + rpc_set_queue_timer(queue, timeout); 119 112 list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); 120 113 } 121 114 ··· 259 250 queue->maxpriority = nr_queues - 1; 260 251 rpc_reset_waitqueue_priority(queue); 261 252 queue->qlen = 0; 262 - timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0); 253 + queue->timer_list.expires = 0; 254 + INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn); 263 255 INIT_LIST_HEAD(&queue->timer_list.list); 264 256 rpc_assign_waitqueue_name(queue, qname); 265 257 } ··· 279 269 280 270 void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) 281 271 { 282 - del_timer_sync(&queue->timer_list.timer); 272 + cancel_delayed_work_sync(&queue->timer_list.dwork); 283 273 } 284 274 EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); 285 275 ··· 434 424 /* 435 425 * Protect the queue operations. 436 426 */ 437 - spin_lock_bh(&q->lock); 427 + spin_lock(&q->lock); 438 428 __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority); 439 - spin_unlock_bh(&q->lock); 429 + spin_unlock(&q->lock); 440 430 } 441 431 EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout); 442 432 ··· 452 442 /* 453 443 * Protect the queue operations. 454 444 */ 455 - spin_lock_bh(&q->lock); 445 + spin_lock(&q->lock); 456 446 __rpc_sleep_on_priority(q, task, task->tk_priority); 457 - spin_unlock_bh(&q->lock); 447 + spin_unlock(&q->lock); 458 448 } 459 449 EXPORT_SYMBOL_GPL(rpc_sleep_on); 460 450 ··· 468 458 /* 469 459 * Protect the queue operations. 470 460 */ 471 - spin_lock_bh(&q->lock); 461 + spin_lock(&q->lock); 472 462 __rpc_sleep_on_priority_timeout(q, task, timeout, priority); 473 - spin_unlock_bh(&q->lock); 463 + spin_unlock(&q->lock); 474 464 } 475 465 EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout); 476 466 ··· 485 475 /* 486 476 * Protect the queue operations. 
487 477 */ 488 - spin_lock_bh(&q->lock); 478 + spin_lock(&q->lock); 489 479 __rpc_sleep_on_priority(q, task, priority); 490 - spin_unlock_bh(&q->lock); 480 + spin_unlock(&q->lock); 491 481 } 492 482 EXPORT_SYMBOL_GPL(rpc_sleep_on_priority); 493 483 ··· 565 555 { 566 556 if (!RPC_IS_QUEUED(task)) 567 557 return; 568 - spin_lock_bh(&queue->lock); 558 + spin_lock(&queue->lock); 569 559 rpc_wake_up_task_on_wq_queue_locked(wq, queue, task); 570 - spin_unlock_bh(&queue->lock); 560 + spin_unlock(&queue->lock); 571 561 } 572 562 573 563 /* ··· 577 567 { 578 568 if (!RPC_IS_QUEUED(task)) 579 569 return; 580 - spin_lock_bh(&queue->lock); 570 + spin_lock(&queue->lock); 581 571 rpc_wake_up_task_queue_locked(queue, task); 582 - spin_unlock_bh(&queue->lock); 572 + spin_unlock(&queue->lock); 583 573 } 584 574 EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); 585 575 ··· 612 602 { 613 603 if (!RPC_IS_QUEUED(task)) 614 604 return; 615 - spin_lock_bh(&queue->lock); 605 + spin_lock(&queue->lock); 616 606 rpc_wake_up_task_queue_set_status_locked(queue, task, status); 617 - spin_unlock_bh(&queue->lock); 607 + spin_unlock(&queue->lock); 618 608 } 619 609 620 610 /* ··· 677 667 678 668 dprintk("RPC: wake_up_first(%p \"%s\")\n", 679 669 queue, rpc_qname(queue)); 680 - spin_lock_bh(&queue->lock); 670 + spin_lock(&queue->lock); 681 671 task = __rpc_find_next_queued(queue); 682 672 if (task != NULL) 683 673 task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, 684 674 task, func, data); 685 - spin_unlock_bh(&queue->lock); 675 + spin_unlock(&queue->lock); 686 676 687 677 return task; 688 678 } ··· 721 711 { 722 712 struct list_head *head; 723 713 724 - spin_lock_bh(&queue->lock); 714 + spin_lock(&queue->lock); 725 715 head = &queue->tasks[queue->maxpriority]; 726 716 for (;;) { 727 717 while (!list_empty(head)) { ··· 735 725 break; 736 726 head--; 737 727 } 738 - spin_unlock_bh(&queue->lock); 728 + spin_unlock(&queue->lock); 739 729 } 740 730 EXPORT_SYMBOL_GPL(rpc_wake_up); 741 731 ··· 750 740 { 751 741 struct list_head *head; 752 742 753 - spin_lock_bh(&queue->lock); 743 + spin_lock(&queue->lock); 754 744 head = &queue->tasks[queue->maxpriority]; 755 745 for (;;) { 756 746 while (!list_empty(head)) { ··· 765 755 break; 766 756 head--; 767 757 } 768 - spin_unlock_bh(&queue->lock); 758 + spin_unlock(&queue->lock); 769 759 } 770 760 EXPORT_SYMBOL_GPL(rpc_wake_up_status); 771 761 772 - static void __rpc_queue_timer_fn(struct timer_list *t) 762 + static void __rpc_queue_timer_fn(struct work_struct *work) 773 763 { 774 - struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer); 764 + struct rpc_wait_queue *queue = container_of(work, 765 + struct rpc_wait_queue, 766 + timer_list.dwork.work); 775 767 struct rpc_task *task, *n; 776 768 unsigned long expires, now, timeo; 777 769 ··· 844 832 void rpc_exit_task(struct rpc_task *task) 845 833 { 846 834 task->tk_action = NULL; 835 + if (task->tk_ops->rpc_count_stats) 836 + task->tk_ops->rpc_count_stats(task, task->tk_calldata); 837 + else if (task->tk_client) 838 + rpc_count_iostats(task, task->tk_client->cl_metrics); 847 839 if (task->tk_ops->rpc_call_done != NULL) { 848 840 task->tk_ops->rpc_call_done(task, task->tk_calldata); 849 841 if (task->tk_action != NULL) { ··· 943 927 * rpc_task pointer may still be dereferenced. 
944 928 */ 945 929 queue = task->tk_waitqueue; 946 - spin_lock_bh(&queue->lock); 930 + spin_lock(&queue->lock); 947 931 if (!RPC_IS_QUEUED(task)) { 948 - spin_unlock_bh(&queue->lock); 932 + spin_unlock(&queue->lock); 949 933 continue; 950 934 } 951 935 rpc_clear_running(task); 952 - spin_unlock_bh(&queue->lock); 936 + spin_unlock(&queue->lock); 953 937 if (task_is_async) 954 938 return; 955 939 ··· 1092 1076 /* Initialize workqueue for async tasks */ 1093 1077 task->tk_workqueue = task_setup_data->workqueue; 1094 1078 1095 - task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); 1079 + task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client, 1080 + xprt_get(task_setup_data->rpc_xprt)); 1096 1081 1097 1082 task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred); 1098 1083
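The pervasive spin_lock_bh() → spin_lock() conversion in sched.c is possible because queue expiry no longer runs from a softirq timer: __rpc_queue_timer_fn() is now a deferrable delayed work item. The one subtlety is in rpc_set_queue_timer(), which must convert the absolute jiffies deadline callers pass into the relative delay mod_delayed_work() expects, treating already-expired deadlines as "run now". A self-contained sketch of that conversion, assuming jiffies-style wrapping arithmetic:

/* Wrap-safe comparison, same definition as the kernel macro. */
#define time_before_eq(a, b)	((long)((a) - (b)) <= 0)

/* Absolute jiffies deadline -> relative delay for a workqueue.
 * A deadline at or before "now" becomes a zero delay. */
static unsigned long expiry_to_delay(unsigned long expires, unsigned long now)
{
	if (time_before_eq(expires, now))
		return 0;
	return expires - now;
}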
+15 -8
net/sunrpc/stats.c
··· 177 177 178 178 execute = ktime_sub(now, task->tk_start); 179 179 op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); 180 + if (task->tk_status < 0) 181 + op_metrics->om_error_status++; 180 182 181 183 spin_unlock(&op_metrics->om_lock); 182 184 ··· 221 219 a->om_queue = ktime_add(a->om_queue, b->om_queue); 222 220 a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); 223 221 a->om_execute = ktime_add(a->om_execute, b->om_execute); 222 + a->om_error_status += b->om_error_status; 224 223 } 225 224 226 225 static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, 227 226 int op, const struct rpc_procinfo *procs) 228 227 { 229 228 _print_name(seq, op, procs); 230 - seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n", 229 + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", 231 230 stats->om_ops, 232 231 stats->om_ntrans, 233 232 stats->om_timeouts, ··· 236 233 stats->om_bytes_recv, 237 234 ktime_to_ms(stats->om_queue), 238 235 ktime_to_ms(stats->om_rtt), 239 - ktime_to_ms(stats->om_execute)); 236 + ktime_to_ms(stats->om_execute), 237 + stats->om_error_status); 238 + } 239 + 240 + static int do_print_stats(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *seqv) 241 + { 242 + struct seq_file *seq = seqv; 243 + 244 + xprt->ops->print_stats(xprt, seq); 245 + return 0; 240 246 } 241 247 242 248 void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) 243 249 { 244 - struct rpc_xprt *xprt; 245 250 unsigned int op, maxproc = clnt->cl_maxproc; 246 251 247 252 if (!clnt->cl_metrics) ··· 259 248 seq_printf(seq, "p/v: %u/%u (%s)\n", 260 249 clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name); 261 250 262 - rcu_read_lock(); 263 - xprt = rcu_dereference(clnt->cl_xprt); 264 - if (xprt) 265 - xprt->ops->print_stats(xprt, seq); 266 - rcu_read_unlock(); 251 + rpc_clnt_iterate_for_each_xprt(clnt, do_print_stats, seq); 267 252 268 253 seq_printf(seq, "\tper-op statistics\n"); 269 254 for (op = 0; op < maxproc; op++) {
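Two reporting changes land in stats.c: each per-op row grows a ninth field counting operations that completed with a negative tk_status (om_error_status), and transport statistics are now printed for every transport on the switch instead of only cl_xprt. The error accounting itself is a one-line pattern, sketched here with a simplified stand-in metrics struct:

struct op_metrics { unsigned long ops, errors; };

/* Called once per completed RPC; a negative status counts as an error. */
static void count_op(struct op_metrics *m, int status)
{
	m->ops++;
	if (status < 0)
		m->errors++;
}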
+1 -1
net/sunrpc/svc.c
··· 1595 1595 /* Parse and execute the bc call */ 1596 1596 proc_error = svc_process_common(rqstp, argv, resv); 1597 1597 1598 - atomic_inc(&req->rq_xprt->bc_free_slots); 1598 + atomic_dec(&req->rq_xprt->bc_slot_count); 1599 1599 if (!proc_error) { 1600 1600 /* Processing error: drop the request */ 1601 1601 xprt_free_bc_request(req);
+63 -38
net/sunrpc/xprt.c
··· 302 302 303 303 if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task) 304 304 return 1; 305 - spin_lock_bh(&xprt->transport_lock); 305 + spin_lock(&xprt->transport_lock); 306 306 retval = xprt->ops->reserve_xprt(xprt, task); 307 - spin_unlock_bh(&xprt->transport_lock); 307 + spin_unlock(&xprt->transport_lock); 308 308 return retval; 309 309 } 310 310 ··· 381 381 { 382 382 if (xprt->snd_task != task) 383 383 return; 384 - spin_lock_bh(&xprt->transport_lock); 384 + spin_lock(&xprt->transport_lock); 385 385 xprt->ops->release_xprt(xprt, task); 386 - spin_unlock_bh(&xprt->transport_lock); 386 + spin_unlock(&xprt->transport_lock); 387 387 } 388 388 389 389 /* ··· 435 435 436 436 if (req->rq_cong) 437 437 return true; 438 - spin_lock_bh(&xprt->transport_lock); 438 + spin_lock(&xprt->transport_lock); 439 439 ret = __xprt_get_cong(xprt, req) != 0; 440 - spin_unlock_bh(&xprt->transport_lock); 440 + spin_unlock(&xprt->transport_lock); 441 441 return ret; 442 442 } 443 443 EXPORT_SYMBOL_GPL(xprt_request_get_cong); ··· 464 464 xprt_clear_congestion_window_wait(struct rpc_xprt *xprt) 465 465 { 466 466 if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) { 467 - spin_lock_bh(&xprt->transport_lock); 467 + spin_lock(&xprt->transport_lock); 468 468 __xprt_lock_write_next_cong(xprt); 469 - spin_unlock_bh(&xprt->transport_lock); 469 + spin_unlock(&xprt->transport_lock); 470 470 } 471 471 } 472 472 ··· 563 563 564 564 if (!test_bit(XPRT_WRITE_SPACE, &xprt->state)) 565 565 return false; 566 - spin_lock_bh(&xprt->transport_lock); 566 + spin_lock(&xprt->transport_lock); 567 567 ret = xprt_clear_write_space_locked(xprt); 568 - spin_unlock_bh(&xprt->transport_lock); 568 + spin_unlock(&xprt->transport_lock); 569 569 return ret; 570 570 } 571 571 EXPORT_SYMBOL_GPL(xprt_write_space); ··· 634 634 req->rq_retries = 0; 635 635 xprt_reset_majortimeo(req); 636 636 /* Reset the RTT counters == "slow start" */ 637 - spin_lock_bh(&xprt->transport_lock); 637 + spin_lock(&xprt->transport_lock); 638 638 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); 639 - spin_unlock_bh(&xprt->transport_lock); 639 + spin_unlock(&xprt->transport_lock); 640 640 status = -ETIMEDOUT; 641 641 } 642 642 ··· 668 668 void xprt_disconnect_done(struct rpc_xprt *xprt) 669 669 { 670 670 dprintk("RPC: disconnected transport %p\n", xprt); 671 - spin_lock_bh(&xprt->transport_lock); 671 + spin_lock(&xprt->transport_lock); 672 672 xprt_clear_connected(xprt); 673 673 xprt_clear_write_space_locked(xprt); 674 674 xprt_wake_pending_tasks(xprt, -ENOTCONN); 675 - spin_unlock_bh(&xprt->transport_lock); 675 + spin_unlock(&xprt->transport_lock); 676 676 } 677 677 EXPORT_SYMBOL_GPL(xprt_disconnect_done); 678 678 ··· 684 684 void xprt_force_disconnect(struct rpc_xprt *xprt) 685 685 { 686 686 /* Don't race with the test_bit() in xprt_clear_locked() */ 687 - spin_lock_bh(&xprt->transport_lock); 687 + spin_lock(&xprt->transport_lock); 688 688 set_bit(XPRT_CLOSE_WAIT, &xprt->state); 689 689 /* Try to schedule an autoclose RPC call */ 690 690 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) ··· 692 692 else if (xprt->snd_task) 693 693 rpc_wake_up_queued_task_set_status(&xprt->pending, 694 694 xprt->snd_task, -ENOTCONN); 695 - spin_unlock_bh(&xprt->transport_lock); 695 + spin_unlock(&xprt->transport_lock); 696 696 } 697 697 EXPORT_SYMBOL_GPL(xprt_force_disconnect); 698 698 ··· 726 726 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) 727 727 { 728 728 /* Don't race with the test_bit() in xprt_clear_locked() */ 
729 - spin_lock_bh(&xprt->transport_lock); 729 + spin_lock(&xprt->transport_lock); 730 730 if (cookie != xprt->connect_cookie) 731 731 goto out; 732 732 if (test_bit(XPRT_CLOSING, &xprt->state)) ··· 737 737 queue_work(xprtiod_workqueue, &xprt->task_cleanup); 738 738 xprt_wake_pending_tasks(xprt, -EAGAIN); 739 739 out: 740 - spin_unlock_bh(&xprt->transport_lock); 740 + spin_unlock(&xprt->transport_lock); 741 741 } 742 742 743 743 static bool ··· 750 750 xprt_schedule_autodisconnect(struct rpc_xprt *xprt) 751 751 __must_hold(&xprt->transport_lock) 752 752 { 753 + xprt->last_used = jiffies; 753 754 if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt)) 754 755 mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); 755 756 } ··· 760 759 { 761 760 struct rpc_xprt *xprt = from_timer(xprt, t, timer); 762 761 763 - spin_lock(&xprt->transport_lock); 764 762 if (!RB_EMPTY_ROOT(&xprt->recv_queue)) 765 - goto out_abort; 763 + return; 766 764 /* Reset xprt->last_used to avoid connect/autodisconnect cycling */ 767 765 xprt->last_used = jiffies; 768 766 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 769 - goto out_abort; 770 - spin_unlock(&xprt->transport_lock); 767 + return; 771 768 queue_work(xprtiod_workqueue, &xprt->task_cleanup); 772 - return; 773 - out_abort: 774 - spin_unlock(&xprt->transport_lock); 775 769 } 776 770 777 771 bool xprt_lock_connect(struct rpc_xprt *xprt, ··· 775 779 { 776 780 bool ret = false; 777 781 778 - spin_lock_bh(&xprt->transport_lock); 782 + spin_lock(&xprt->transport_lock); 779 783 if (!test_bit(XPRT_LOCKED, &xprt->state)) 780 784 goto out; 781 785 if (xprt->snd_task != task) ··· 783 787 xprt->snd_task = cookie; 784 788 ret = true; 785 789 out: 786 - spin_unlock_bh(&xprt->transport_lock); 790 + spin_unlock(&xprt->transport_lock); 787 791 return ret; 788 792 } 789 793 790 794 void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) 791 795 { 792 - spin_lock_bh(&xprt->transport_lock); 796 + spin_lock(&xprt->transport_lock); 793 797 if (xprt->snd_task != cookie) 794 798 goto out; 795 799 if (!test_bit(XPRT_LOCKED, &xprt->state)) ··· 798 802 xprt->ops->release_xprt(xprt, NULL); 799 803 xprt_schedule_autodisconnect(xprt); 800 804 out: 801 - spin_unlock_bh(&xprt->transport_lock); 805 + spin_unlock(&xprt->transport_lock); 802 806 wake_up_bit(&xprt->state, XPRT_LOCKED); 803 807 } 804 808 ··· 845 849 } 846 850 xprt_release_write(xprt, task); 847 851 } 852 + 853 + /** 854 + * xprt_reconnect_delay - compute the wait before scheduling a connect 855 + * @xprt: transport instance 856 + * 857 + */ 858 + unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt) 859 + { 860 + unsigned long start, now = jiffies; 861 + 862 + start = xprt->stat.connect_start + xprt->reestablish_timeout; 863 + if (time_after(start, now)) 864 + return start - now; 865 + return 0; 866 + } 867 + EXPORT_SYMBOL_GPL(xprt_reconnect_delay); 868 + 869 + /** 870 + * xprt_reconnect_backoff - compute the new re-establish timeout 871 + * @xprt: transport instance 872 + * @init_to: initial reestablish timeout 873 + * 874 + */ 875 + void xprt_reconnect_backoff(struct rpc_xprt *xprt, unsigned long init_to) 876 + { 877 + xprt->reestablish_timeout <<= 1; 878 + if (xprt->reestablish_timeout > xprt->max_reconnect_timeout) 879 + xprt->reestablish_timeout = xprt->max_reconnect_timeout; 880 + if (xprt->reestablish_timeout < init_to) 881 + xprt->reestablish_timeout = init_to; 882 + } 883 + EXPORT_SYMBOL_GPL(xprt_reconnect_backoff); 848 884 849 885 enum xprt_xid_rb_cmp { 850 886 XID_RB_EQUAL, ··· 1041 1013 
1042 1014 if (!xprt_request_need_enqueue_receive(task, req)) 1043 1015 return; 1016 + 1017 + xprt_request_prepare(task->tk_rqstp); 1044 1018 spin_lock(&xprt->queue_lock); 1045 1019 1046 1020 /* Update the softirq receive buffer */ ··· 1442 1412 xprt_inject_disconnect(xprt); 1443 1413 1444 1414 task->tk_flags |= RPC_TASK_SENT; 1445 - spin_lock_bh(&xprt->transport_lock); 1415 + spin_lock(&xprt->transport_lock); 1446 1416 1447 1417 xprt->stat.sends++; 1448 1418 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; 1449 1419 xprt->stat.bklog_u += xprt->backlog.qlen; 1450 1420 xprt->stat.sending_u += xprt->sending.qlen; 1451 1421 xprt->stat.pending_u += xprt->pending.qlen; 1452 - spin_unlock_bh(&xprt->transport_lock); 1422 + spin_unlock(&xprt->transport_lock); 1453 1423 1454 1424 req->rq_connect_cookie = connect_cookie; 1455 1425 out_dequeue: ··· 1795 1765 } 1796 1766 1797 1767 xprt = req->rq_xprt; 1798 - if (task->tk_ops->rpc_count_stats != NULL) 1799 - task->tk_ops->rpc_count_stats(task, task->tk_calldata); 1800 - else if (task->tk_client) 1801 - rpc_count_iostats(task, task->tk_client->cl_metrics); 1802 1768 xprt_request_dequeue_all(task, req); 1803 - spin_lock_bh(&xprt->transport_lock); 1769 + spin_lock(&xprt->transport_lock); 1804 1770 xprt->ops->release_xprt(xprt, task); 1805 1771 if (xprt->ops->release_request) 1806 1772 xprt->ops->release_request(task); 1807 - xprt->last_used = jiffies; 1808 1773 xprt_schedule_autodisconnect(xprt); 1809 - spin_unlock_bh(&xprt->transport_lock); 1774 + spin_unlock(&xprt->transport_lock); 1810 1775 if (req->rq_buffer) 1811 1776 xprt->ops->buf_free(task); 1812 1777 xprt_inject_disconnect(xprt);
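Beyond the lock conversion, xprt.c exports two small helpers that transports now share (xprtrdma uses them in transport.c, below): xprt_reconnect_delay() turns "last connect time plus current reestablish timeout" into a wait, and xprt_reconnect_backoff() doubles that timeout within bounds. The same policy restated as standalone C, with a stand-in state struct and a wrap-safe time macro as in the kernel:

#define time_after(a, b)	((long)((b) - (a)) < 0)

struct conn_state {
	unsigned long connect_start;	   /* jiffies of last connect */
	unsigned long reestablish_timeout; /* current backoff value   */
	unsigned long max_timeout;
};

/* Delay before the next connect attempt; 0 means "now". */
static unsigned long reconnect_delay(const struct conn_state *c,
				     unsigned long now)
{
	unsigned long start = c->connect_start + c->reestablish_timeout;

	return time_after(start, now) ? start - now : 0;
}

/* Double the backoff, clamped into [init_to, max_timeout]. */
static void reconnect_backoff(struct conn_state *c, unsigned long init_to)
{
	c->reestablish_timeout <<= 1;
	if (c->reestablish_timeout > c->max_timeout)
		c->reestablish_timeout = c->max_timeout;
	if (c->reestablish_timeout < init_to)
		c->reestablish_timeout = init_to;
}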
+67 -22
net/sunrpc/xprtmultipath.c
··· 19 19 #include <linux/sunrpc/addr.h> 20 20 #include <linux/sunrpc/xprtmultipath.h> 21 21 22 - typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head, 22 + typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps, 23 23 const struct rpc_xprt *cur); 24 24 25 25 static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular; ··· 36 36 if (xps->xps_nxprts == 0) 37 37 xps->xps_net = xprt->xprt_net; 38 38 xps->xps_nxprts++; 39 + xps->xps_nactive++; 39 40 } 40 41 41 42 /** ··· 52 51 if (xprt == NULL) 53 52 return; 54 53 spin_lock(&xps->xps_lock); 55 - if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) && 56 - !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) 54 + if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) 57 55 xprt_switch_add_xprt_locked(xps, xprt); 58 56 spin_unlock(&xps->xps_lock); 59 57 } ··· 62 62 { 63 63 if (unlikely(xprt == NULL)) 64 64 return; 65 + xps->xps_nactive--; 65 66 xps->xps_nxprts--; 66 67 if (xps->xps_nxprts == 0) 67 68 xps->xps_net = NULL; ··· 103 102 if (xps != NULL) { 104 103 spin_lock_init(&xps->xps_lock); 105 104 kref_init(&xps->xps_kref); 106 - xps->xps_nxprts = 0; 105 + xps->xps_nxprts = xps->xps_nactive = 0; 106 + atomic_long_set(&xps->xps_queuelen, 0); 107 + xps->xps_net = NULL; 107 108 INIT_LIST_HEAD(&xps->xps_xprt_list); 108 109 xps->xps_iter_ops = &rpc_xprt_iter_singular; 109 110 xprt_switch_add_xprt_locked(xps, xprt); ··· 196 193 } 197 194 198 195 static 196 + bool xprt_is_active(const struct rpc_xprt *xprt) 197 + { 198 + return kref_read(&xprt->kref) != 0; 199 + } 200 + 201 + static 199 202 struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head) 200 203 { 201 - return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch); 204 + struct rpc_xprt *pos; 205 + 206 + list_for_each_entry_rcu(pos, head, xprt_switch) { 207 + if (xprt_is_active(pos)) 208 + return pos; 209 + } 210 + return NULL; 202 211 } 203 212 204 213 static ··· 228 213 const struct rpc_xprt *cur) 229 214 { 230 215 struct rpc_xprt *pos; 216 + bool found = false; 231 217 232 218 list_for_each_entry_rcu(pos, head, xprt_switch) { 233 219 if (cur == pos) 220 + found = true; 221 + if (found && xprt_is_active(pos)) 234 222 return pos; 235 223 } 236 224 return NULL; ··· 278 260 const struct rpc_xprt *cur) 279 261 { 280 262 struct rpc_xprt *pos, *prev = NULL; 263 + bool found = false; 281 264 282 265 list_for_each_entry_rcu(pos, head, xprt_switch) { 283 266 if (cur == prev) 267 + found = true; 268 + if (found && xprt_is_active(pos)) 284 269 return pos; 285 270 prev = pos; 286 271 } ··· 291 270 } 292 271 293 272 static 294 - struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head, 273 + struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps, 295 274 struct rpc_xprt **cursor, 296 275 xprt_switch_find_xprt_t find_next) 297 276 { 298 - struct rpc_xprt *cur, *pos, *old; 277 + struct rpc_xprt *pos, *old; 299 278 300 - cur = READ_ONCE(*cursor); 301 - for (;;) { 302 - old = cur; 303 - pos = find_next(head, old); 304 - if (pos == NULL) 305 - break; 306 - cur = cmpxchg_relaxed(cursor, old, pos); 307 - if (cur == old) 308 - break; 309 - } 279 + old = smp_load_acquire(cursor); 280 + pos = find_next(xps, old); 281 + smp_store_release(cursor, pos); 310 282 return pos; 311 283 } 312 284 ··· 311 297 312 298 if (xps == NULL) 313 299 return NULL; 314 - return xprt_switch_set_next_cursor(&xps->xps_xprt_list, 315 - &xpi->xpi_cursor, 316 - find_next); 300 + return xprt_switch_set_next_cursor(xps, 
&xpi->xpi_cursor, find_next); 317 301 } 318 302 319 303 static 320 - struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head, 304 + struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head, 321 305 const struct rpc_xprt *cur) 322 306 { 323 307 struct rpc_xprt *ret; ··· 327 315 } 328 316 329 317 static 318 + struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps, 319 + const struct rpc_xprt *cur) 320 + { 321 + struct list_head *head = &xps->xps_xprt_list; 322 + struct rpc_xprt *xprt; 323 + unsigned int nactive; 324 + 325 + for (;;) { 326 + unsigned long xprt_queuelen, xps_queuelen; 327 + 328 + xprt = __xprt_switch_find_next_entry_roundrobin(head, cur); 329 + if (!xprt) 330 + break; 331 + xprt_queuelen = atomic_long_read(&xprt->queuelen); 332 + xps_queuelen = atomic_long_read(&xps->xps_queuelen); 333 + nactive = READ_ONCE(xps->xps_nactive); 334 + /* Exit loop if xprt_queuelen <= average queue length */ 335 + if (xprt_queuelen * nactive <= xps_queuelen) 336 + break; 337 + cur = xprt; 338 + } 339 + return xprt; 340 + } 341 + 342 + static 330 343 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) 331 344 { 332 345 return xprt_iter_next_entry_multiple(xpi, ··· 359 322 } 360 323 361 324 static 325 + struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps, 326 + const struct rpc_xprt *cur) 327 + { 328 + return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur); 329 + } 330 + 331 + static 362 332 struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi) 363 333 { 364 - return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry); 334 + return xprt_iter_next_entry_multiple(xpi, 335 + xprt_switch_find_next_entry_all); 365 336 } 366 337 367 338 /*
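The interesting rule in xprtmultipath.c is the one flagged in the pull request as "replace division by multiplication": the round-robin iterator accepts a transport only if its own queue length is at or below the switch-wide average, and the natural comparison queuelen <= total / nactive is rewritten as queuelen * nactive <= total so no division sits on the hot path. The predicate in isolation:

#include <stdbool.h>

/* True when this transport's queue is at or below the average over
 * all active transports; multiplying instead of dividing avoids a
 * division per candidate (and any divide-by-zero when nactive == 0). */
static bool at_or_below_average(unsigned long queuelen,
				unsigned long total,
				unsigned int nactive)
{
	return queuelen * nactive <= total;
}

xprt_switch_find_next_entry_roundrobin() simply keeps advancing the cursor until this predicate holds or the list is exhausted.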
+7
net/sunrpc/xprtrdma/backchannel.c
··· 52 52 return maxmsg - RPCRDMA_HDRLEN_MIN; 53 53 } 54 54 55 + unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt) 56 + { 57 + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 58 + 59 + return r_xprt->rx_buf.rb_bc_srv_max_requests; 60 + } 61 + 55 62 static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) 56 63 { 57 64 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+218 -113
net/sunrpc/xprtrdma/frwr_ops.c
··· 144 144 frwr_release_mr(mr); 145 145 } 146 146 147 + /* frwr_reset - Place MRs back on the free list 148 + * @req: request to reset 149 + * 150 + * Used after a failed marshal. For FRWR, this means the MRs 151 + * don't have to be fully released and recreated. 152 + * 153 + * NB: This is safe only as long as none of @req's MRs are 154 + * involved with an ongoing asynchronous FAST_REG or LOCAL_INV 155 + * Work Request. 156 + */ 157 + void frwr_reset(struct rpcrdma_req *req) 158 + { 159 + while (!list_empty(&req->rl_registered)) { 160 + struct rpcrdma_mr *mr; 161 + 162 + mr = rpcrdma_mr_pop(&req->rl_registered); 163 + rpcrdma_mr_unmap_and_put(mr); 164 + } 165 + } 166 + 147 167 /** 148 168 * frwr_init_mr - Initialize one MR 149 169 * @ia: interface adapter ··· 188 168 goto out_list_err; 189 169 190 170 mr->frwr.fr_mr = frmr; 191 - mr->frwr.fr_state = FRWR_IS_INVALID; 192 171 mr->mr_dir = DMA_NONE; 193 172 INIT_LIST_HEAD(&mr->mr_list); 194 173 INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); ··· 317 298 } 318 299 319 300 /** 320 - * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC 321 - * @cq: completion queue (ignored) 322 - * @wc: completed WR 323 - * 324 - */ 325 - static void 326 - frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) 327 - { 328 - struct ib_cqe *cqe = wc->wr_cqe; 329 - struct rpcrdma_frwr *frwr = 330 - container_of(cqe, struct rpcrdma_frwr, fr_cqe); 331 - 332 - /* WARNING: Only wr_cqe and status are reliable at this point */ 333 - if (wc->status != IB_WC_SUCCESS) 334 - frwr->fr_state = FRWR_FLUSHED_FR; 335 - trace_xprtrdma_wc_fastreg(wc, frwr); 336 - } 337 - 338 - /** 339 - * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC 340 - * @cq: completion queue (ignored) 341 - * @wc: completed WR 342 - * 343 - */ 344 - static void 345 - frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) 346 - { 347 - struct ib_cqe *cqe = wc->wr_cqe; 348 - struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, 349 - fr_cqe); 350 - 351 - /* WARNING: Only wr_cqe and status are reliable at this point */ 352 - if (wc->status != IB_WC_SUCCESS) 353 - frwr->fr_state = FRWR_FLUSHED_LI; 354 - trace_xprtrdma_wc_li(wc, frwr); 355 - } 356 - 357 - /** 358 - * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC 359 - * @cq: completion queue (ignored) 360 - * @wc: completed WR 361 - * 362 - * Awaken anyone waiting for an MR to finish being fenced. 
363 - */ 364 - static void 365 - frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) 366 - { 367 - struct ib_cqe *cqe = wc->wr_cqe; 368 - struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr, 369 - fr_cqe); 370 - 371 - /* WARNING: Only wr_cqe and status are reliable at this point */ 372 - if (wc->status != IB_WC_SUCCESS) 373 - frwr->fr_state = FRWR_FLUSHED_LI; 374 - trace_xprtrdma_wc_li_wake(wc, frwr); 375 - complete(&frwr->fr_linv_done); 376 - } 377 - 378 - /** 379 301 * frwr_map - Register a memory region 380 302 * @r_xprt: controlling transport 381 303 * @seg: memory region co-ordinates ··· 338 378 { 339 379 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 340 380 bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; 341 - struct rpcrdma_frwr *frwr; 342 381 struct rpcrdma_mr *mr; 343 382 struct ib_mr *ibmr; 344 383 struct ib_reg_wr *reg_wr; 345 384 int i, n; 346 385 u8 key; 347 386 348 - mr = NULL; 349 - do { 350 - if (mr) 351 - rpcrdma_mr_recycle(mr); 352 - mr = rpcrdma_mr_get(r_xprt); 353 - if (!mr) 354 - return ERR_PTR(-EAGAIN); 355 - } while (mr->frwr.fr_state != FRWR_IS_INVALID); 356 - frwr = &mr->frwr; 357 - frwr->fr_state = FRWR_IS_VALID; 387 + mr = rpcrdma_mr_get(r_xprt); 388 + if (!mr) 389 + goto out_getmr_err; 358 390 359 391 if (nsegs > ia->ri_max_frwr_depth) 360 392 nsegs = ia->ri_max_frwr_depth; ··· 375 423 if (!mr->mr_nents) 376 424 goto out_dmamap_err; 377 425 378 - ibmr = frwr->fr_mr; 426 + ibmr = mr->frwr.fr_mr; 379 427 n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); 380 428 if (unlikely(n != mr->mr_nents)) 381 429 goto out_mapmr_err; ··· 385 433 key = (u8)(ibmr->rkey & 0x000000FF); 386 434 ib_update_fast_reg_key(ibmr, ++key); 387 435 388 - reg_wr = &frwr->fr_regwr; 436 + reg_wr = &mr->frwr.fr_regwr; 389 437 reg_wr->mr = ibmr; 390 438 reg_wr->key = ibmr->rkey; 391 439 reg_wr->access = writing ? 
··· 400 448 *out = mr; 401 449 return seg; 402 450 451 + out_getmr_err: 452 + xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 453 + return ERR_PTR(-EAGAIN); 454 + 403 455 out_dmamap_err: 404 456 mr->mr_dir = DMA_NONE; 405 457 trace_xprtrdma_frwr_sgerr(mr, i); ··· 414 458 trace_xprtrdma_frwr_maperr(mr, n); 415 459 rpcrdma_mr_recycle(mr); 416 460 return ERR_PTR(-EIO); 461 + } 462 + 463 + /** 464 + * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC 465 + * @cq: completion queue (ignored) 466 + * @wc: completed WR 467 + * 468 + */ 469 + static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) 470 + { 471 + struct ib_cqe *cqe = wc->wr_cqe; 472 + struct rpcrdma_frwr *frwr = 473 + container_of(cqe, struct rpcrdma_frwr, fr_cqe); 474 + 475 + /* WARNING: Only wr_cqe and status are reliable at this point */ 476 + trace_xprtrdma_wc_fastreg(wc, frwr); 477 + /* The MR will get recycled when the associated req is retransmitted */ 417 478 } 418 479 419 480 /** ··· 485 512 if (mr->mr_handle == rep->rr_inv_rkey) { 486 513 list_del_init(&mr->mr_list); 487 514 trace_xprtrdma_mr_remoteinv(mr); 488 - mr->frwr.fr_state = FRWR_IS_INVALID; 489 515 rpcrdma_mr_unmap_and_put(mr); 490 516 break; /* only one invalidated MR per RPC */ 491 517 } 492 518 } 493 519 520 + static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr) 521 + { 522 + if (wc->status != IB_WC_SUCCESS) 523 + rpcrdma_mr_recycle(mr); 524 + else 525 + rpcrdma_mr_unmap_and_put(mr); 526 + } 527 + 528 + /** 529 + * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC 530 + * @cq: completion queue (ignored) 531 + * @wc: completed WR 532 + * 533 + */ 534 + static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) 535 + { 536 + struct ib_cqe *cqe = wc->wr_cqe; 537 + struct rpcrdma_frwr *frwr = 538 + container_of(cqe, struct rpcrdma_frwr, fr_cqe); 539 + struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr); 540 + 541 + /* WARNING: Only wr_cqe and status are reliable at this point */ 542 + trace_xprtrdma_wc_li(wc, frwr); 543 + __frwr_release_mr(wc, mr); 544 + } 545 + 546 + /** 547 + * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC 548 + * @cq: completion queue (ignored) 549 + * @wc: completed WR 550 + * 551 + * Awaken anyone waiting for an MR to finish being fenced. 552 + */ 553 + static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) 554 + { 555 + struct ib_cqe *cqe = wc->wr_cqe; 556 + struct rpcrdma_frwr *frwr = 557 + container_of(cqe, struct rpcrdma_frwr, fr_cqe); 558 + struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr); 559 + 560 + /* WARNING: Only wr_cqe and status are reliable at this point */ 561 + trace_xprtrdma_wc_li_wake(wc, frwr); 562 + complete(&frwr->fr_linv_done); 563 + __frwr_release_mr(wc, mr); 564 + } 565 + 494 566 /** 495 567 * frwr_unmap_sync - invalidate memory regions that were registered for @req 496 - * @r_xprt: controlling transport 497 - * @mrs: list of MRs to process 568 + * @r_xprt: controlling transport instance 569 + * @req: rpcrdma_req with a non-empty list of MRs to process 498 570 * 499 - * Sleeps until it is safe for the host CPU to access the 500 - * previously mapped memory regions. 501 - * 502 - * Caller ensures that @mrs is not empty before the call. This 503 - * function empties the list. 571 + * Sleeps until it is safe for the host CPU to access the previously mapped 572 + * memory regions. 
This guarantees that registered MRs are properly fenced 573 + * from the server before the RPC consumer accesses the data in them. It 574 + * also ensures proper Send flow control: waking the next RPC waits until 575 + * this RPC has relinquished all its Send Queue entries. 504 576 */ 505 - void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) 577 + void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 506 578 { 507 579 struct ib_send_wr *first, **prev, *last; 508 580 const struct ib_send_wr *bad_wr; 509 - struct rpcrdma_ia *ia = &r_xprt->rx_ia; 510 581 struct rpcrdma_frwr *frwr; 511 582 struct rpcrdma_mr *mr; 512 - int count, rc; 583 + int rc; 513 584 514 585 /* ORDER: Invalidate all of the MRs first 515 586 * ··· 561 544 * a single ib_post_send() call. 562 545 */ 563 546 frwr = NULL; 564 - count = 0; 565 547 prev = &first; 566 - list_for_each_entry(mr, mrs, mr_list) { 567 - mr->frwr.fr_state = FRWR_IS_INVALID; 548 + while (!list_empty(&req->rl_registered)) { 549 + mr = rpcrdma_mr_pop(&req->rl_registered); 550 + 551 + trace_xprtrdma_mr_localinv(mr); 552 + r_xprt->rx_stats.local_inv_needed++; 568 553 569 554 frwr = &mr->frwr; 570 - trace_xprtrdma_mr_localinv(mr); 571 - 572 555 frwr->fr_cqe.done = frwr_wc_localinv; 573 556 last = &frwr->fr_invwr; 574 - memset(last, 0, sizeof(*last)); 557 + last->next = NULL; 575 558 last->wr_cqe = &frwr->fr_cqe; 559 + last->sg_list = NULL; 560 + last->num_sge = 0; 576 561 last->opcode = IB_WR_LOCAL_INV; 562 + last->send_flags = IB_SEND_SIGNALED; 577 563 last->ex.invalidate_rkey = mr->mr_handle; 578 - count++; 579 564 580 565 *prev = last; 581 566 prev = &last->next; 582 567 } 583 - if (!frwr) 584 - goto unmap; 585 568 586 569 /* Strong send queue ordering guarantees that when the 587 570 * last WR in the chain completes, all WRs in the chain 588 571 * are complete. 589 572 */ 590 - last->send_flags = IB_SEND_SIGNALED; 591 573 frwr->fr_cqe.done = frwr_wc_localinv_wake; 592 574 reinit_completion(&frwr->fr_linv_done); 593 575 ··· 594 578 * replaces the QP. The RPC reply handler won't call us 595 579 * unless ri_id->qp is a valid pointer. 596 580 */ 597 - r_xprt->rx_stats.local_inv_needed++; 598 581 bad_wr = NULL; 599 - rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); 582 + rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); 583 + trace_xprtrdma_post_send(req, rc); 584 + 585 + /* The final LOCAL_INV WR in the chain is supposed to 586 + * do the wake. If it was never posted, the wake will 587 + * not happen, so don't wait in that case. 588 + */ 600 589 if (bad_wr != first) 601 590 wait_for_completion(&frwr->fr_linv_done); 602 - if (rc) 603 - goto out_release; 591 + if (!rc) 592 + return; 604 593 605 - /* ORDER: Now DMA unmap all of the MRs, and return 606 - * them to the free MR list. 607 - */ 608 - unmap: 609 - while (!list_empty(mrs)) { 610 - mr = rpcrdma_mr_pop(mrs); 611 - rpcrdma_mr_unmap_and_put(mr); 612 - } 613 - return; 614 - 615 - out_release: 616 - pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc); 617 - 618 - /* Unmap and release the MRs in the LOCAL_INV WRs that did not 619 - * get posted. 594 + /* Recycle MRs in the LOCAL_INV chain that did not get posted. 
620 595 */ 621 596 while (bad_wr) { 622 597 frwr = container_of(bad_wr, struct rpcrdma_frwr, ··· 618 611 list_del_init(&mr->mr_list); 619 612 rpcrdma_mr_recycle(mr); 620 613 } 614 + } 615 + 616 + /** 617 + * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC 618 + * @cq: completion queue (ignored) 619 + * @wc: completed WR 620 + * 621 + */ 622 + static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) 623 + { 624 + struct ib_cqe *cqe = wc->wr_cqe; 625 + struct rpcrdma_frwr *frwr = 626 + container_of(cqe, struct rpcrdma_frwr, fr_cqe); 627 + struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr); 628 + 629 + /* WARNING: Only wr_cqe and status are reliable at this point */ 630 + trace_xprtrdma_wc_li_done(wc, frwr); 631 + rpcrdma_complete_rqst(frwr->fr_req->rl_reply); 632 + __frwr_release_mr(wc, mr); 633 + } 634 + 635 + /** 636 + * frwr_unmap_async - invalidate memory regions that were registered for @req 637 + * @r_xprt: controlling transport instance 638 + * @req: rpcrdma_req with a non-empty list of MRs to process 639 + * 640 + * This guarantees that registered MRs are properly fenced from the 641 + * server before the RPC consumer accesses the data in them. It also 642 + * ensures proper Send flow control: waking the next RPC waits until 643 + * this RPC has relinquished all its Send Queue entries. 644 + */ 645 + void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 646 + { 647 + struct ib_send_wr *first, *last, **prev; 648 + const struct ib_send_wr *bad_wr; 649 + struct rpcrdma_frwr *frwr; 650 + struct rpcrdma_mr *mr; 651 + int rc; 652 + 653 + /* Chain the LOCAL_INV Work Requests and post them with 654 + * a single ib_post_send() call. 655 + */ 656 + frwr = NULL; 657 + prev = &first; 658 + while (!list_empty(&req->rl_registered)) { 659 + mr = rpcrdma_mr_pop(&req->rl_registered); 660 + 661 + trace_xprtrdma_mr_localinv(mr); 662 + r_xprt->rx_stats.local_inv_needed++; 663 + 664 + frwr = &mr->frwr; 665 + frwr->fr_cqe.done = frwr_wc_localinv; 666 + frwr->fr_req = req; 667 + last = &frwr->fr_invwr; 668 + last->next = NULL; 669 + last->wr_cqe = &frwr->fr_cqe; 670 + last->sg_list = NULL; 671 + last->num_sge = 0; 672 + last->opcode = IB_WR_LOCAL_INV; 673 + last->send_flags = IB_SEND_SIGNALED; 674 + last->ex.invalidate_rkey = mr->mr_handle; 675 + 676 + *prev = last; 677 + prev = &last->next; 678 + } 679 + 680 + /* Strong send queue ordering guarantees that when the 681 + * last WR in the chain completes, all WRs in the chain 682 + * are complete. The last completion will wake up the 683 + * RPC waiter. 684 + */ 685 + frwr->fr_cqe.done = frwr_wc_localinv_done; 686 + 687 + /* Transport disconnect drains the receive CQ before it 688 + * replaces the QP. The RPC reply handler won't call us 689 + * unless ri_id->qp is a valid pointer. 690 + */ 691 + bad_wr = NULL; 692 + rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); 693 + trace_xprtrdma_post_send(req, rc); 694 + if (!rc) 695 + return; 696 + 697 + /* Recycle MRs in the LOCAL_INV chain that did not get posted. 698 + */ 699 + while (bad_wr) { 700 + frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); 701 + mr = container_of(frwr, struct rpcrdma_mr, frwr); 702 + bad_wr = bad_wr->next; 703 + 704 + rpcrdma_mr_recycle(mr); 705 + } 706 + 707 + /* The final LOCAL_INV WR in the chain is supposed to 708 + * do the wake. If it was never posted, the wake will 709 + * not happen, so wake here in that case. 710 + */ 711 + rpcrdma_complete_rqst(req->rl_reply); 621 712 }
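frwr_unmap_sync() and frwr_unmap_async() both build one chain of LOCAL_INV Work Requests and hand it to a single ib_post_send(). Every WR carries a completion handler that releases its MR; after the loop, the handler on the final WR is swapped for a variant that additionally wakes the sleeper (sync) or completes the RPC (async), relying on strong send-queue ordering: when the last WR completes, all earlier ones have. The chain itself is assembled with the pointer-to-pointer idiom, sketched here with a stand-in WR type:

#include <stddef.h>

struct wr {
	struct wr *next;
	void (*done)(struct wr *);
};

static void inv_done(struct wr *w)      { (void)w; /* release this MR */ }
static void inv_done_wake(struct wr *w) { inv_done(w); /* and wake waiter */ }

/* Chain items[0..n-1]; the final entry gets the waking handler. */
static struct wr *chain_local_inv(struct wr *items, int n)
{
	struct wr *first = NULL, **prev = &first, *last = NULL;

	for (int i = 0; i < n; i++) {
		last = &items[i];
		last->next = NULL;
		last->done = inv_done;
		*prev = last;
		prev = &last->next;
	}
	if (last)
		last->done = inv_done_wake;
	return first;
}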
+69 -91
net/sunrpc/xprtrdma/rpc_rdma.c
··· 366 366 unsigned int pos; 367 367 int nsegs; 368 368 369 + if (rtype == rpcrdma_noch) 370 + goto done; 371 + 369 372 pos = rqst->rq_snd_buf.head[0].iov_len; 370 373 if (rtype == rpcrdma_areadch) 371 374 pos = 0; ··· 392 389 nsegs -= mr->mr_nents; 393 390 } while (nsegs); 394 391 395 - return 0; 392 + done: 393 + return encode_item_not_present(xdr); 396 394 } 397 395 398 396 /* Register and XDR encode the Write list. Supports encoding a list ··· 420 416 struct rpcrdma_mr *mr; 421 417 int nsegs, nchunks; 422 418 __be32 *segcount; 419 + 420 + if (wtype != rpcrdma_writech) 421 + goto done; 423 422 424 423 seg = req->rl_segments; 425 424 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, ··· 458 451 /* Update count of segments in this Write chunk */ 459 452 *segcount = cpu_to_be32(nchunks); 460 453 461 - return 0; 454 + done: 455 + return encode_item_not_present(xdr); 462 456 } 463 457 464 458 /* Register and XDR encode the Reply chunk. Supports encoding an array ··· 483 475 struct rpcrdma_mr *mr; 484 476 int nsegs, nchunks; 485 477 __be32 *segcount; 478 + 479 + if (wtype != rpcrdma_replych) 480 + return encode_item_not_present(xdr); 486 481 487 482 seg = req->rl_segments; 488 483 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); ··· 522 511 return 0; 523 512 } 524 513 514 + static void rpcrdma_sendctx_done(struct kref *kref) 515 + { 516 + struct rpcrdma_req *req = 517 + container_of(kref, struct rpcrdma_req, rl_kref); 518 + struct rpcrdma_rep *rep = req->rl_reply; 519 + 520 + rpcrdma_complete_rqst(rep); 521 + rep->rr_rxprt->rx_stats.reply_waits_for_send++; 522 + } 523 + 525 524 /** 526 525 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer 527 526 * @sc: sendctx containing SGEs to unmap ··· 540 519 void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc) 541 520 { 542 521 struct ib_sge *sge; 522 + 523 + if (!sc->sc_unmap_count) 524 + return; 543 525 544 526 /* The first two SGEs contain the transport header and 545 527 * the inline buffer. These are always left mapped so ··· 553 529 ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length, 554 530 DMA_TO_DEVICE); 555 531 556 - if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, 557 - &sc->sc_req->rl_flags)) 558 - wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES); 532 + kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done); 559 533 } 560 534 561 535 /* Prepare an SGE for the RPC-over-RDMA transport header. 
··· 688 666 out: 689 667 sc->sc_wr.num_sge += sge_no; 690 668 if (sc->sc_unmap_count) 691 - __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); 669 + kref_get(&req->rl_kref); 692 670 return true; 693 671 694 672 out_regbuf: ··· 721 699 struct rpcrdma_req *req, u32 hdrlen, 722 700 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 723 701 { 702 + int ret; 703 + 704 + ret = -EAGAIN; 724 705 req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); 725 706 if (!req->rl_sendctx) 726 - return -EAGAIN; 707 + goto err; 727 708 req->rl_sendctx->sc_wr.num_sge = 0; 728 709 req->rl_sendctx->sc_unmap_count = 0; 729 710 req->rl_sendctx->sc_req = req; 730 - __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); 711 + kref_init(&req->rl_kref); 731 712 713 + ret = -EIO; 732 714 if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) 733 - return -EIO; 734 - 715 + goto err; 735 716 if (rtype != rpcrdma_areadch) 736 717 if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype)) 737 - return -EIO; 738 - 718 + goto err; 739 719 return 0; 720 + 721 + err: 722 + trace_xprtrdma_prepsend_failed(&req->rl_slot, ret); 723 + return ret; 740 724 } 741 725 742 726 /** ··· 870 842 * send a Call message with a Position Zero Read chunk and a 871 843 * regular Read chunk at the same time. 872 844 */ 873 - if (rtype != rpcrdma_noch) { 874 - ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype); 875 - if (ret) 876 - goto out_err; 877 - } 878 - ret = encode_item_not_present(xdr); 845 + ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype); 846 + if (ret) 847 + goto out_err; 848 + ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype); 849 + if (ret) 850 + goto out_err; 851 + ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype); 879 852 if (ret) 880 853 goto out_err; 881 854 882 - if (wtype == rpcrdma_writech) { 883 - ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype); 884 - if (ret) 885 - goto out_err; 886 - } 887 - ret = encode_item_not_present(xdr); 888 - if (ret) 889 - goto out_err; 890 - 891 - if (wtype != rpcrdma_replych) 892 - ret = encode_item_not_present(xdr); 893 - else 894 - ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype); 895 - if (ret) 896 - goto out_err; 897 - 898 - trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype); 899 - 900 - ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), 855 + ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len, 901 856 &rqst->rq_snd_buf, rtype); 902 857 if (ret) 903 858 goto out_err; 859 + 860 + trace_xprtrdma_marshal(req, rtype, wtype); 904 861 return 0; 905 862 906 863 out_err: 907 864 trace_xprtrdma_marshal_failed(rqst, ret); 908 - switch (ret) { 909 - case -EAGAIN: 910 - xprt_wait_for_buffer_space(rqst->rq_xprt); 911 - break; 912 - case -ENOBUFS: 913 - break; 914 - default: 915 - r_xprt->rx_stats.failed_marshal_count++; 916 - } 865 + r_xprt->rx_stats.failed_marshal_count++; 866 + frwr_reset(req); 917 867 return ret; 918 868 } 919 869 ··· 1275 1269 goto out; 1276 1270 } 1277 1271 1278 - void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 1272 + static void rpcrdma_reply_done(struct kref *kref) 1279 1273 { 1280 - /* Invalidate and unmap the data payloads before waking 1281 - * the waiting application. This guarantees the memory 1282 - * regions are properly fenced from the server before the 1283 - * application accesses the data. It also ensures proper 1284 - * send flow control: waking the next RPC waits until this 1285 - * RPC has relinquished all its Send Queue entries. 
1286 - */ 1287 - if (!list_empty(&req->rl_registered)) 1288 - frwr_unmap_sync(r_xprt, &req->rl_registered); 1274 + struct rpcrdma_req *req = 1275 + container_of(kref, struct rpcrdma_req, rl_kref); 1289 1276 1290 - /* Ensure that any DMA mapped pages associated with 1291 - * the Send of the RPC Call have been unmapped before 1292 - * allowing the RPC to complete. This protects argument 1293 - * memory not controlled by the RPC client from being 1294 - * re-used before we're done with it. 1295 - */ 1296 - if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { 1297 - r_xprt->rx_stats.reply_waits_for_send++; 1298 - out_of_line_wait_on_bit(&req->rl_flags, 1299 - RPCRDMA_REQ_F_TX_RESOURCES, 1300 - bit_wait, 1301 - TASK_UNINTERRUPTIBLE); 1302 - } 1277 + rpcrdma_complete_rqst(req->rl_reply); 1303 1278 } 1304 1279 1305 - /* Reply handling runs in the poll worker thread. Anything that 1306 - * might wait is deferred to a separate workqueue. 1307 - */ 1308 - void rpcrdma_deferred_completion(struct work_struct *work) 1309 - { 1310 - struct rpcrdma_rep *rep = 1311 - container_of(work, struct rpcrdma_rep, rr_work); 1312 - struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); 1313 - struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; 1314 - 1315 - trace_xprtrdma_defer_cmp(rep); 1316 - if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) 1317 - frwr_reminv(rep, &req->rl_registered); 1318 - rpcrdma_release_rqst(r_xprt, req); 1319 - rpcrdma_complete_rqst(rep); 1320 - } 1321 - 1322 - /* Process received RPC/RDMA messages. 1280 + /** 1281 + * rpcrdma_reply_handler - Process received RPC/RDMA messages 1282 + * @rep: Incoming rpcrdma_rep object to process 1323 1283 * 1324 1284 * Errors must result in the RPC task either being awakened, or 1325 1285 * allowed to timeout, to discover the errors at that time. ··· 1332 1360 else if (credits > buf->rb_max_requests) 1333 1361 credits = buf->rb_max_requests; 1334 1362 if (buf->rb_credits != credits) { 1335 - spin_lock_bh(&xprt->transport_lock); 1363 + spin_lock(&xprt->transport_lock); 1336 1364 buf->rb_credits = credits; 1337 1365 xprt->cwnd = credits << RPC_CWNDSHIFT; 1338 - spin_unlock_bh(&xprt->transport_lock); 1366 + spin_unlock(&xprt->transport_lock); 1339 1367 } 1340 1368 1341 1369 req = rpcr_to_rdmar(rqst); ··· 1345 1373 } 1346 1374 req->rl_reply = rep; 1347 1375 rep->rr_rqst = rqst; 1348 - clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 1349 1376 1350 1377 trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); 1351 - queue_work(buf->rb_completion_wq, &rep->rr_work); 1378 + 1379 + if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) 1380 + frwr_reminv(rep, &req->rl_registered); 1381 + if (!list_empty(&req->rl_registered)) 1382 + frwr_unmap_async(r_xprt, req); 1383 + /* LocalInv completion will complete the RPC */ 1384 + else 1385 + kref_put(&req->rl_kref, rpcrdma_reply_done); 1352 1386 return; 1353 1387 1354 1388 out_badversion:
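The rpc_rdma.c side replaces the RPCRDMA_REQ_F_TX_RESOURCES bit-wait with a reference count on the request (rl_kref): the reply path holds one reference, the Send-unmap path takes another while SGEs are DMA-mapped, and whichever side drops the last reference completes the RPC, with no sleeping left in the reply handler. The shape of that pattern, using a C11 atomic in place of the kernel's kref:

#include <stdatomic.h>

struct req {
	atomic_int refs;		/* plays the role of rl_kref */
	void (*complete)(struct req *);
};

static void req_start(struct req *r) { atomic_store(&r->refs, 1); }

/* Send path: SGEs are DMA-mapped, so pin the request. */
static void req_get(struct req *r) { atomic_fetch_add(&r->refs, 1); }

/* Reply handler and Send-unmap handler both drop a reference;
 * whichever runs second sees the count reach zero and completes. */
static void req_put(struct req *r)
{
	if (atomic_fetch_sub(&r->refs, 1) == 1)
		r->complete(r);
}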
+2 -2
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 72 72 else if (credits > r_xprt->rx_buf.rb_bc_max_requests) 73 73 credits = r_xprt->rx_buf.rb_bc_max_requests; 74 74 75 - spin_lock_bh(&xprt->transport_lock); 75 + spin_lock(&xprt->transport_lock); 76 76 xprt->cwnd = credits << RPC_CWNDSHIFT; 77 - spin_unlock_bh(&xprt->transport_lock); 77 + spin_unlock(&xprt->transport_lock); 78 78 79 79 spin_lock(&xprt->queue_lock); 80 80 ret = 0;
+4 -4
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 226 226 * Enqueue the new transport on the accept queue of the listening 227 227 * transport 228 228 */ 229 - spin_lock_bh(&listen_xprt->sc_lock); 229 + spin_lock(&listen_xprt->sc_lock); 230 230 list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q); 231 - spin_unlock_bh(&listen_xprt->sc_lock); 231 + spin_unlock(&listen_xprt->sc_lock); 232 232 233 233 set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags); 234 234 svc_xprt_enqueue(&listen_xprt->sc_xprt); ··· 401 401 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); 402 402 clear_bit(XPT_CONN, &xprt->xpt_flags); 403 403 /* Get the next entry off the accept list */ 404 - spin_lock_bh(&listen_rdma->sc_lock); 404 + spin_lock(&listen_rdma->sc_lock); 405 405 if (!list_empty(&listen_rdma->sc_accept_q)) { 406 406 newxprt = list_entry(listen_rdma->sc_accept_q.next, 407 407 struct svcxprt_rdma, sc_accept_q); ··· 409 409 } 410 410 if (!list_empty(&listen_rdma->sc_accept_q)) 411 411 set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags); 412 - spin_unlock_bh(&listen_rdma->sc_lock); 412 + spin_unlock(&listen_rdma->sc_lock); 413 413 if (!newxprt) 414 414 return NULL; 415 415
+65 -19
net/sunrpc/xprtrdma/transport.c
··· 298 298 module_put(THIS_MODULE); 299 299 } 300 300 301 + /* 60 second timeout, no retries */ 301 302 static const struct rpc_timeout xprt_rdma_default_timeout = { 302 303 .to_initval = 60 * HZ, 303 304 .to_maxval = 60 * HZ, ··· 324 323 if (!xprt) 325 324 return ERR_PTR(-ENOMEM); 326 325 327 - /* 60 second timeout, no retries */ 328 326 xprt->timeout = &xprt_rdma_default_timeout; 327 + xprt->connect_timeout = xprt->timeout->to_initval; 328 + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; 329 329 xprt->bind_timeout = RPCRDMA_BIND_TO; 330 330 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 331 331 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; ··· 489 487 } 490 488 491 489 /** 492 - * xprt_rdma_connect - try to establish a transport connection 490 + * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection 491 + * @xprt: controlling transport instance 492 + * @connect_timeout: reconnect timeout after client disconnects 493 + * @reconnect_timeout: reconnect timeout after server disconnects 494 + * 495 + */ 496 + static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt, 497 + unsigned long connect_timeout, 498 + unsigned long reconnect_timeout) 499 + { 500 + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 501 + 502 + trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout); 503 + 504 + spin_lock(&xprt->transport_lock); 505 + 506 + if (connect_timeout < xprt->connect_timeout) { 507 + struct rpc_timeout to; 508 + unsigned long initval; 509 + 510 + to = *xprt->timeout; 511 + initval = connect_timeout; 512 + if (initval < RPCRDMA_INIT_REEST_TO << 1) 513 + initval = RPCRDMA_INIT_REEST_TO << 1; 514 + to.to_initval = initval; 515 + to.to_maxval = initval; 516 + r_xprt->rx_timeout = to; 517 + xprt->timeout = &r_xprt->rx_timeout; 518 + xprt->connect_timeout = connect_timeout; 519 + } 520 + 521 + if (reconnect_timeout < xprt->max_reconnect_timeout) 522 + xprt->max_reconnect_timeout = reconnect_timeout; 523 + 524 + spin_unlock(&xprt->transport_lock); 525 + } 526 + 527 + /** 528 + * xprt_rdma_connect - schedule an attempt to reconnect 493 529 * @xprt: transport state 494 - * @task: RPC scheduler context 530 + * @task: RPC scheduler context (unused) 495 531 * 496 532 */ 497 533 static void 498 534 xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) 499 535 { 500 536 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 537 + unsigned long delay; 501 538 502 539 trace_xprtrdma_op_connect(r_xprt); 540 + 541 + delay = 0; 503 542 if (r_xprt->rx_ep.rep_connected != 0) { 504 - /* Reconnect */ 505 - schedule_delayed_work(&r_xprt->rx_connect_worker, 506 - xprt->reestablish_timeout); 507 - xprt->reestablish_timeout <<= 1; 508 - if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) 509 - xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; 510 - else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) 511 - xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 512 - } else { 513 - schedule_delayed_work(&r_xprt->rx_connect_worker, 0); 514 - if (!RPC_IS_ASYNC(task)) 515 - flush_delayed_work(&r_xprt->rx_connect_worker); 543 + delay = xprt_reconnect_delay(xprt); 544 + xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); 516 545 } 546 + queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, 547 + delay); 517 548 } 518 549 519 550 /** ··· 585 550 static void 586 551 xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) 587 552 { 553 + struct rpcrdma_xprt *r_xprt = 554 + container_of(xprt, struct rpcrdma_xprt, rx_xprt); 555 + 588 556 
memset(rqst, 0, sizeof(*rqst)); 589 - rpcrdma_buffer_put(rpcr_to_rdmar(rqst)); 557 + rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); 590 558 rpc_wake_up_next(&xprt->backlog); 591 559 } 592 560 ··· 656 618 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 657 619 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 658 620 659 - if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) 660 - rpcrdma_release_rqst(r_xprt, req); 661 621 trace_xprtrdma_op_free(task, req); 622 + 623 + if (!list_empty(&req->rl_registered)) 624 + frwr_unmap_sync(r_xprt, req); 625 + 626 + /* XXX: If the RPC is completing because of a signal and 627 + * not because a reply was received, we ought to ensure 628 + * that the Send completion has fired, so that memory 629 + * involved with the Send is not still visible to the NIC. 630 + */ 662 631 } 663 632 664 633 /** ··· 712 667 goto drop_connection; 713 668 rqst->rq_xtime = ktime_get(); 714 669 715 - __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); 716 670 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) 717 671 goto drop_connection; 718 672 ··· 804 760 .send_request = xprt_rdma_send_request, 805 761 .close = xprt_rdma_close, 806 762 .destroy = xprt_rdma_destroy, 763 + .set_connect_timeout = xprt_rdma_tcp_set_connect_timeout, 807 764 .print_stats = xprt_rdma_print_stats, 808 765 .enable_swap = xprt_rdma_enable_swap, 809 766 .disable_swap = xprt_rdma_disable_swap, ··· 812 767 #if defined(CONFIG_SUNRPC_BACKCHANNEL) 813 768 .bc_setup = xprt_rdma_bc_setup, 814 769 .bc_maxpayload = xprt_rdma_bc_maxpayload, 770 + .bc_num_slots = xprt_rdma_bc_max_slots, 815 771 .bc_free_rqst = xprt_rdma_bc_free_rqst, 816 772 .bc_destroy = xprt_rdma_bc_destroy, 817 773 #endif
+52 -65
net/sunrpc/xprtrdma/verbs.c
··· 89 89 */
90 90 static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
91 91 {
92 - struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
93 92 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
94 93 
95 94 /* Flush Receives, then wait for deferred Reply work
96 95 * to complete.
97 96 */
98 97 ib_drain_rq(ia->ri_id->qp);
99 - drain_workqueue(buf->rb_completion_wq);
100 98 
101 99 /* Deferred Reply processing might have scheduled
102 100 * local invalidations.
··· 899 901 * completions recently. This is a sign the Send Queue is
900 902 * backing up. Cause the caller to pause and try again.
901 903 */
902 - set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
904 + xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
903 905 r_xprt->rx_stats.empty_sendctx_q++;
904 906 return NULL;
905 907 }
··· 934 936 /* Paired with READ_ONCE */
935 937 smp_store_release(&buf->rb_sc_tail, next_tail);
936 938 
937 - if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
938 - smp_mb__after_atomic();
939 - xprt_write_space(&sc->sc_xprt->rx_xprt);
940 - }
939 + xprt_write_space(&sc->sc_xprt->rx_xprt);
941 940 }
942 941 
943 942 static void
··· 972 977 r_xprt->rx_stats.mrs_allocated += count;
973 978 spin_unlock(&buf->rb_mrlock);
974 979 trace_xprtrdma_createmrs(r_xprt, count);
975 - 
976 - xprt_write_space(&r_xprt->rx_xprt);
977 980 }
978 981 
979 982 static void
··· 983 990 rx_buf);
984 991 
985 992 rpcrdma_mrs_create(r_xprt);
993 + xprt_write_space(&r_xprt->rx_xprt);
986 994 }
987 995 
988 996 /**
··· 1019 1025 if (!req->rl_recvbuf)
1020 1026 goto out4;
1021 1027 
1022 - req->rl_buffer = buffer;
1023 1028 INIT_LIST_HEAD(&req->rl_registered);
1024 1029 spin_lock(&buffer->rb_lock);
1025 1030 list_add(&req->rl_all, &buffer->rb_allreqs);
··· 1035 1042 return NULL;
1036 1043 }
1037 1044 
1038 - static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
1045 + static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
1046 + bool temp)
1039 1047 {
1040 - struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1041 1048 struct rpcrdma_rep *rep;
1042 1049 
1043 1050 rep = kzalloc(sizeof(*rep), GFP_KERNEL);
··· 1048 1055 DMA_FROM_DEVICE, GFP_KERNEL);
1049 1056 if (!rep->rr_rdmabuf)
1050 1057 goto out_free;
1058 + 
1051 1059 xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
1052 1060 rdmab_length(rep->rr_rdmabuf));
1053 - 
1054 1061 rep->rr_cqe.done = rpcrdma_wc_receive;
1055 1062 rep->rr_rxprt = r_xprt;
1056 - INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
1057 1063 rep->rr_recv_wr.next = NULL;
1058 1064 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
1059 1065 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1060 1066 rep->rr_recv_wr.num_sge = 1;
1061 1067 rep->rr_temp = temp;
1062 - 
1063 - spin_lock(&buf->rb_lock);
1064 - list_add(&rep->rr_list, &buf->rb_recv_bufs);
1065 - spin_unlock(&buf->rb_lock);
1066 - return true;
1068 + return rep;
1067 1069 
1068 1070 out_free:
1069 1071 kfree(rep);
1070 1072 out:
1071 - return false;
1073 + return NULL;
1072 1074 }
1073 1075 
1074 1076 /**
··· 1077 1089 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1078 1090 int i, rc;
1079 1091 
1080 - buf->rb_flags = 0;
1081 1092 buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
1082 1093 buf->rb_bc_srv_max_requests = 0;
1083 1094 spin_lock_init(&buf->rb_mrlock);
··· 1108 1121 rc = rpcrdma_sendctxs_create(r_xprt);
1109 1122 if (rc)
1110 1123 goto out;
1111 - 
1112 - buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
1113 - WQ_MEM_RECLAIM | WQ_HIGHPRI,
1114 - 0,
1115 - r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
1116 - if (!buf->rb_completion_wq) {
1117 - rc = -ENOMEM;
1118 - goto out;
1119 - }
1120 1124 
1121 1125 return 0;
1122 1126 out:
··· 1181 1203 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1182 1204 {
1183 1205 cancel_delayed_work_sync(&buf->rb_refresh_worker);
1184 - 
1185 - if (buf->rb_completion_wq) {
1186 - destroy_workqueue(buf->rb_completion_wq);
1187 - buf->rb_completion_wq = NULL;
1188 - }
1189 1206 
1190 1207 rpcrdma_sendctxs_destroy(buf);
··· 1298 1325 
1299 1326 /**
1300 1327 * rpcrdma_buffer_put - Put request/reply buffers back into pool
1328 + * @buffers: buffer pool
1301 1329 * @req: object to return
1302 1330 *
1303 1331 */
1304 - void
1305 - rpcrdma_buffer_put(struct rpcrdma_req *req)
1332 + void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
1306 1333 {
1307 - struct rpcrdma_buffer *buffers = req->rl_buffer;
1308 1334 struct rpcrdma_rep *rep = req->rl_reply;
1309 1335 
1310 1336 req->rl_reply = NULL;
··· 1456 1484 struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
1457 1485 int rc;
1458 1486 
1459 - if (!ep->rep_send_count ||
1460 - test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
1487 + if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
1461 1488 send_wr->send_flags |= IB_SEND_SIGNALED;
1462 1489 ep->rep_send_count = ep->rep_send_batch;
1463 1490 } else {
··· 1476 1505 {
1477 1506 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1478 1507 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
1479 - struct ib_recv_wr *wr, *bad_wr;
1508 + struct ib_recv_wr *i, *wr, *bad_wr;
1509 + struct rpcrdma_rep *rep;
1480 1510 int needed, count, rc;
1481 1511 
1482 1512 rc = 0;
1483 1513 count = 0;
1514 + 
1484 1515 needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
1485 1516 if (ep->rep_receive_count > needed)
1486 1517 goto out;
··· 1490 1517 if (!temp)
1491 1518 needed += RPCRDMA_MAX_RECV_BATCH;
1492 1519 
1493 - count = 0;
1520 + /* fast path: all needed reps can be found on the free list */
1494 1521 wr = NULL;
1522 + spin_lock(&buf->rb_lock);
1495 1523 while (needed) {
1496 - struct rpcrdma_regbuf *rb;
1497 - struct rpcrdma_rep *rep;
1498 - 
1499 - spin_lock(&buf->rb_lock);
1500 1524 rep = list_first_entry_or_null(&buf->rb_recv_bufs,
1501 1525 struct rpcrdma_rep, rr_list);
1502 - if (likely(rep))
1503 - list_del(&rep->rr_list);
1504 - spin_unlock(&buf->rb_lock);
1505 - if (!rep) {
1506 - if (!rpcrdma_rep_create(r_xprt, temp))
1507 - break;
1508 - continue;
1509 - }
1510 - 
1511 - rb = rep->rr_rdmabuf;
1512 - if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
1513 - rpcrdma_recv_buffer_put(rep);
1526 + if (!rep)
1514 1527 break;
1515 - }
1516 1528 
1517 - trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
1529 + list_del(&rep->rr_list);
1518 1530 rep->rr_recv_wr.next = wr;
1519 1531 wr = &rep->rr_recv_wr;
1520 - ++count;
1521 1532 --needed;
1522 1533 }
1523 - if (!count)
1534 + spin_unlock(&buf->rb_lock);
1535 + 
1536 + while (needed) {
1537 + rep = rpcrdma_rep_create(r_xprt, temp);
1538 + if (!rep)
1539 + break;
1540 + 
1541 + rep->rr_recv_wr.next = wr;
1542 + wr = &rep->rr_recv_wr;
1543 + --needed;
1544 + }
1545 + if (!wr)
1524 1546 goto out;
1547 + 
1548 + for (i = wr; i; i = i->next) {
1549 + rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
1550 + 
1551 + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
1552 + goto release_wrs;
1553 + 
1554 + trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
1555 + ++count;
1556 + }
1525 1557 
1526 1558 rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
1527 1559 (const struct ib_recv_wr **)&bad_wr);
1560 + out:
1561 + trace_xprtrdma_post_recvs(r_xprt, count, rc);
1528 1562 if (rc) {
1529 - for (wr = bad_wr; wr; wr = wr->next) {
1563 + for (wr = bad_wr; wr;) {
1530 1564 struct rpcrdma_rep *rep;
1531 1565 
1532 1566 rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
1567 + wr = wr->next;
1533 1568 rpcrdma_recv_buffer_put(rep);
1534 1569 --count;
1535 1570 }
1536 1571 }
1537 1572 ep->rep_receive_count += count;
1538 - out:
1539 - trace_xprtrdma_post_recvs(r_xprt, count, rc);
1573 + return;
1574 + 
1575 + release_wrs:
1576 + for (i = wr; i;) {
1577 + rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
1578 + i = i->next;
1579 + rpcrdma_recv_buffer_put(rep);
1580 + }
1540 1581 }
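Note on the error paths in the hunk above: rpcrdma_recv_buffer_put() can free the rpcrdma_rep that embeds the ib_recv_wr being walked, which is why both cleanup loops now load the next pointer before putting the buffer; the old for (wr = bad_wr; wr; wr = wr->next) form read wr->next out of freed memory. A minimal sketch of the idiom, using hypothetical node/node_put names rather than the kernel types:

#include <stdlib.h>

/* Hypothetical list node whose put() releases the memory embedding
 * the link, as rpcrdma_recv_buffer_put() can for an rpcrdma_rep. */
struct node {
        struct node *next;
};

static void node_put(struct node *n)
{
        free(n);                        /* n->next is gone after this */
}

static void release_chain(struct node *n)
{
        while (n) {
                struct node *next = n->next;    /* read the link first */

                node_put(n);
                n = next;               /* never dereference n again */
        }
}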
+13 -32
net/sunrpc/xprtrdma/xprt_rdma.h
··· 44 44 
45 45 #include <linux/wait.h> /* wait_queue_head_t, etc */
46 46 #include <linux/spinlock.h> /* spinlock_t, etc */
47 - #include <linux/atomic.h> /* atomic_t, etc */
47 + #include <linux/atomic.h> /* atomic_t, etc */
48 + #include <linux/kref.h> /* struct kref */
48 49 #include <linux/workqueue.h> /* struct work_struct */
49 50 
50 51 #include <rdma/rdma_cm.h> /* RDMA connection api */
··· 203 202 bool rr_temp;
204 203 struct rpcrdma_regbuf *rr_rdmabuf;
205 204 struct rpcrdma_xprt *rr_rxprt;
206 - struct work_struct rr_work;
205 + struct rpc_rqst *rr_rqst;
207 206 struct xdr_buf rr_hdrbuf;
208 207 struct xdr_stream rr_stream;
209 - struct rpc_rqst *rr_rqst;
210 208 struct list_head rr_list;
211 209 struct ib_recv_wr rr_recv_wr;
212 210 };
··· 240 240 * An external memory region is any buffer or page that is registered
241 241 * on the fly (ie, not pre-registered).
242 242 */
243 - enum rpcrdma_frwr_state {
244 - FRWR_IS_INVALID, /* ready to be used */
245 - FRWR_IS_VALID, /* in use */
246 - FRWR_FLUSHED_FR, /* flushed FASTREG WR */
247 - FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
248 - };
249 - 
243 + struct rpcrdma_req;
250 244 struct rpcrdma_frwr {
251 245 struct ib_mr *fr_mr;
252 246 struct ib_cqe fr_cqe;
253 - enum rpcrdma_frwr_state fr_state;
254 247 struct completion fr_linv_done;
248 + struct rpcrdma_req *fr_req;
255 249 union {
256 250 struct ib_reg_wr fr_regwr;
257 251 struct ib_send_wr fr_invwr;
··· 320 326 struct rpcrdma_req {
321 327 struct list_head rl_list;
322 328 struct rpc_rqst rl_slot;
323 - struct rpcrdma_buffer *rl_buffer;
324 329 struct rpcrdma_rep *rl_reply;
325 330 struct xdr_stream rl_stream;
326 331 struct xdr_buf rl_hdrbuf;
··· 329 336 struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
330 337 
331 338 struct list_head rl_all;
332 - unsigned long rl_flags;
339 + struct kref rl_kref;
333 340 
334 341 struct list_head rl_registered; /* registered segments */
335 342 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
336 - };
337 - 
338 - /* rl_flags */
339 - enum {
340 - RPCRDMA_REQ_F_PENDING = 0,
341 - RPCRDMA_REQ_F_TX_RESOURCES,
342 343 };
343 344 
344 345 static inline struct rpcrdma_req *
··· 378 391 struct list_head rb_recv_bufs;
379 392 struct list_head rb_allreqs;
380 393 
381 - unsigned long rb_flags;
382 394 u32 rb_max_requests;
383 395 u32 rb_credits; /* most recent credit grant */
384 396 
385 397 u32 rb_bc_srv_max_requests;
386 398 u32 rb_bc_max_requests;
387 399 
388 - struct workqueue_struct *rb_completion_wq;
389 400 struct delayed_work rb_refresh_worker;
390 - };
391 - 
392 - /* rb_flags */
393 - enum {
394 - RPCRDMA_BUF_F_EMPTY_SCQ = 0,
395 401 };
396 402 
397 403 /*
··· 432 452 struct rpcrdma_ep rx_ep;
433 453 struct rpcrdma_buffer rx_buf;
434 454 struct delayed_work rx_connect_worker;
455 + struct rpc_timeout rx_timeout;
435 456 struct rpcrdma_stats rx_stats;
436 457 };
··· 499 518 }
500 519 
501 520 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
502 - void rpcrdma_buffer_put(struct rpcrdma_req *);
521 + void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
522 + struct rpcrdma_req *req);
503 523 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
504 524 
505 525 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
··· 546 564 /* Memory registration calls xprtrdma/frwr_ops.c
547 565 */
548 566 bool frwr_is_supported(struct ib_device *device);
567 + void frwr_reset(struct rpcrdma_req *req);
549 568 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
550 569 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
551 570 void frwr_release_mr(struct rpcrdma_mr *mr);
··· 557 574 struct rpcrdma_mr **mr);
558 575 int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
559 576 void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
560 - void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
561 - struct list_head *mrs);
577 + void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
578 + void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
562 579 
563 580 /*
564 581 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
··· 581 598 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
582 599 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
583 600 void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
584 - void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
585 - struct rpcrdma_req *req);
586 - void rpcrdma_deferred_completion(struct work_struct *work);
587 601 
588 602 static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
589 603 {
··· 605 625 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
606 626 int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
607 627 size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
628 + unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
608 629 int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
609 630 void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
610 631 int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
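The header change above is the other half of the kref_read() test that appeared in verbs.c: the ad-hoc rl_flags bits on struct rpcrdma_req are replaced by a struct kref, one reference per outstanding user of the request. A short sketch of the standard kref idiom this builds on; demo_req and its helpers are illustrative and not part of the patch:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

/* Illustrative object: each outstanding user (say, a Send completion
 * and a Reply handler) holds one reference. */
struct demo_req {
        struct kref dr_kref;
        /* ... request state ... */
};

static void demo_req_free(struct kref *kref)
{
        struct demo_req *req = container_of(kref, struct demo_req, dr_kref);

        kfree(req);
}

static struct demo_req *demo_req_alloc(void)
{
        struct demo_req *req = kzalloc(sizeof(*req), GFP_KERNEL);

        if (req)
                kref_init(&req->dr_kref);       /* refcount starts at 1 */
        return req;
}

static void demo_req_done(struct demo_req *req)
{
        kref_put(&req->dr_kref, demo_req_free); /* last put frees */
}

Note that kref_read() returns only an instantaneous snapshot, so the verbs.c hunk uses it as a heuristic (whether to signal a Send), never for the free decision itself.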
+87 -39
net/sunrpc/xprtsock.c
··· 880 880 req->rq_slen);
881 881 
882 882 /* Protect against races with write_space */
883 - spin_lock_bh(&xprt->transport_lock);
883 + spin_lock(&xprt->transport_lock);
884 884 
885 885 /* Don't race with disconnect */
886 886 if (xprt_connected(xprt)) {
··· 890 890 } else
891 891 ret = -ENOTCONN;
892 892 
893 - spin_unlock_bh(&xprt->transport_lock);
893 + spin_unlock(&xprt->transport_lock);
894 894 
895 895 /* Race breaker in case memory is freed before above code is called */
896 896 if (ret == -EAGAIN) {
··· 909 909 static void
910 910 xs_stream_prepare_request(struct rpc_rqst *req)
911 911 {
912 + xdr_free_bvec(&req->rq_rcv_buf);
912 913 req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
913 914 }
··· 1212 1211 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1213 1212 
1214 1213 clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
1214 + clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
1215 + clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
1216 + clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
1217 + }
1218 + 
1219 + static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
1220 + {
1221 + set_bit(nr, &transport->sock_state);
1222 + queue_work(xprtiod_workqueue, &transport->error_worker);
1215 1223 }
1216 1224 
1217 1225 static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
··· 1241 1231 */
1242 1232 static void xs_error_report(struct sock *sk)
1243 1233 {
1234 + struct sock_xprt *transport;
1244 1235 struct rpc_xprt *xprt;
1245 1236 int err;
1246 1237 
··· 1249 1238 if (!(xprt = xprt_from_sock(sk)))
1250 1239 goto out;
1251 1240 
1241 + transport = container_of(xprt, struct sock_xprt, xprt);
1252 1242 err = -sk->sk_err;
1253 1243 if (err == 0)
1254 1244 goto out;
1255 1245 dprintk("RPC: xs_error_report client %p, error=%d...\n",
1256 1246 xprt, -err);
1257 1247 trace_rpc_socket_error(xprt, sk->sk_socket, err);
1258 - xprt_wake_pending_tasks(xprt, err);
1248 + xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
1259 1249 out:
1260 1250 read_unlock_bh(&sk->sk_callback_lock);
1261 1251 }
··· 1345 1333 cancel_delayed_work_sync(&transport->connect_worker);
1346 1334 xs_close(xprt);
1347 1335 cancel_work_sync(&transport->recv_worker);
1336 + cancel_work_sync(&transport->error_worker);
1348 1337 xs_xprt_free(xprt);
1349 1338 module_put(THIS_MODULE);
1350 1339 }
··· 1399 1386 }
1400 1387 
1401 1388 
1402 - spin_lock_bh(&xprt->transport_lock);
1389 + spin_lock(&xprt->transport_lock);
1403 1390 xprt_adjust_cwnd(xprt, task, copied);
1404 - spin_unlock_bh(&xprt->transport_lock);
1391 + spin_unlock(&xprt->transport_lock);
1405 1392 spin_lock(&xprt->queue_lock);
1406 1393 xprt_complete_rqst(task, copied);
1407 1394 __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
··· 1511 1498 trace_rpc_socket_state_change(xprt, sk->sk_socket);
1512 1499 switch (sk->sk_state) {
1513 1500 case TCP_ESTABLISHED:
1514 - spin_lock(&xprt->transport_lock);
1515 1501 if (!xprt_test_and_set_connected(xprt)) {
1516 1502 xprt->connect_cookie++;
1517 1503 clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
··· 1519 1507 xprt->stat.connect_count++;
1520 1508 xprt->stat.connect_time += (long)jiffies -
1521 1509 xprt->stat.connect_start;
1522 - xprt_wake_pending_tasks(xprt, -EAGAIN);
1510 + xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING);
1523 1511 }
1524 - spin_unlock(&xprt->transport_lock);
1525 1512 break;
1526 1513 case TCP_FIN_WAIT1:
1527 1514 /* The client initiated a shutdown of the socket */
··· 1536 1525 /* The server initiated a shutdown of the socket */
1537 1526 xprt->connect_cookie++;
1538 1527 clear_bit(XPRT_CONNECTED, &xprt->state);
1539 - xs_tcp_force_close(xprt);
1528 + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
1540 1529 /* fall through */
1541 1530 case TCP_CLOSING:
1542 1531 /*
··· 1558 1547 xprt_clear_connecting(xprt);
1559 1548 clear_bit(XPRT_CLOSING, &xprt->state);
1560 1549 /* Trigger the socket release */
1561 - xs_tcp_force_close(xprt);
1550 + xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
1562 1551 }
1563 1552 out:
1564 1553 read_unlock_bh(&sk->sk_callback_lock);
··· 1567 1556 static void xs_write_space(struct sock *sk)
1568 1557 {
1569 1558 struct socket_wq *wq;
1559 + struct sock_xprt *transport;
1570 1560 struct rpc_xprt *xprt;
1571 1561 
1572 1562 if (!sk->sk_socket)
··· 1576 1564 
1577 1565 if (unlikely(!(xprt = xprt_from_sock(sk))))
1578 1566 return;
1567 + transport = container_of(xprt, struct sock_xprt, xprt);
1579 1568 rcu_read_lock();
1580 1569 wq = rcu_dereference(sk->sk_wq);
1581 1570 if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
1582 1571 goto out;
1583 1572 
1584 - if (xprt_write_space(xprt))
1585 - sk->sk_write_pending--;
1573 + xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
1574 + sk->sk_write_pending--;
1586 1575 out:
1587 1576 rcu_read_unlock();
1588 1577 }
··· 1677 1664 */
1678 1665 static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
1679 1666 {
1680 - spin_lock_bh(&xprt->transport_lock);
1667 + spin_lock(&xprt->transport_lock);
1681 1668 xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
1682 - spin_unlock_bh(&xprt->transport_lock);
1669 + spin_unlock(&xprt->transport_lock);
1683 1670 }
1684 1671 
1685 1672 static int xs_get_random_port(void)
··· 2214 2201 unsigned int opt_on = 1;
2215 2202 unsigned int timeo;
2216 2203 
2217 - spin_lock_bh(&xprt->transport_lock);
2204 + spin_lock(&xprt->transport_lock);
2218 2205 keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
2219 2206 keepcnt = xprt->timeout->to_retries + 1;
2220 2207 timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
2221 2208 (xprt->timeout->to_retries + 1);
2222 2209 clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
2223 - spin_unlock_bh(&xprt->transport_lock);
2210 + spin_unlock(&xprt->transport_lock);
2224 2211 
2225 2212 /* TCP Keepalive options */
2226 2213 kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
··· 2245 2232 struct rpc_timeout to;
2246 2233 unsigned long initval;
2247 2234 
2248 - spin_lock_bh(&xprt->transport_lock);
2235 + spin_lock(&xprt->transport_lock);
2249 2236 if (reconnect_timeout < xprt->max_reconnect_timeout)
2250 2237 xprt->max_reconnect_timeout = reconnect_timeout;
2251 2238 if (connect_timeout < xprt->connect_timeout) {
··· 2262 2249 xprt->connect_timeout = connect_timeout;
2263 2250 }
2264 2251 set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
2265 - spin_unlock_bh(&xprt->transport_lock);
2252 + spin_unlock(&xprt->transport_lock);
2266 2253 }
2267 2254 
2268 2255 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
··· 2415 2402 xprt_wake_pending_tasks(xprt, status);
2416 2403 }
2417 2404 
2418 - static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
2419 - {
2420 - unsigned long start, now = jiffies;
2421 - 
2422 - start = xprt->stat.connect_start + xprt->reestablish_timeout;
2423 - if (time_after(start, now))
2424 - return start - now;
2425 - return 0;
2426 - }
2427 - 
2428 - static void xs_reconnect_backoff(struct rpc_xprt *xprt)
2429 - {
2430 - xprt->reestablish_timeout <<= 1;
2431 - if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
2432 - xprt->reestablish_timeout = xprt->max_reconnect_timeout;
2433 - if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
2434 - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2435 - }
2436 - 
2437 2405 /**
2438 2406 * xs_connect - connect a socket to a remote endpoint
2439 2407 * @xprt: pointer to transport structure
··· 2444 2450 /* Start by resetting any existing state */
2445 2451 xs_reset_transport(transport);
2446 2452 
2447 - delay = xs_reconnect_delay(xprt);
2448 - xs_reconnect_backoff(xprt);
2453 + delay = xprt_reconnect_delay(xprt);
2454 + xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
2449 2455 
2450 2456 } else
2451 2457 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
··· 2453 2459 queue_delayed_work(xprtiod_workqueue,
2454 2460 &transport->connect_worker,
2455 2461 delay);
2462 + }
2463 + 
2464 + static void xs_wake_disconnect(struct sock_xprt *transport)
2465 + {
2466 + if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state))
2467 + xs_tcp_force_close(&transport->xprt);
2468 + }
2469 + 
2470 + static void xs_wake_write(struct sock_xprt *transport)
2471 + {
2472 + if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state))
2473 + xprt_write_space(&transport->xprt);
2474 + }
2475 + 
2476 + static void xs_wake_error(struct sock_xprt *transport)
2477 + {
2478 + int sockerr;
2479 + int sockerr_len = sizeof(sockerr);
2480 + 
2481 + if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
2482 + return;
2483 + mutex_lock(&transport->recv_mutex);
2484 + if (transport->sock == NULL)
2485 + goto out;
2486 + if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
2487 + goto out;
2488 + if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR,
2489 + (char *)&sockerr, &sockerr_len) != 0)
2490 + goto out;
2491 + if (sockerr < 0)
2492 + xprt_wake_pending_tasks(&transport->xprt, sockerr);
2493 + out:
2494 + mutex_unlock(&transport->recv_mutex);
2495 + }
2496 + 
2497 + static void xs_wake_pending(struct sock_xprt *transport)
2498 + {
2499 + if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state))
2500 + xprt_wake_pending_tasks(&transport->xprt, -EAGAIN);
2501 + }
2502 + 
2503 + static void xs_error_handle(struct work_struct *work)
2504 + {
2505 + struct sock_xprt *transport = container_of(work,
2506 + struct sock_xprt, error_worker);
2507 + 
2508 + xs_wake_disconnect(transport);
2509 + xs_wake_write(transport);
2510 + xs_wake_error(transport);
2511 + xs_wake_pending(transport);
2456 2512 }
··· 2789 2745 #ifdef CONFIG_SUNRPC_BACKCHANNEL
2790 2746 .bc_setup = xprt_setup_bc,
2791 2747 .bc_maxpayload = xs_tcp_bc_maxpayload,
2748 + .bc_num_slots = xprt_bc_max_slots,
2792 2749 .bc_free_rqst = xprt_free_bc_rqst,
2793 2750 .bc_destroy = xprt_destroy_bc,
2794 2751 #endif
··· 2918 2873 xprt->timeout = &xs_local_default_timeout;
2919 2874 
2920 2875 INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
2876 + INIT_WORK(&transport->error_worker, xs_error_handle);
2921 2877 INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
2922 2878 
2923 2879 switch (sun->sun_family) {
··· 2989 2943 xprt->timeout = &xs_udp_default_timeout;
2990 2944 
2991 2945 INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
2946 + INIT_WORK(&transport->error_worker, xs_error_handle);
2992 2947 INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
2993 2948 
2994 2949 switch (addr->sa_family) {
··· 3071 3024 (xprt->timeout->to_retries + 1);
3072 3025 
3073 3026 INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
3027 + INIT_WORK(&transport->error_worker, xs_error_handle);
3074 3028 INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
3075 3029 
3076 3030 switch (addr->sa_family) {
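One pattern repeats throughout this file: socket callbacks such as xs_error_report() and xs_write_space() run in bottom-half context, so instead of waking RPC tasks inline they now record an XPRT_SOCK_WAKE_* bit via xs_run_error_worker() and let xs_error_handle() perform the wakeups from the xprtiod workqueue. That deferral is also what lets the transport_lock sites above drop their _bh variants. A condensed sketch of the idiom, with demo_* names standing in for the real ones:

#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

/* Illustrative transport: atomic-context callbacks record events in a
 * bitmask and kick a worker, which acts on them in process context. */
enum {
        DEMO_WAKE_ERROR,        /* stands in for XPRT_SOCK_WAKE_ERROR */
};

struct demo_transport {
        unsigned long state;
        struct work_struct error_worker;
};

/* Called from a sk_* callback: may not sleep, so mark and queue only. */
static void demo_report_error(struct demo_transport *t)
{
        set_bit(DEMO_WAKE_ERROR, &t->state);
        schedule_work(&t->error_worker);
}

/* Runs in process context; test_and_clear_bit() ensures each recorded
 * event is acted on at most once even if the work was queued twice. */
static void demo_error_handle(struct work_struct *work)
{
        struct demo_transport *t =
                container_of(work, struct demo_transport, error_worker);

        if (test_and_clear_bit(DEMO_WAKE_ERROR, &t->state))
                pr_info("demo: waking pending tasks after socket error\n");
}

The three INIT_WORK(&transport->error_worker, xs_error_handle) lines in the setup paths above wire the real handler up in exactly this way.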