Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

* 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (44 commits)
NFSv4: Don't use the delegation->inode in nfs_mark_return_delegation()
nfs: don't use d_move in nfs_async_rename_done
RDMA: Increasing RPCRDMA_MAX_DATA_SEGS
SUNRPC: Replace xprt->resend and xprt->sending with a priority queue
SUNRPC: Allow caller of rpc_sleep_on() to select priority levels
SUNRPC: Support dynamic slot allocation for TCP connections
SUNRPC: Clean up the slot table allocation
SUNRPC: Initialise the struct xprt upon allocation
SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot
pnfs: simplify pnfs files module autoloading
nfs: document nfsv4 sillyrename issues
NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL
SUNRPC: Convert the backchannel exports to EXPORT_SYMBOL_GPL
SUNRPC: sunrpc should not explicitly depend on NFS config options
NFS: Clean up - simplify the switch to read/write-through-MDS
NFS: Move the pnfs write code into pnfs.c
NFS: Move the pnfs read code into pnfs.c
NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed
NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request
NFS: Cache rpc_ops in struct nfs_pageio_descriptor
...

+1856 -632
+6 -3
fs/lockd/clntproc.c
··· 302 302 /* We appear to be out of the grace period */ 303 303 wake_up_all(&host->h_gracewait); 304 304 } 305 - dprintk("lockd: server returns status %d\n", resp->status); 305 + dprintk("lockd: server returns status %d\n", 306 + ntohl(resp->status)); 306 307 return 0; /* Okay, call complete */ 307 308 } 308 309 ··· 691 690 goto out; 692 691 693 692 if (resp->status != nlm_lck_denied_nolocks) 694 - printk("lockd: unexpected unlock status: %d\n", resp->status); 693 + printk("lockd: unexpected unlock status: %d\n", 694 + ntohl(resp->status)); 695 695 /* What to do now? I'm out of my depth... */ 696 696 status = -ENOLCK; 697 697 out: ··· 845 843 return -ENOLCK; 846 844 #endif 847 845 } 848 - printk(KERN_NOTICE "lockd: unexpected server status %d\n", status); 846 + printk(KERN_NOTICE "lockd: unexpected server status %d\n", 847 + ntohl(status)); 849 848 return -ENOLCK; 850 849 }
+1
fs/nfs/Kconfig
··· 77 77 config NFS_V4_1 78 78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 79 79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL 80 + select SUNRPC_BACKCHANNEL 80 81 select PNFS_FILE_LAYOUT 81 82 help 82 83 This option enables support for minor version 1 of the NFSv4 protocol
+36 -21
fs/nfs/callback_proc.c
··· 111 111 static u32 initiate_file_draining(struct nfs_client *clp, 112 112 struct cb_layoutrecallargs *args) 113 113 { 114 + struct nfs_server *server; 114 115 struct pnfs_layout_hdr *lo; 115 116 struct inode *ino; 116 117 bool found = false; ··· 119 118 LIST_HEAD(free_me_list); 120 119 121 120 spin_lock(&clp->cl_lock); 122 - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { 123 - if (nfs_compare_fh(&args->cbl_fh, 124 - &NFS_I(lo->plh_inode)->fh)) 125 - continue; 126 - ino = igrab(lo->plh_inode); 127 - if (!ino) 128 - continue; 129 - found = true; 130 - /* Without this, layout can be freed as soon 131 - * as we release cl_lock. 132 - */ 133 - get_layout_hdr(lo); 134 - break; 121 + rcu_read_lock(); 122 + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 123 + list_for_each_entry(lo, &server->layouts, plh_layouts) { 124 + if (nfs_compare_fh(&args->cbl_fh, 125 + &NFS_I(lo->plh_inode)->fh)) 126 + continue; 127 + ino = igrab(lo->plh_inode); 128 + if (!ino) 129 + continue; 130 + found = true; 131 + /* Without this, layout can be freed as soon 132 + * as we release cl_lock. 
133 + */ 134 + get_layout_hdr(lo); 135 + break; 136 + } 137 + if (found) 138 + break; 135 139 } 140 + rcu_read_unlock(); 136 141 spin_unlock(&clp->cl_lock); 142 + 137 143 if (!found) 138 144 return NFS4ERR_NOMATCHING_LAYOUT; 139 145 ··· 162 154 static u32 initiate_bulk_draining(struct nfs_client *clp, 163 155 struct cb_layoutrecallargs *args) 164 156 { 157 + struct nfs_server *server; 165 158 struct pnfs_layout_hdr *lo; 166 159 struct inode *ino; 167 160 u32 rv = NFS4ERR_NOMATCHING_LAYOUT; ··· 176 167 }; 177 168 178 169 spin_lock(&clp->cl_lock); 179 - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { 170 + rcu_read_lock(); 171 + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 180 172 if ((args->cbl_recall_type == RETURN_FSID) && 181 - memcmp(&NFS_SERVER(lo->plh_inode)->fsid, 182 - &args->cbl_fsid, sizeof(struct nfs_fsid))) 173 + memcmp(&server->fsid, &args->cbl_fsid, 174 + sizeof(struct nfs_fsid))) 183 175 continue; 184 - if (!igrab(lo->plh_inode)) 185 - continue; 186 - get_layout_hdr(lo); 187 - BUG_ON(!list_empty(&lo->plh_bulk_recall)); 188 - list_add(&lo->plh_bulk_recall, &recall_list); 176 + 177 + list_for_each_entry(lo, &server->layouts, plh_layouts) { 178 + if (!igrab(lo->plh_inode)) 179 + continue; 180 + get_layout_hdr(lo); 181 + BUG_ON(!list_empty(&lo->plh_bulk_recall)); 182 + list_add(&lo->plh_bulk_recall, &recall_list); 183 + } 189 184 } 185 + rcu_read_unlock(); 190 186 spin_unlock(&clp->cl_lock); 187 + 191 188 list_for_each_entry_safe(lo, tmp, 192 189 &recall_list, plh_bulk_recall) { 193 190 ino = lo->plh_inode;
+3 -4
fs/nfs/client.c
··· 188 188 cred = rpc_lookup_machine_cred(); 189 189 if (!IS_ERR(cred)) 190 190 clp->cl_machine_cred = cred; 191 - #if defined(CONFIG_NFS_V4_1) 192 - INIT_LIST_HEAD(&clp->cl_layouts); 193 - #endif 194 191 nfs_fscache_get_client_cookie(clp); 195 192 196 193 return clp; ··· 290 293 nfs4_deviceid_purge_client(clp); 291 294 292 295 kfree(clp->cl_hostname); 296 + kfree(clp->server_scope); 293 297 kfree(clp); 294 298 295 299 dprintk("<-- nfs_free_client()\n"); ··· 1060 1062 INIT_LIST_HEAD(&server->client_link); 1061 1063 INIT_LIST_HEAD(&server->master_link); 1062 1064 INIT_LIST_HEAD(&server->delegations); 1065 + INIT_LIST_HEAD(&server->layouts); 1063 1066 1064 1067 atomic_set(&server->active, 0); 1065 1068 ··· 1463 1464 dprintk("<-- %s %p\n", __func__, clp); 1464 1465 return clp; 1465 1466 } 1466 - EXPORT_SYMBOL(nfs4_set_ds_client); 1467 + EXPORT_SYMBOL_GPL(nfs4_set_ds_client); 1467 1468 1468 1469 /* 1469 1470 * Session has been established, and the client marked ready.
+8 -8
fs/nfs/delegation.c
··· 398 398 return err; 399 399 } 400 400 401 - static void nfs_mark_return_delegation(struct nfs_delegation *delegation) 401 + static void nfs_mark_return_delegation(struct nfs_server *server, 402 + struct nfs_delegation *delegation) 402 403 { 403 - struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; 404 - 405 404 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 406 - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 405 + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); 407 406 } 408 407 409 408 /** ··· 440 441 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) 441 442 continue; 442 443 if (delegation->type & flags) 443 - nfs_mark_return_delegation(delegation); 444 + nfs_mark_return_delegation(server, delegation); 444 445 } 445 446 } 446 447 ··· 507 508 list_for_each_entry_rcu(delegation, &server->delegations, super_list) { 508 509 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 509 510 continue; 510 - nfs_mark_return_delegation(delegation); 511 + nfs_mark_return_delegation(server, delegation); 511 512 } 512 513 } 513 514 ··· 538 539 int nfs_async_inode_return_delegation(struct inode *inode, 539 540 const nfs4_stateid *stateid) 540 541 { 541 - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 542 + struct nfs_server *server = NFS_SERVER(inode); 543 + struct nfs_client *clp = server->nfs_client; 542 544 struct nfs_delegation *delegation; 543 545 544 546 rcu_read_lock(); ··· 549 549 rcu_read_unlock(); 550 550 return -ENOENT; 551 551 } 552 - nfs_mark_return_delegation(delegation); 552 + nfs_mark_return_delegation(server, delegation); 553 553 rcu_read_unlock(); 554 554 555 555 nfs_delegation_run_state_manager(clp);
+13
fs/nfs/internal.h
··· 277 277 extern char *nfs_path(char **p, struct dentry *dentry, 278 278 char *buffer, ssize_t buflen); 279 279 extern struct vfsmount *nfs_d_automount(struct path *path); 280 + #ifdef CONFIG_NFS_V4 281 + rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 282 + #endif 280 283 281 284 /* getroot.c */ 282 285 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, ··· 291 288 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 292 289 #endif 293 290 291 + struct nfs_pageio_descriptor; 294 292 /* read.c */ 295 293 extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 296 294 const struct rpc_call_ops *call_ops); 297 295 extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 296 + extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 297 + struct list_head *head); 298 + 299 + extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 300 + extern void nfs_readdata_release(struct nfs_read_data *rdata); 298 301 299 302 /* write.c */ 303 + extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 304 + struct list_head *head); 305 + extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); 306 + extern void nfs_writedata_release(struct nfs_write_data *wdata); 300 307 extern void nfs_commit_free(struct nfs_write_data *p); 301 308 extern int nfs_initiate_write(struct nfs_write_data *data, 302 309 struct rpc_clnt *clnt,
+1 -1
fs/nfs/namespace.c
··· 119 119 } 120 120 121 121 #ifdef CONFIG_NFS_V4 122 - static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) 122 + rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) 123 123 { 124 124 struct gss_api_mech *mech; 125 125 struct xdr_netobj oid;
+5
fs/nfs/nfs4_fs.h
··· 48 48 NFS4CLNT_SESSION_RESET, 49 49 NFS4CLNT_RECALL_SLOT, 50 50 NFS4CLNT_LEASE_CONFIRM, 51 + NFS4CLNT_SERVER_SCOPE_MISMATCH, 51 52 }; 52 53 53 54 enum nfs4_session_state { ··· 67 66 int cache_reply); 68 67 int (*validate_stateid)(struct nfs_delegation *, 69 68 const nfs4_stateid *); 69 + int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, 70 + struct nfs_fsinfo *); 70 71 const struct nfs4_state_recovery_ops *reboot_recovery_ops; 71 72 const struct nfs4_state_recovery_ops *nograce_recovery_ops; 72 73 const struct nfs4_state_maintenance_ops *state_renewal_ops; ··· 352 349 extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); 353 350 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 354 351 extern void nfs41_handle_recall_slot(struct nfs_client *clp); 352 + extern void nfs41_handle_server_scope(struct nfs_client *, 353 + struct server_scope **); 355 354 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 356 355 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 357 356 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+72 -8
fs/nfs/nfs4filelayout.c
··· 334 334 __func__, data->inode->i_ino, 335 335 data->args.pgbase, (size_t)data->args.count, offset); 336 336 337 + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) 338 + return PNFS_NOT_ATTEMPTED; 339 + 337 340 /* Retrieve the correct rpc_client for the byte range */ 338 341 j = nfs4_fl_calc_j_index(lseg, offset); 339 342 idx = nfs4_fl_calc_ds_index(lseg, j); ··· 347 344 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 348 345 return PNFS_NOT_ATTEMPTED; 349 346 } 350 - dprintk("%s USE DS:ip %x %hu\n", __func__, 351 - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 347 + dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); 352 348 353 349 /* No multipath support. Use first DS */ 354 350 data->ds_clp = ds->ds_clp; ··· 376 374 struct nfs_fh *fh; 377 375 int status; 378 376 377 + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) 378 + return PNFS_NOT_ATTEMPTED; 379 + 379 380 /* Retrieve the correct rpc_client for the byte range */ 380 381 j = nfs4_fl_calc_j_index(lseg, offset); 381 382 idx = nfs4_fl_calc_ds_index(lseg, j); ··· 389 384 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 390 385 return PNFS_NOT_ATTEMPTED; 391 386 } 392 - dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, 387 + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, 393 388 data->inode->i_ino, sync, (size_t) data->args.count, offset, 394 - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 389 + ds->ds_remotestr); 395 390 396 391 data->write_done_cb = filelayout_write_done_cb; 397 392 data->ds_clp = ds->ds_clp; ··· 433 428 434 429 dprintk("--> %s\n", __func__); 435 430 431 + /* FIXME: remove this check when layout segment support is added */ 432 + if (lgr->range.offset != 0 || 433 + lgr->range.length != NFS4_MAX_UINT64) { 434 + dprintk("%s Only whole file layouts supported. 
Use MDS i/o\n", 435 + __func__); 436 + goto out; 437 + } 438 + 436 439 if (fl->pattern_offset > lgr->range.offset) { 437 440 dprintk("%s pattern_offset %lld too large\n", 438 441 __func__, fl->pattern_offset); ··· 462 449 goto out; 463 450 } else 464 451 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); 452 + /* Found deviceid is being reaped */ 453 + if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) 454 + goto out_put; 455 + 465 456 fl->dsaddr = dsaddr; 466 457 467 458 if (fl->first_stripe_index < 0 || ··· 676 659 * return true : coalesce page 677 660 * return false : don't coalesce page 678 661 */ 679 - bool 662 + static bool 680 663 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 681 664 struct nfs_page *req) 682 665 { ··· 687 670 !nfs_generic_pg_test(pgio, prev, req)) 688 671 return false; 689 672 690 - if (!pgio->pg_lseg) 691 - return 1; 692 673 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 693 674 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 694 675 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; ··· 696 681 697 682 return (p_stripe == r_stripe); 698 683 } 684 + 685 + void 686 + filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, 687 + struct nfs_page *req) 688 + { 689 + BUG_ON(pgio->pg_lseg != NULL); 690 + 691 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 692 + req->wb_context, 693 + 0, 694 + NFS4_MAX_UINT64, 695 + IOMODE_READ, 696 + GFP_KERNEL); 697 + /* If no lseg, fall back to read through mds */ 698 + if (pgio->pg_lseg == NULL) 699 + nfs_pageio_reset_read_mds(pgio); 700 + } 701 + 702 + void 703 + filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, 704 + struct nfs_page *req) 705 + { 706 + BUG_ON(pgio->pg_lseg != NULL); 707 + 708 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 709 + req->wb_context, 710 + 0, 711 + NFS4_MAX_UINT64, 712 + IOMODE_RW, 713 + GFP_NOFS); 714 + /* If no lseg, fall back to write through mds */ 715 + if (pgio->pg_lseg == 
NULL) 716 + nfs_pageio_reset_write_mds(pgio); 717 + } 718 + 719 + static const struct nfs_pageio_ops filelayout_pg_read_ops = { 720 + .pg_init = filelayout_pg_init_read, 721 + .pg_test = filelayout_pg_test, 722 + .pg_doio = pnfs_generic_pg_readpages, 723 + }; 724 + 725 + static const struct nfs_pageio_ops filelayout_pg_write_ops = { 726 + .pg_init = filelayout_pg_init_write, 727 + .pg_test = filelayout_pg_test, 728 + .pg_doio = pnfs_generic_pg_writepages, 729 + }; 699 730 700 731 static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) 701 732 { ··· 940 879 .owner = THIS_MODULE, 941 880 .alloc_lseg = filelayout_alloc_lseg, 942 881 .free_lseg = filelayout_free_lseg, 943 - .pg_test = filelayout_pg_test, 882 + .pg_read_ops = &filelayout_pg_read_ops, 883 + .pg_write_ops = &filelayout_pg_write_ops, 944 884 .mark_pnfs_commit = filelayout_mark_pnfs_commit, 945 885 .choose_commit_list = filelayout_choose_commit_list, 946 886 .commit_pagelist = filelayout_commit_pagelist, ··· 963 901 __func__); 964 902 pnfs_unregister_layoutdriver(&filelayout_type); 965 903 } 904 + 905 + MODULE_ALIAS("nfs-layouttype4-1"); 966 906 967 907 module_init(nfs4filelayout_init); 968 908 module_exit(nfs4filelayout_exit);
+15 -2
fs/nfs/nfs4filelayout.h
··· 47 47 }; 48 48 49 49 /* Individual ip address */ 50 + struct nfs4_pnfs_ds_addr { 51 + struct sockaddr_storage da_addr; 52 + size_t da_addrlen; 53 + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 54 + char *da_remotestr; /* human readable addr+port */ 55 + }; 56 + 50 57 struct nfs4_pnfs_ds { 51 58 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 52 - u32 ds_ip_addr; 53 - u32 ds_port; 59 + char *ds_remotestr; /* comma sep list of addrs */ 60 + struct list_head ds_addrs; 54 61 struct nfs_client *ds_clp; 55 62 atomic_t ds_count; 56 63 }; ··· 94 87 return container_of(lseg, 95 88 struct nfs4_filelayout_segment, 96 89 generic_hdr); 90 + } 91 + 92 + static inline struct nfs4_deviceid_node * 93 + FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) 94 + { 95 + return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; 97 96 } 98 97 99 98 extern struct nfs_fh *
+338 -114
fs/nfs/nfs4filelayoutdev.c
··· 56 56 printk("%s NULL device\n", __func__); 57 57 return; 58 58 } 59 - printk(" ip_addr %x port %hu\n" 59 + printk(" ds %s\n" 60 60 " ref count %d\n" 61 61 " client %p\n" 62 62 " cl_exchange_flags %x\n", 63 - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), 63 + ds->ds_remotestr, 64 64 atomic_read(&ds->ds_count), ds->ds_clp, 65 65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); 66 66 } 67 67 68 - /* nfs4_ds_cache_lock is held */ 68 + static bool 69 + same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) 70 + { 71 + struct sockaddr_in *a, *b; 72 + struct sockaddr_in6 *a6, *b6; 73 + 74 + if (addr1->sa_family != addr2->sa_family) 75 + return false; 76 + 77 + switch (addr1->sa_family) { 78 + case AF_INET: 79 + a = (struct sockaddr_in *)addr1; 80 + b = (struct sockaddr_in *)addr2; 81 + 82 + if (a->sin_addr.s_addr == b->sin_addr.s_addr && 83 + a->sin_port == b->sin_port) 84 + return true; 85 + break; 86 + 87 + case AF_INET6: 88 + a6 = (struct sockaddr_in6 *)addr1; 89 + b6 = (struct sockaddr_in6 *)addr2; 90 + 91 + /* LINKLOCAL addresses must have matching scope_id */ 92 + if (ipv6_addr_scope(&a6->sin6_addr) == 93 + IPV6_ADDR_SCOPE_LINKLOCAL && 94 + a6->sin6_scope_id != b6->sin6_scope_id) 95 + return false; 96 + 97 + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && 98 + a6->sin6_port == b6->sin6_port) 99 + return true; 100 + break; 101 + 102 + default: 103 + dprintk("%s: unhandled address family: %u\n", 104 + __func__, addr1->sa_family); 105 + return false; 106 + } 107 + 108 + return false; 109 + } 110 + 111 + /* 112 + * Lookup DS by addresses. The first matching address returns true. 
113 + * nfs4_ds_cache_lock is held 114 + */ 69 115 static struct nfs4_pnfs_ds * 70 - _data_server_lookup_locked(u32 ip_addr, u32 port) 116 + _data_server_lookup_locked(struct list_head *dsaddrs) 71 117 { 72 118 struct nfs4_pnfs_ds *ds; 119 + struct nfs4_pnfs_ds_addr *da1, *da2; 73 120 74 - dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", 75 - ntohl(ip_addr), ntohs(port)); 76 - 77 - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { 78 - if (ds->ds_ip_addr == ip_addr && 79 - ds->ds_port == port) { 80 - return ds; 121 + list_for_each_entry(da1, dsaddrs, da_node) { 122 + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { 123 + list_for_each_entry(da2, &ds->ds_addrs, da_node) { 124 + if (same_sockaddr( 125 + (struct sockaddr *)&da1->da_addr, 126 + (struct sockaddr *)&da2->da_addr)) 127 + return ds; 128 + } 81 129 } 82 130 } 83 131 return NULL; 84 132 } 85 133 86 134 /* 135 + * Compare two lists of addresses. 136 + */ 137 + static bool 138 + _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, 139 + struct list_head *dsaddrs2) 140 + { 141 + struct nfs4_pnfs_ds_addr *da1, *da2; 142 + size_t count1 = 0, 143 + count2 = 0; 144 + 145 + list_for_each_entry(da1, dsaddrs1, da_node) 146 + count1++; 147 + 148 + list_for_each_entry(da2, dsaddrs2, da_node) { 149 + bool found = false; 150 + count2++; 151 + list_for_each_entry(da1, dsaddrs1, da_node) { 152 + if (same_sockaddr((struct sockaddr *)&da1->da_addr, 153 + (struct sockaddr *)&da2->da_addr)) { 154 + found = true; 155 + break; 156 + } 157 + } 158 + if (!found) 159 + return false; 160 + } 161 + 162 + return (count1 == count2); 163 + } 164 + 165 + /* 87 166 * Create an rpc connection to the nfs4_pnfs_ds data server 88 - * Currently only support IPv4 167 + * Currently only supports IPv4 and IPv6 addresses 89 168 */ 90 169 static int 91 170 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) 92 171 { 93 - struct nfs_client *clp; 94 - struct sockaddr_in sin; 172 + struct nfs_client 
*clp = ERR_PTR(-EIO); 173 + struct nfs4_pnfs_ds_addr *da; 95 174 int status = 0; 96 175 97 - dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, 98 - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), 176 + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, 99 177 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); 100 178 101 - sin.sin_family = AF_INET; 102 - sin.sin_addr.s_addr = ds->ds_ip_addr; 103 - sin.sin_port = ds->ds_port; 179 + BUG_ON(list_empty(&ds->ds_addrs)); 104 180 105 - clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, 106 - sizeof(sin), IPPROTO_TCP); 181 + list_for_each_entry(da, &ds->ds_addrs, da_node) { 182 + dprintk("%s: DS %s: trying address %s\n", 183 + __func__, ds->ds_remotestr, da->da_remotestr); 184 + 185 + clp = nfs4_set_ds_client(mds_srv->nfs_client, 186 + (struct sockaddr *)&da->da_addr, 187 + da->da_addrlen, IPPROTO_TCP); 188 + if (!IS_ERR(clp)) 189 + break; 190 + } 191 + 107 192 if (IS_ERR(clp)) { 108 193 status = PTR_ERR(clp); 109 194 goto out; ··· 200 115 goto out_put; 201 116 } 202 117 ds->ds_clp = clp; 203 - dprintk("%s [existing] ip=%x, port=%hu\n", __func__, 204 - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 118 + dprintk("%s [existing] server=%s\n", __func__, 119 + ds->ds_remotestr); 205 120 goto out; 206 121 } 207 122 ··· 220 135 goto out_put; 221 136 222 137 ds->ds_clp = clp; 223 - dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), 224 - ntohs(ds->ds_port)); 138 + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); 225 139 out: 226 140 return status; 227 141 out_put: ··· 231 147 static void 232 148 destroy_ds(struct nfs4_pnfs_ds *ds) 233 149 { 150 + struct nfs4_pnfs_ds_addr *da; 151 + 234 152 dprintk("--> %s\n", __func__); 235 153 ifdebug(FACILITY) 236 154 print_ds(ds); 237 155 238 156 if (ds->ds_clp) 239 157 nfs_put_client(ds->ds_clp); 158 + 159 + while (!list_empty(&ds->ds_addrs)) { 160 + da = list_first_entry(&ds->ds_addrs, 161 + struct nfs4_pnfs_ds_addr, 162 
+ da_node); 163 + list_del_init(&da->da_node); 164 + kfree(da->da_remotestr); 165 + kfree(da); 166 + } 167 + 168 + kfree(ds->ds_remotestr); 240 169 kfree(ds); 241 170 } 242 171 ··· 276 179 kfree(dsaddr); 277 180 } 278 181 279 - static struct nfs4_pnfs_ds * 280 - nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) 182 + /* 183 + * Create a string with a human readable address and port to avoid 184 + * complicated setup around many dprinks. 185 + */ 186 + static char * 187 + nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) 281 188 { 282 - struct nfs4_pnfs_ds *tmp_ds, *ds; 189 + struct nfs4_pnfs_ds_addr *da; 190 + char *remotestr; 191 + size_t len; 192 + char *p; 283 193 284 - ds = kzalloc(sizeof(*tmp_ds), gfp_flags); 194 + len = 3; /* '{', '}' and eol */ 195 + list_for_each_entry(da, dsaddrs, da_node) { 196 + len += strlen(da->da_remotestr) + 1; /* string plus comma */ 197 + } 198 + 199 + remotestr = kzalloc(len, gfp_flags); 200 + if (!remotestr) 201 + return NULL; 202 + 203 + p = remotestr; 204 + *(p++) = '{'; 205 + len--; 206 + list_for_each_entry(da, dsaddrs, da_node) { 207 + size_t ll = strlen(da->da_remotestr); 208 + 209 + if (ll > len) 210 + goto out_err; 211 + 212 + memcpy(p, da->da_remotestr, ll); 213 + p += ll; 214 + len -= ll; 215 + 216 + if (len < 1) 217 + goto out_err; 218 + (*p++) = ','; 219 + len--; 220 + } 221 + if (len < 2) 222 + goto out_err; 223 + *(p++) = '}'; 224 + *p = '\0'; 225 + return remotestr; 226 + out_err: 227 + kfree(remotestr); 228 + return NULL; 229 + } 230 + 231 + static struct nfs4_pnfs_ds * 232 + nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) 233 + { 234 + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; 235 + char *remotestr; 236 + 237 + if (list_empty(dsaddrs)) { 238 + dprintk("%s: no addresses defined\n", __func__); 239 + goto out; 240 + } 241 + 242 + ds = kzalloc(sizeof(*ds), gfp_flags); 285 243 if (!ds) 286 244 goto out; 287 245 246 + /* this is only used for debugging, so it's ok 
if its NULL */ 247 + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); 248 + 288 249 spin_lock(&nfs4_ds_cache_lock); 289 - tmp_ds = _data_server_lookup_locked(ip_addr, port); 250 + tmp_ds = _data_server_lookup_locked(dsaddrs); 290 251 if (tmp_ds == NULL) { 291 - ds->ds_ip_addr = ip_addr; 292 - ds->ds_port = port; 252 + INIT_LIST_HEAD(&ds->ds_addrs); 253 + list_splice_init(dsaddrs, &ds->ds_addrs); 254 + ds->ds_remotestr = remotestr; 293 255 atomic_set(&ds->ds_count, 1); 294 256 INIT_LIST_HEAD(&ds->ds_node); 295 257 ds->ds_clp = NULL; 296 258 list_add(&ds->ds_node, &nfs4_data_server_cache); 297 - dprintk("%s add new data server ip 0x%x\n", __func__, 298 - ds->ds_ip_addr); 259 + dprintk("%s add new data server %s\n", __func__, 260 + ds->ds_remotestr); 299 261 } else { 262 + if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, 263 + dsaddrs)) { 264 + dprintk("%s: multipath address mismatch: %s != %s", 265 + __func__, tmp_ds->ds_remotestr, remotestr); 266 + } 267 + kfree(remotestr); 300 268 kfree(ds); 301 269 atomic_inc(&tmp_ds->ds_count); 302 - dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", 303 - __func__, tmp_ds->ds_ip_addr, 270 + dprintk("%s data server %s found, inc'ed ds_count to %d\n", 271 + __func__, tmp_ds->ds_remotestr, 304 272 atomic_read(&tmp_ds->ds_count)); 305 273 ds = tmp_ds; 306 274 } ··· 375 213 } 376 214 377 215 /* 378 - * Currently only support ipv4, and one multi-path address. 216 + * Currently only supports ipv4, ipv6 and one multi-path address. 
379 217 */ 380 - static struct nfs4_pnfs_ds * 381 - decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) 218 + static struct nfs4_pnfs_ds_addr * 219 + decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) 382 220 { 383 - struct nfs4_pnfs_ds *ds = NULL; 384 - char *buf; 385 - const char *ipend, *pstr; 386 - u32 ip_addr, port; 387 - int nlen, rlen, i; 221 + struct nfs4_pnfs_ds_addr *da = NULL; 222 + char *buf, *portstr; 223 + u32 port; 224 + int nlen, rlen; 388 225 int tmp[2]; 389 226 __be32 *p; 227 + char *netid, *match_netid; 228 + size_t len, match_netid_len; 229 + char *startsep = ""; 230 + char *endsep = ""; 231 + 390 232 391 233 /* r_netid */ 392 234 p = xdr_inline_decode(streamp, 4); ··· 402 236 if (unlikely(!p)) 403 237 goto out_err; 404 238 405 - /* Check that netid is "tcp" */ 406 - if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { 407 - dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); 239 + netid = kmalloc(nlen+1, gfp_flags); 240 + if (unlikely(!netid)) 408 241 goto out_err; 409 - } 410 242 411 - /* r_addr */ 243 + netid[nlen] = '\0'; 244 + memcpy(netid, p, nlen); 245 + 246 + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ 412 247 p = xdr_inline_decode(streamp, 4); 413 248 if (unlikely(!p)) 414 - goto out_err; 249 + goto out_free_netid; 415 250 rlen = be32_to_cpup(p); 416 251 417 252 p = xdr_inline_decode(streamp, rlen); 418 253 if (unlikely(!p)) 419 - goto out_err; 254 + goto out_free_netid; 420 255 421 - /* ipv6 length plus port is legal */ 422 - if (rlen > INET6_ADDRSTRLEN + 8) { 256 + /* port is ".ABC.DEF", 8 chars max */ 257 + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { 423 258 dprintk("%s: Invalid address, length %d\n", __func__, 424 259 rlen); 425 - goto out_err; 260 + goto out_free_netid; 426 261 } 427 262 buf = kmalloc(rlen + 1, gfp_flags); 428 263 if (!buf) { 429 264 dprintk("%s: Not enough memory\n", __func__); 430 - goto out_err; 265 + goto out_free_netid; 431 266 } 432 267 
buf[rlen] = '\0'; 433 268 memcpy(buf, p, rlen); 434 269 435 - /* replace the port dots with dashes for the in4_pton() delimiter*/ 436 - for (i = 0; i < 2; i++) { 437 - char *res = strrchr(buf, '.'); 438 - if (!res) { 439 - dprintk("%s: Failed finding expected dots in port\n", 440 - __func__); 441 - goto out_free; 442 - } 443 - *res = '-'; 270 + /* replace port '.' with '-' */ 271 + portstr = strrchr(buf, '.'); 272 + if (!portstr) { 273 + dprintk("%s: Failed finding expected dot in port\n", 274 + __func__); 275 + goto out_free_buf; 276 + } 277 + *portstr = '-'; 278 + 279 + /* find '.' between address and port */ 280 + portstr = strrchr(buf, '.'); 281 + if (!portstr) { 282 + dprintk("%s: Failed finding expected dot between address and " 283 + "port\n", __func__); 284 + goto out_free_buf; 285 + } 286 + *portstr = '\0'; 287 + 288 + da = kzalloc(sizeof(*da), gfp_flags); 289 + if (unlikely(!da)) 290 + goto out_free_buf; 291 + 292 + INIT_LIST_HEAD(&da->da_node); 293 + 294 + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, 295 + sizeof(da->da_addr))) { 296 + dprintk("%s: error parsing address %s\n", __func__, buf); 297 + goto out_free_da; 444 298 } 445 299 446 - /* Currently only support ipv4 address */ 447 - if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { 448 - dprintk("%s: Only ipv4 addresses supported\n", __func__); 449 - goto out_free; 450 - } 451 - 452 - /* port */ 453 - pstr = ipend; 454 - sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); 300 + portstr++; 301 + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); 455 302 port = htons((tmp[0] << 8) | (tmp[1])); 456 303 457 - ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); 458 - dprintk("%s: Decoded address and port %s\n", __func__, buf); 459 - out_free: 304 + switch (da->da_addr.ss_family) { 305 + case AF_INET: 306 + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; 307 + da->da_addrlen = sizeof(struct sockaddr_in); 308 + match_netid = "tcp"; 309 + match_netid_len = 3; 310 + break; 311 + 312 + 
case AF_INET6: 313 + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; 314 + da->da_addrlen = sizeof(struct sockaddr_in6); 315 + match_netid = "tcp6"; 316 + match_netid_len = 4; 317 + startsep = "["; 318 + endsep = "]"; 319 + break; 320 + 321 + default: 322 + dprintk("%s: unsupported address family: %u\n", 323 + __func__, da->da_addr.ss_family); 324 + goto out_free_da; 325 + } 326 + 327 + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { 328 + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", 329 + __func__, netid, match_netid); 330 + goto out_free_da; 331 + } 332 + 333 + /* save human readable address */ 334 + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; 335 + da->da_remotestr = kzalloc(len, gfp_flags); 336 + 337 + /* NULL is ok, only used for dprintk */ 338 + if (da->da_remotestr) 339 + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, 340 + buf, endsep, ntohs(port)); 341 + 342 + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); 460 343 kfree(buf); 344 + kfree(netid); 345 + return da; 346 + 347 + out_free_da: 348 + kfree(da); 349 + out_free_buf: 350 + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); 351 + kfree(buf); 352 + out_free_netid: 353 + kfree(netid); 461 354 out_err: 462 - return ds; 355 + return NULL; 463 356 } 464 357 465 358 /* Decode opaque device data and return the result */ ··· 535 310 struct xdr_stream stream; 536 311 struct xdr_buf buf; 537 312 struct page *scratch; 313 + struct list_head dsaddrs; 314 + struct nfs4_pnfs_ds_addr *da; 538 315 539 316 /* set up xdr stream */ 540 317 scratch = alloc_page(gfp_flags); ··· 613 386 NFS_SERVER(ino)->nfs_client, 614 387 &pdev->dev_id); 615 388 389 + INIT_LIST_HEAD(&dsaddrs); 390 + 616 391 for (i = 0; i < dsaddr->ds_num; i++) { 617 392 int j; 618 393 u32 mp_count; ··· 624 395 goto out_err_free_deviceid; 625 396 626 397 mp_count = be32_to_cpup(p); /* multipath count */ 627 - if (mp_count > 1) { 628 - printk(KERN_WARNING 629 - "%s: Multipath 
count %d not supported, " 630 - "skipping all greater than 1\n", __func__, 631 - mp_count); 632 - } 633 398 for (j = 0; j < mp_count; j++) { 634 - if (j == 0) { 635 - dsaddr->ds_list[i] = decode_and_add_ds(&stream, 636 - ino, gfp_flags); 637 - if (dsaddr->ds_list[i] == NULL) 638 - goto out_err_free_deviceid; 639 - } else { 640 - u32 len; 641 - /* skip extra multipath */ 399 + da = decode_ds_addr(&stream, gfp_flags); 400 + if (da) 401 + list_add_tail(&da->da_node, &dsaddrs); 402 + } 403 + if (list_empty(&dsaddrs)) { 404 + dprintk("%s: no suitable DS addresses found\n", 405 + __func__); 406 + goto out_err_free_deviceid; 407 + } 642 408 643 - /* read len, skip */ 644 - p = xdr_inline_decode(&stream, 4); 645 - if (unlikely(!p)) 646 - goto out_err_free_deviceid; 647 - len = be32_to_cpup(p); 409 + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); 410 + if (!dsaddr->ds_list[i]) 411 + goto out_err_drain_dsaddrs; 648 412 649 - p = xdr_inline_decode(&stream, len); 650 - if (unlikely(!p)) 651 - goto out_err_free_deviceid; 652 - 653 - /* read len, skip */ 654 - p = xdr_inline_decode(&stream, 4); 655 - if (unlikely(!p)) 656 - goto out_err_free_deviceid; 657 - len = be32_to_cpup(p); 658 - 659 - p = xdr_inline_decode(&stream, len); 660 - if (unlikely(!p)) 661 - goto out_err_free_deviceid; 662 - } 413 + /* If DS was already in cache, free ds addrs */ 414 + while (!list_empty(&dsaddrs)) { 415 + da = list_first_entry(&dsaddrs, 416 + struct nfs4_pnfs_ds_addr, 417 + da_node); 418 + list_del_init(&da->da_node); 419 + kfree(da->da_remotestr); 420 + kfree(da); 663 421 } 664 422 } 665 423 666 424 __free_page(scratch); 667 425 return dsaddr; 668 426 427 + out_err_drain_dsaddrs: 428 + while (!list_empty(&dsaddrs)) { 429 + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, 430 + da_node); 431 + list_del_init(&da->da_node); 432 + kfree(da->da_remotestr); 433 + kfree(da); 434 + } 669 435 out_err_free_deviceid: 670 436 nfs4_fl_free_deviceid(dsaddr); 671 437 /* stripe_indicies 
was part of dsaddr */ ··· 815 591 816 592 static void 817 593 filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, 818 - int err, u32 ds_addr) 594 + int err, const char *ds_remotestr) 819 595 { 820 596 u32 *p = (u32 *)&dsaddr->id_node.deviceid; 821 597 822 - printk(KERN_ERR "NFS: data server %x connection error %d." 598 + printk(KERN_ERR "NFS: data server %s connection error %d." 823 599 " Deviceid [%x%x%x%x] marked out of use.\n", 824 - ds_addr, err, p[0], p[1], p[2], p[3]); 600 + ds_remotestr, err, p[0], p[1], p[2], p[3]); 825 601 826 602 spin_lock(&nfs4_ds_cache_lock); 827 603 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; ··· 852 628 err = nfs4_ds_connect(s, ds); 853 629 if (err) { 854 630 filelayout_mark_devid_negative(dsaddr, err, 855 - ntohl(ds->ds_ip_addr)); 631 + ds->ds_remotestr); 856 632 return NULL; 857 633 } 858 634 }
+209 -6
fs/nfs/nfs4proc.c
··· 80 80 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 81 81 struct nfs_fattr *fattr, struct iattr *sattr, 82 82 struct nfs4_state *state); 83 - 83 + #ifdef CONFIG_NFS_V4_1 84 + static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); 85 + static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); 86 + #endif 84 87 /* Prevent leaks of NFSv4 errors into userland */ 85 88 static int nfs4_map_errors(int err) 86 89 { ··· 1692 1689 return ret; 1693 1690 } 1694 1691 1692 + #if defined(CONFIG_NFS_V4_1) 1693 + static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) 1694 + { 1695 + int status; 1696 + struct nfs_server *server = NFS_SERVER(state->inode); 1697 + 1698 + status = nfs41_test_stateid(server, state); 1699 + if (status == NFS_OK) 1700 + return 0; 1701 + nfs41_free_stateid(server, state); 1702 + return nfs4_open_expired(sp, state); 1703 + } 1704 + #endif 1705 + 1695 1706 /* 1696 1707 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* 1697 1708 * fields corresponding to attributes that were used to store the verifier. ··· 2269 2252 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2270 2253 struct nfs_fsinfo *info) 2271 2254 { 2255 + int minor_version = server->nfs_client->cl_minorversion; 2272 2256 int status = nfs4_lookup_root(server, fhandle, info); 2273 2257 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) 2274 2258 /* 2275 2259 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM 2276 2260 * by nfs4_map_errors() as this function exits. 
2277 2261 */ 2278 - status = nfs4_find_root_sec(server, fhandle, info); 2262 + status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); 2279 2263 if (status == 0) 2280 2264 status = nfs4_server_capabilities(server, fhandle); 2281 2265 if (status == 0) ··· 4459 4441 return err; 4460 4442 } 4461 4443 4444 + #if defined(CONFIG_NFS_V4_1) 4445 + static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) 4446 + { 4447 + int status; 4448 + struct nfs_server *server = NFS_SERVER(state->inode); 4449 + 4450 + status = nfs41_test_stateid(server, state); 4451 + if (status == NFS_OK) 4452 + return 0; 4453 + nfs41_free_stateid(server, state); 4454 + return nfs4_lock_expired(state, request); 4455 + } 4456 + #endif 4457 + 4462 4458 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4463 4459 { 4464 4460 struct nfs_inode *nfsi = NFS_I(state->inode); ··· 4811 4779 return -NFS4ERR_INVAL; 4812 4780 } 4813 4781 4782 + static bool 4783 + nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) 4784 + { 4785 + if (a->server_scope_sz == b->server_scope_sz && 4786 + memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) 4787 + return true; 4788 + 4789 + return false; 4790 + } 4791 + 4814 4792 /* 4815 4793 * nfs4_proc_exchange_id() 4816 4794 * ··· 4863 4821 init_utsname()->domainname, 4864 4822 clp->cl_rpcclient->cl_auth->au_flavor); 4865 4823 4824 + res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); 4825 + if (unlikely(!res.server_scope)) 4826 + return -ENOMEM; 4827 + 4866 4828 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 4867 4829 if (!status) 4868 4830 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4831 + 4832 + if (!status) { 4833 + if (clp->server_scope && 4834 + !nfs41_same_server_scope(clp->server_scope, 4835 + res.server_scope)) { 4836 + dprintk("%s: server_scope mismatch detected\n", 4837 + __func__); 4838 + 
set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); 4839 + kfree(clp->server_scope); 4840 + clp->server_scope = NULL; 4841 + } 4842 + 4843 + if (!clp->server_scope) 4844 + clp->server_scope = res.server_scope; 4845 + else 4846 + kfree(res.server_scope); 4847 + } 4848 + 4869 4849 dprintk("<-- %s status= %d\n", __func__, status); 4870 4850 return status; 4871 4851 } ··· 5768 5704 { 5769 5705 struct nfs4_layoutreturn *lrp = calldata; 5770 5706 struct nfs_server *server; 5771 - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; 5707 + struct pnfs_layout_hdr *lo = lrp->args.layout; 5772 5708 5773 5709 dprintk("--> %s\n", __func__); 5774 5710 ··· 5797 5733 struct nfs4_layoutreturn *lrp = calldata; 5798 5734 5799 5735 dprintk("--> %s\n", __func__); 5800 - put_layout_hdr(NFS_I(lrp->args.inode)->layout); 5736 + put_layout_hdr(lrp->args.layout); 5801 5737 kfree(calldata); 5802 5738 dprintk("<-- %s\n", __func__); 5803 5739 } ··· 5965 5901 rpc_put_task(task); 5966 5902 return status; 5967 5903 } 5904 + 5905 + static int 5906 + _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, 5907 + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) 5908 + { 5909 + struct nfs41_secinfo_no_name_args args = { 5910 + .style = SECINFO_STYLE_CURRENT_FH, 5911 + }; 5912 + struct nfs4_secinfo_res res = { 5913 + .flavors = flavors, 5914 + }; 5915 + struct rpc_message msg = { 5916 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], 5917 + .rpc_argp = &args, 5918 + .rpc_resp = &res, 5919 + }; 5920 + return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 5921 + } 5922 + 5923 + static int 5924 + nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, 5925 + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) 5926 + { 5927 + struct nfs4_exception exception = { }; 5928 + int err; 5929 + do { 5930 + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); 5931 + switch 
(err) { 5932 + case 0: 5933 + case -NFS4ERR_WRONGSEC: 5934 + case -NFS4ERR_NOTSUPP: 5935 + break; 5936 + default: 5937 + err = nfs4_handle_exception(server, err, &exception); 5938 + } 5939 + } while (exception.retry); 5940 + return err; 5941 + } 5942 + 5943 + static int 5944 + nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, 5945 + struct nfs_fsinfo *info) 5946 + { 5947 + int err; 5948 + struct page *page; 5949 + rpc_authflavor_t flavor; 5950 + struct nfs4_secinfo_flavors *flavors; 5951 + 5952 + page = alloc_page(GFP_KERNEL); 5953 + if (!page) { 5954 + err = -ENOMEM; 5955 + goto out; 5956 + } 5957 + 5958 + flavors = page_address(page); 5959 + err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); 5960 + 5961 + /* 5962 + * Fall back on "guess and check" method if 5963 + * the server doesn't support SECINFO_NO_NAME 5964 + */ 5965 + if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { 5966 + err = nfs4_find_root_sec(server, fhandle, info); 5967 + goto out_freepage; 5968 + } 5969 + if (err) 5970 + goto out_freepage; 5971 + 5972 + flavor = nfs_find_best_sec(flavors); 5973 + if (err == 0) 5974 + err = nfs4_lookup_root_sec(server, fhandle, info, flavor); 5975 + 5976 + out_freepage: 5977 + put_page(page); 5978 + if (err == -EACCES) 5979 + return -EPERM; 5980 + out: 5981 + return err; 5982 + } 5983 + static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) 5984 + { 5985 + int status; 5986 + struct nfs41_test_stateid_args args = { 5987 + .stateid = &state->stateid, 5988 + }; 5989 + struct nfs41_test_stateid_res res; 5990 + struct rpc_message msg = { 5991 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], 5992 + .rpc_argp = &args, 5993 + .rpc_resp = &res, 5994 + }; 5995 + args.seq_args.sa_session = res.seq_res.sr_session = NULL; 5996 + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); 5997 + return status; 5998 + } 5999 + 6000 + static int 
nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) 6001 + { 6002 + struct nfs4_exception exception = { }; 6003 + int err; 6004 + do { 6005 + err = nfs4_handle_exception(server, 6006 + _nfs41_test_stateid(server, state), 6007 + &exception); 6008 + } while (exception.retry); 6009 + return err; 6010 + } 6011 + 6012 + static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) 6013 + { 6014 + int status; 6015 + struct nfs41_free_stateid_args args = { 6016 + .stateid = &state->stateid, 6017 + }; 6018 + struct nfs41_free_stateid_res res; 6019 + struct rpc_message msg = { 6020 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], 6021 + .rpc_argp = &args, 6022 + .rpc_resp = &res, 6023 + }; 6024 + 6025 + args.seq_args.sa_session = res.seq_res.sr_session = NULL; 6026 + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); 6027 + return status; 6028 + } 6029 + 6030 + static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) 6031 + { 6032 + struct nfs4_exception exception = { }; 6033 + int err; 6034 + do { 6035 + err = nfs4_handle_exception(server, 6036 + _nfs4_free_stateid(server, state), 6037 + &exception); 6038 + } while (exception.retry); 6039 + return err; 6040 + } 5968 6041 #endif /* CONFIG_NFS_V4_1 */ 5969 6042 5970 6043 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { ··· 6138 5937 struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { 6139 5938 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6140 5939 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6141 - .recover_open = nfs4_open_expired, 6142 - .recover_lock = nfs4_lock_expired, 5940 + .recover_open = nfs41_open_expired, 5941 + .recover_lock = nfs41_lock_expired, 6143 5942 .establish_clid = nfs41_init_clientid, 6144 5943 .get_clid_cred = nfs4_get_exchange_id_cred, 6145 5944 }; ··· 6163 5962 .minor_version = 0, 6164 5963 .call_sync = _nfs4_call_sync, 6165 5964 .validate_stateid = 
nfs4_validate_delegation_stateid, 5965 + .find_root_sec = nfs4_find_root_sec, 6166 5966 .reboot_recovery_ops = &nfs40_reboot_recovery_ops, 6167 5967 .nograce_recovery_ops = &nfs40_nograce_recovery_ops, 6168 5968 .state_renewal_ops = &nfs40_state_renewal_ops, ··· 6174 5972 .minor_version = 1, 6175 5973 .call_sync = _nfs4_call_sync_session, 6176 5974 .validate_stateid = nfs41_validate_delegation_stateid, 5975 + .find_root_sec = nfs41_find_root_sec, 6177 5976 .reboot_recovery_ops = &nfs41_reboot_recovery_ops, 6178 5977 .nograce_recovery_ops = &nfs41_nograce_recovery_ops, 6179 5978 .state_renewal_ops = &nfs41_state_renewal_ops,
+8 -1
fs/nfs/nfs4state.c
··· 1643 1643 goto out_error; 1644 1644 } 1645 1645 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1646 - set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1646 + 1647 + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, 1648 + &clp->cl_state)) 1649 + nfs4_state_start_reclaim_nograce(clp); 1650 + else 1651 + set_bit(NFS4CLNT_RECLAIM_REBOOT, 1652 + &clp->cl_state); 1653 + 1647 1654 pnfs_destroy_all_layouts(clp); 1648 1655 } 1649 1656
+246 -1
fs/nfs/nfs4xdr.c
··· 343 343 1 /* FIXME: opaque lrf_body always empty at the moment */) 344 344 #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 345 345 1 + decode_stateid_maxsz) 346 + #define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) 347 + #define decode_secinfo_no_name_maxsz decode_secinfo_maxsz 348 + #define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ 349 + XDR_QUADLEN(NFS4_STATEID_SIZE)) 350 + #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) 351 + #define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ 352 + XDR_QUADLEN(NFS4_STATEID_SIZE)) 353 + #define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) 346 354 #else /* CONFIG_NFS_V4_1 */ 347 355 #define encode_sequence_maxsz 0 348 356 #define decode_sequence_maxsz 0 ··· 780 772 decode_sequence_maxsz + \ 781 773 decode_putfh_maxsz + \ 782 774 decode_layoutreturn_maxsz) 775 + #define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ 776 + encode_sequence_maxsz + \ 777 + encode_putrootfh_maxsz +\ 778 + encode_secinfo_no_name_maxsz) 779 + #define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ 780 + decode_sequence_maxsz + \ 781 + decode_putrootfh_maxsz + \ 782 + decode_secinfo_no_name_maxsz) 783 + #define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ 784 + encode_sequence_maxsz + \ 785 + encode_test_stateid_maxsz) 786 + #define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ 787 + decode_sequence_maxsz + \ 788 + decode_test_stateid_maxsz) 789 + #define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ 790 + encode_sequence_maxsz + \ 791 + encode_free_stateid_maxsz) 792 + #define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ 793 + decode_sequence_maxsz + \ 794 + decode_free_stateid_maxsz) 783 795 784 796 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 785 797 compound_encode_hdr_maxsz + ··· 1966 1938 hdr->nops++; 1967 1939 hdr->replen += decode_layoutreturn_maxsz; 1968 1940 } 1941 + 1942 + static int 1943 
+ encode_secinfo_no_name(struct xdr_stream *xdr, 1944 + const struct nfs41_secinfo_no_name_args *args, 1945 + struct compound_hdr *hdr) 1946 + { 1947 + __be32 *p; 1948 + p = reserve_space(xdr, 8); 1949 + *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); 1950 + *p++ = cpu_to_be32(args->style); 1951 + hdr->nops++; 1952 + hdr->replen += decode_secinfo_no_name_maxsz; 1953 + return 0; 1954 + } 1955 + 1956 + static void encode_test_stateid(struct xdr_stream *xdr, 1957 + struct nfs41_test_stateid_args *args, 1958 + struct compound_hdr *hdr) 1959 + { 1960 + __be32 *p; 1961 + 1962 + p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); 1963 + *p++ = cpu_to_be32(OP_TEST_STATEID); 1964 + *p++ = cpu_to_be32(1); 1965 + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); 1966 + hdr->nops++; 1967 + hdr->replen += decode_test_stateid_maxsz; 1968 + } 1969 + 1970 + static void encode_free_stateid(struct xdr_stream *xdr, 1971 + struct nfs41_free_stateid_args *args, 1972 + struct compound_hdr *hdr) 1973 + { 1974 + __be32 *p; 1975 + p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); 1976 + *p++ = cpu_to_be32(OP_FREE_STATEID); 1977 + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); 1978 + hdr->nops++; 1979 + hdr->replen += decode_free_stateid_maxsz; 1980 + } 1969 1981 #endif /* CONFIG_NFS_V4_1 */ 1970 1982 1971 1983 /* ··· 2856 2788 encode_sequence(xdr, &args->seq_args, &hdr); 2857 2789 encode_putfh(xdr, NFS_FH(args->inode), &hdr); 2858 2790 encode_layoutreturn(xdr, args, &hdr); 2791 + encode_nops(&hdr); 2792 + } 2793 + 2794 + /* 2795 + * Encode SECINFO_NO_NAME request 2796 + */ 2797 + static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, 2798 + struct xdr_stream *xdr, 2799 + struct nfs41_secinfo_no_name_args *args) 2800 + { 2801 + struct compound_hdr hdr = { 2802 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2803 + }; 2804 + 2805 + encode_compound_hdr(xdr, req, &hdr); 2806 + encode_sequence(xdr, &args->seq_args, &hdr); 2807 + encode_putrootfh(xdr, 
&hdr); 2808 + encode_secinfo_no_name(xdr, args, &hdr); 2809 + encode_nops(&hdr); 2810 + return 0; 2811 + } 2812 + 2813 + /* 2814 + * Encode TEST_STATEID request 2815 + */ 2816 + static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, 2817 + struct xdr_stream *xdr, 2818 + struct nfs41_test_stateid_args *args) 2819 + { 2820 + struct compound_hdr hdr = { 2821 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2822 + }; 2823 + 2824 + encode_compound_hdr(xdr, req, &hdr); 2825 + encode_sequence(xdr, &args->seq_args, &hdr); 2826 + encode_test_stateid(xdr, args, &hdr); 2827 + encode_nops(&hdr); 2828 + } 2829 + 2830 + /* 2831 + * Encode FREE_STATEID request 2832 + */ 2833 + static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, 2834 + struct xdr_stream *xdr, 2835 + struct nfs41_free_stateid_args *args) 2836 + { 2837 + struct compound_hdr hdr = { 2838 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 2839 + }; 2840 + 2841 + encode_compound_hdr(xdr, req, &hdr); 2842 + encode_sequence(xdr, &args->seq_args, &hdr); 2843 + encode_free_stateid(xdr, args, &hdr); 2859 2844 encode_nops(&hdr); 2860 2845 } 2861 2846 #endif /* CONFIG_NFS_V4_1 */ ··· 5098 4977 if (unlikely(status)) 5099 4978 return status; 5100 4979 5101 - /* Throw away server_scope */ 4980 + /* Save server_scope */ 5102 4981 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5103 4982 if (unlikely(status)) 5104 4983 return status; 4984 + 4985 + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) 4986 + return -EIO; 4987 + 4988 + memcpy(res->server_scope->server_scope, dummy_str, dummy); 4989 + res->server_scope->server_scope_sz = dummy; 5105 4990 5106 4991 /* Throw away Implementation id array */ 5107 4992 status = decode_opaque_inline(xdr, &dummy, &dummy_str); ··· 5445 5318 goto out_overflow; 5446 5319 } 5447 5320 return 0; 5321 + out_overflow: 5322 + print_overflow_msg(__func__, xdr); 5323 + return -EIO; 5324 + } 5325 + 5326 + static int decode_test_stateid(struct xdr_stream *xdr, 5327 + struct 
nfs41_test_stateid_res *res) 5328 + { 5329 + __be32 *p; 5330 + int status; 5331 + int num_res; 5332 + 5333 + status = decode_op_hdr(xdr, OP_TEST_STATEID); 5334 + if (status) 5335 + return status; 5336 + 5337 + p = xdr_inline_decode(xdr, 4); 5338 + if (unlikely(!p)) 5339 + goto out_overflow; 5340 + num_res = be32_to_cpup(p++); 5341 + if (num_res != 1) 5342 + goto out; 5343 + 5344 + p = xdr_inline_decode(xdr, 4); 5345 + if (unlikely(!p)) 5346 + goto out_overflow; 5347 + res->status = be32_to_cpup(p++); 5348 + return res->status; 5349 + out_overflow: 5350 + print_overflow_msg(__func__, xdr); 5351 + out: 5352 + return -EIO; 5353 + } 5354 + 5355 + static int decode_free_stateid(struct xdr_stream *xdr, 5356 + struct nfs41_free_stateid_res *res) 5357 + { 5358 + __be32 *p; 5359 + int status; 5360 + 5361 + status = decode_op_hdr(xdr, OP_FREE_STATEID); 5362 + if (status) 5363 + return status; 5364 + 5365 + p = xdr_inline_decode(xdr, 4); 5366 + if (unlikely(!p)) 5367 + goto out_overflow; 5368 + res->status = be32_to_cpup(p++); 5369 + return res->status; 5448 5370 out_overflow: 5449 5371 print_overflow_msg(__func__, xdr); 5450 5372 return -EIO; ··· 6637 6461 out: 6638 6462 return status; 6639 6463 } 6464 + 6465 + /* 6466 + * Decode SECINFO_NO_NAME response 6467 + */ 6468 + static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, 6469 + struct xdr_stream *xdr, 6470 + struct nfs4_secinfo_res *res) 6471 + { 6472 + struct compound_hdr hdr; 6473 + int status; 6474 + 6475 + status = decode_compound_hdr(xdr, &hdr); 6476 + if (status) 6477 + goto out; 6478 + status = decode_sequence(xdr, &res->seq_res, rqstp); 6479 + if (status) 6480 + goto out; 6481 + status = decode_putrootfh(xdr); 6482 + if (status) 6483 + goto out; 6484 + status = decode_secinfo(xdr, res); 6485 + out: 6486 + return status; 6487 + } 6488 + 6489 + /* 6490 + * Decode TEST_STATEID response 6491 + */ 6492 + static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, 6493 + struct xdr_stream *xdr, 6494 + struct 
nfs41_test_stateid_res *res) 6495 + { 6496 + struct compound_hdr hdr; 6497 + int status; 6498 + 6499 + status = decode_compound_hdr(xdr, &hdr); 6500 + if (status) 6501 + goto out; 6502 + status = decode_sequence(xdr, &res->seq_res, rqstp); 6503 + if (status) 6504 + goto out; 6505 + status = decode_test_stateid(xdr, res); 6506 + out: 6507 + return status; 6508 + } 6509 + 6510 + /* 6511 + * Decode FREE_STATEID response 6512 + */ 6513 + static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, 6514 + struct xdr_stream *xdr, 6515 + struct nfs41_free_stateid_res *res) 6516 + { 6517 + struct compound_hdr hdr; 6518 + int status; 6519 + 6520 + status = decode_compound_hdr(xdr, &hdr); 6521 + if (status) 6522 + goto out; 6523 + status = decode_sequence(xdr, &res->seq_res, rqstp); 6524 + if (status) 6525 + goto out; 6526 + status = decode_free_stateid(xdr, res); 6527 + out: 6528 + return status; 6529 + } 6640 6530 #endif /* CONFIG_NFS_V4_1 */ 6641 6531 6642 6532 /** ··· 6905 6663 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6906 6664 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), 6907 6665 PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), 6666 + PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), 6667 + PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), 6668 + PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), 6908 6669 #endif /* CONFIG_NFS_V4_1 */ 6909 6670 }; 6910 6671
+16 -4
fs/nfs/objlayout/objio_osd.c
··· 1000 1000 if (!pnfs_generic_pg_test(pgio, prev, req)) 1001 1001 return false; 1002 1002 1003 - if (pgio->pg_lseg == NULL) 1004 - return true; 1005 - 1006 1003 return pgio->pg_count + req->wb_bytes <= 1007 1004 OBJIO_LSEG(pgio->pg_lseg)->max_io_size; 1008 1005 } 1006 + 1007 + static const struct nfs_pageio_ops objio_pg_read_ops = { 1008 + .pg_init = pnfs_generic_pg_init_read, 1009 + .pg_test = objio_pg_test, 1010 + .pg_doio = pnfs_generic_pg_readpages, 1011 + }; 1012 + 1013 + static const struct nfs_pageio_ops objio_pg_write_ops = { 1014 + .pg_init = pnfs_generic_pg_init_write, 1015 + .pg_test = objio_pg_test, 1016 + .pg_doio = pnfs_generic_pg_writepages, 1017 + }; 1009 1018 1010 1019 static struct pnfs_layoutdriver_type objlayout_type = { 1011 1020 .id = LAYOUT_OSD2_OBJECTS, ··· 1029 1020 1030 1021 .read_pagelist = objlayout_read_pagelist, 1031 1022 .write_pagelist = objlayout_write_pagelist, 1032 - .pg_test = objio_pg_test, 1023 + .pg_read_ops = &objio_pg_read_ops, 1024 + .pg_write_ops = &objio_pg_write_ops, 1033 1025 1034 1026 .free_deviceid_node = objio_free_deviceid_node, 1035 1027 ··· 1064 1054 printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", 1065 1055 __func__); 1066 1056 } 1057 + 1058 + MODULE_ALIAS("nfs-layouttype4-2"); 1067 1059 1068 1060 module_init(objlayout_init); 1069 1061 module_exit(objlayout_exit);
+60 -9
fs/nfs/pagelist.c
··· 230 230 */ 231 231 void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 232 232 struct inode *inode, 233 - int (*doio)(struct nfs_pageio_descriptor *), 233 + const struct nfs_pageio_ops *pg_ops, 234 234 size_t bsize, 235 235 int io_flags) 236 236 { ··· 240 240 desc->pg_bsize = bsize; 241 241 desc->pg_base = 0; 242 242 desc->pg_moreio = 0; 243 + desc->pg_recoalesce = 0; 243 244 desc->pg_inode = inode; 244 - desc->pg_doio = doio; 245 + desc->pg_ops = pg_ops; 245 246 desc->pg_ioflags = io_flags; 246 247 desc->pg_error = 0; 247 248 desc->pg_lseg = NULL; 248 - desc->pg_test = nfs_generic_pg_test; 249 - pnfs_pageio_init(desc, inode); 250 249 } 251 250 252 251 /** ··· 275 276 return false; 276 277 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 277 278 return false; 278 - return pgio->pg_test(pgio, prev, req); 279 + return pgio->pg_ops->pg_test(pgio, prev, req); 279 280 } 280 281 281 282 /** ··· 296 297 if (!nfs_can_coalesce_requests(prev, req, desc)) 297 298 return 0; 298 299 } else { 300 + if (desc->pg_ops->pg_init) 301 + desc->pg_ops->pg_init(desc, req); 299 302 desc->pg_base = req->wb_pgbase; 300 303 } 301 304 nfs_list_remove_request(req); ··· 312 311 static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 313 312 { 314 313 if (!list_empty(&desc->pg_list)) { 315 - int error = desc->pg_doio(desc); 314 + int error = desc->pg_ops->pg_doio(desc); 316 315 if (error < 0) 317 316 desc->pg_error = error; 318 317 else ··· 332 331 * Returns true if the request 'req' was successfully coalesced into the 333 332 * existing list of pages 'desc'. 
334 333 */ 335 - int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 334 + static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 336 335 struct nfs_page *req) 337 336 { 338 337 while (!nfs_pageio_do_add_request(desc, req)) { ··· 341 340 if (desc->pg_error < 0) 342 341 return 0; 343 342 desc->pg_moreio = 0; 343 + if (desc->pg_recoalesce) 344 + return 0; 344 345 } 345 346 return 1; 347 + } 348 + 349 + static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) 350 + { 351 + LIST_HEAD(head); 352 + 353 + do { 354 + list_splice_init(&desc->pg_list, &head); 355 + desc->pg_bytes_written -= desc->pg_count; 356 + desc->pg_count = 0; 357 + desc->pg_base = 0; 358 + desc->pg_recoalesce = 0; 359 + 360 + while (!list_empty(&head)) { 361 + struct nfs_page *req; 362 + 363 + req = list_first_entry(&head, struct nfs_page, wb_list); 364 + nfs_list_remove_request(req); 365 + if (__nfs_pageio_add_request(desc, req)) 366 + continue; 367 + if (desc->pg_error < 0) 368 + return 0; 369 + break; 370 + } 371 + } while (desc->pg_recoalesce); 372 + return 1; 373 + } 374 + 375 + int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 376 + struct nfs_page *req) 377 + { 378 + int ret; 379 + 380 + do { 381 + ret = __nfs_pageio_add_request(desc, req); 382 + if (ret) 383 + break; 384 + if (desc->pg_error < 0) 385 + break; 386 + ret = nfs_do_recoalesce(desc); 387 + } while (ret); 388 + return ret; 346 389 } 347 390 348 391 /** ··· 395 350 */ 396 351 void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) 397 352 { 398 - nfs_pageio_doio(desc); 353 + for (;;) { 354 + nfs_pageio_doio(desc); 355 + if (!desc->pg_recoalesce) 356 + break; 357 + if (!nfs_do_recoalesce(desc)) 358 + break; 359 + } 399 360 } 400 361 401 362 /** ··· 420 369 if (!list_empty(&desc->pg_list)) { 421 370 struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); 422 371 if (index != prev->wb_index + 1) 423 - nfs_pageio_doio(desc); 372 + nfs_pageio_complete(desc); 424 373 } 425 374 } 426 
375
+188 -33
fs/nfs/pnfs.c
··· 28 28 */ 29 29 30 30 #include <linux/nfs_fs.h> 31 + #include <linux/nfs_page.h> 31 32 #include "internal.h" 32 33 #include "pnfs.h" 33 34 #include "iostat.h" ··· 449 448 void 450 449 pnfs_destroy_all_layouts(struct nfs_client *clp) 451 450 { 451 + struct nfs_server *server; 452 452 struct pnfs_layout_hdr *lo; 453 453 LIST_HEAD(tmp_list); 454 454 455 + nfs4_deviceid_mark_client_invalid(clp); 456 + nfs4_deviceid_purge_client(clp); 457 + 455 458 spin_lock(&clp->cl_lock); 456 - list_splice_init(&clp->cl_layouts, &tmp_list); 459 + rcu_read_lock(); 460 + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 461 + if (!list_empty(&server->layouts)) 462 + list_splice_init(&server->layouts, &tmp_list); 463 + } 464 + rcu_read_unlock(); 457 465 spin_unlock(&clp->cl_lock); 458 466 459 467 while (!list_empty(&tmp_list)) { ··· 671 661 lrp->args.stateid = stateid; 672 662 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 673 663 lrp->args.inode = ino; 664 + lrp->args.layout = lo; 674 665 lrp->clp = NFS_SERVER(ino)->nfs_client; 675 666 676 667 status = nfs4_proc_layoutreturn(lrp); ··· 931 920 }; 932 921 unsigned pg_offset; 933 922 struct nfs_inode *nfsi = NFS_I(ino); 934 - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 923 + struct nfs_server *server = NFS_SERVER(ino); 924 + struct nfs_client *clp = server->nfs_client; 935 925 struct pnfs_layout_hdr *lo; 936 926 struct pnfs_layout_segment *lseg = NULL; 937 927 bool first = false; ··· 976 964 */ 977 965 spin_lock(&clp->cl_lock); 978 966 BUG_ON(!list_empty(&lo->plh_layouts)); 979 - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 967 + list_add_tail(&lo->plh_layouts, &server->layouts); 980 968 spin_unlock(&clp->cl_lock); 981 969 } 982 970 ··· 985 973 arg.offset -= pg_offset; 986 974 arg.length += pg_offset; 987 975 } 988 - arg.length = PAGE_CACHE_ALIGN(arg.length); 976 + if (arg.length != NFS4_MAX_UINT64) 977 + arg.length = PAGE_CACHE_ALIGN(arg.length); 989 978 990 979 lseg = send_layoutget(lo, 
ctx, &arg, gfp_flags); 991 980 if (!lseg && first) { ··· 1004 991 spin_unlock(&ino->i_lock); 1005 992 goto out; 1006 993 } 994 + EXPORT_SYMBOL_GPL(pnfs_update_layout); 1007 995 1008 996 int 1009 997 pnfs_layout_process(struct nfs4_layoutget *lgp) ··· 1062 1048 goto out; 1063 1049 } 1064 1050 1051 + void 1052 + pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1053 + { 1054 + BUG_ON(pgio->pg_lseg != NULL); 1055 + 1056 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1057 + req->wb_context, 1058 + req_offset(req), 1059 + req->wb_bytes, 1060 + IOMODE_READ, 1061 + GFP_KERNEL); 1062 + /* If no lseg, fall back to read through mds */ 1063 + if (pgio->pg_lseg == NULL) 1064 + nfs_pageio_reset_read_mds(pgio); 1065 + 1066 + } 1067 + EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); 1068 + 1069 + void 1070 + pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1071 + { 1072 + BUG_ON(pgio->pg_lseg != NULL); 1073 + 1074 + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1075 + req->wb_context, 1076 + req_offset(req), 1077 + req->wb_bytes, 1078 + IOMODE_RW, 1079 + GFP_NOFS); 1080 + /* If no lseg, fall back to write through mds */ 1081 + if (pgio->pg_lseg == NULL) 1082 + nfs_pageio_reset_write_mds(pgio); 1083 + } 1084 + EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 1085 + 1086 + bool 1087 + pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1088 + { 1089 + struct nfs_server *server = NFS_SERVER(inode); 1090 + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1091 + 1092 + if (ld == NULL) 1093 + return false; 1094 + nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); 1095 + return true; 1096 + } 1097 + 1098 + bool 1099 + pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 1100 + { 1101 + struct nfs_server *server = NFS_SERVER(inode); 1102 + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 1103 + 1104 + if (ld == 
NULL) 1105 + return false; 1106 + nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); 1107 + return true; 1108 + } 1109 + 1065 1110 bool 1066 1111 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1067 1112 struct nfs_page *req) 1068 1113 { 1069 - enum pnfs_iomode access_type; 1070 - gfp_t gfp_flags; 1071 - 1072 - /* We assume that pg_ioflags == 0 iff we're reading a page */ 1073 - if (pgio->pg_ioflags == 0) { 1074 - access_type = IOMODE_READ; 1075 - gfp_flags = GFP_KERNEL; 1076 - } else { 1077 - access_type = IOMODE_RW; 1078 - gfp_flags = GFP_NOFS; 1079 - } 1080 - 1081 - if (pgio->pg_lseg == NULL) { 1082 - if (pgio->pg_count != prev->wb_bytes) 1083 - return true; 1084 - /* This is first coelesce call for a series of nfs_pages */ 1085 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1086 - prev->wb_context, 1087 - req_offset(prev), 1088 - pgio->pg_count, 1089 - access_type, 1090 - gfp_flags); 1091 - if (pgio->pg_lseg == NULL) 1092 - return true; 1093 - } 1114 + if (pgio->pg_lseg == NULL) 1115 + return nfs_generic_pg_test(pgio, prev, req); 1094 1116 1095 1117 /* 1096 1118 * Test if a nfs_page is fully contained in the pnfs_layout_range. 
··· 1170 1120 } 1171 1121 EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1172 1122 1173 - enum pnfs_try_status 1123 + static void 1124 + pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1125 + struct nfs_write_data *data) 1126 + { 1127 + list_splice_tail_init(&data->pages, &desc->pg_list); 1128 + if (data->req && list_empty(&data->req->wb_list)) 1129 + nfs_list_add_request(data->req, &desc->pg_list); 1130 + nfs_pageio_reset_write_mds(desc); 1131 + desc->pg_recoalesce = 1; 1132 + nfs_writedata_release(data); 1133 + } 1134 + 1135 + static enum pnfs_try_status 1174 1136 pnfs_try_to_write_data(struct nfs_write_data *wdata, 1175 - const struct rpc_call_ops *call_ops, int how) 1137 + const struct rpc_call_ops *call_ops, 1138 + struct pnfs_layout_segment *lseg, 1139 + int how) 1176 1140 { 1177 1141 struct inode *inode = wdata->inode; 1178 1142 enum pnfs_try_status trypnfs; 1179 1143 struct nfs_server *nfss = NFS_SERVER(inode); 1180 1144 1181 1145 wdata->mds_ops = call_ops; 1146 + wdata->lseg = get_lseg(lseg); 1182 1147 1183 1148 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1184 1149 inode->i_ino, wdata->args.count, wdata->args.offset, how); ··· 1208 1143 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1209 1144 return trypnfs; 1210 1145 } 1146 + 1147 + static void 1148 + pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) 1149 + { 1150 + struct nfs_write_data *data; 1151 + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1152 + struct pnfs_layout_segment *lseg = desc->pg_lseg; 1153 + 1154 + desc->pg_lseg = NULL; 1155 + while (!list_empty(head)) { 1156 + enum pnfs_try_status trypnfs; 1157 + 1158 + data = list_entry(head->next, struct nfs_write_data, list); 1159 + list_del_init(&data->list); 1160 + 1161 + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1162 + if (trypnfs == PNFS_NOT_ATTEMPTED) 1163 + pnfs_write_through_mds(desc, data); 1164 + } 1165 + put_lseg(lseg); 1166 + } 1167 + 1168 
+ int 1169 + pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1170 + { 1171 + LIST_HEAD(head); 1172 + int ret; 1173 + 1174 + ret = nfs_generic_flush(desc, &head); 1175 + if (ret != 0) { 1176 + put_lseg(desc->pg_lseg); 1177 + desc->pg_lseg = NULL; 1178 + return ret; 1179 + } 1180 + pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); 1181 + return 0; 1182 + } 1183 + EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1211 1184 1212 1185 /* 1213 1186 * Called by non rpc-based layout drivers ··· 1270 1167 } 1271 1168 EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1272 1169 1170 + static void 1171 + pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1172 + struct nfs_read_data *data) 1173 + { 1174 + list_splice_tail_init(&data->pages, &desc->pg_list); 1175 + if (data->req && list_empty(&data->req->wb_list)) 1176 + nfs_list_add_request(data->req, &desc->pg_list); 1177 + nfs_pageio_reset_read_mds(desc); 1178 + desc->pg_recoalesce = 1; 1179 + nfs_readdata_release(data); 1180 + } 1181 + 1273 1182 /* 1274 1183 * Call the appropriate parallel I/O subsystem read function. 
1275 1184 */ 1276 - enum pnfs_try_status 1185 + static enum pnfs_try_status 1277 1186 pnfs_try_to_read_data(struct nfs_read_data *rdata, 1278 - const struct rpc_call_ops *call_ops) 1187 + const struct rpc_call_ops *call_ops, 1188 + struct pnfs_layout_segment *lseg) 1279 1189 { 1280 1190 struct inode *inode = rdata->inode; 1281 1191 struct nfs_server *nfss = NFS_SERVER(inode); 1282 1192 enum pnfs_try_status trypnfs; 1283 1193 1284 1194 rdata->mds_ops = call_ops; 1195 + rdata->lseg = get_lseg(lseg); 1285 1196 1286 1197 dprintk("%s: Reading ino:%lu %u@%llu\n", 1287 1198 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); ··· 1310 1193 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1311 1194 return trypnfs; 1312 1195 } 1196 + 1197 + static void 1198 + pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) 1199 + { 1200 + struct nfs_read_data *data; 1201 + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1202 + struct pnfs_layout_segment *lseg = desc->pg_lseg; 1203 + 1204 + desc->pg_lseg = NULL; 1205 + while (!list_empty(head)) { 1206 + enum pnfs_try_status trypnfs; 1207 + 1208 + data = list_entry(head->next, struct nfs_read_data, list); 1209 + list_del_init(&data->list); 1210 + 1211 + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1212 + if (trypnfs == PNFS_NOT_ATTEMPTED) 1213 + pnfs_read_through_mds(desc, data); 1214 + } 1215 + put_lseg(lseg); 1216 + } 1217 + 1218 + int 1219 + pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1220 + { 1221 + LIST_HEAD(head); 1222 + int ret; 1223 + 1224 + ret = nfs_generic_pagein(desc, &head); 1225 + if (ret != 0) { 1226 + put_lseg(desc->pg_lseg); 1227 + desc->pg_lseg = NULL; 1228 + return ret; 1229 + } 1230 + pnfs_do_multiple_reads(desc, &head); 1231 + return 0; 1232 + } 1233 + EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1313 1234 1314 1235 /* 1315 1236 * Currently there is only one (whole file) write lseg.
+31 -43
fs/nfs/pnfs.h
··· 87 87 void (*free_lseg) (struct pnfs_layout_segment *lseg); 88 88 89 89 /* test for nfs page cache coalescing */ 90 - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 90 + const struct nfs_pageio_ops *pg_read_ops; 91 + const struct nfs_pageio_ops *pg_write_ops; 91 92 92 93 /* Returns true if layoutdriver wants to divert this request to 93 94 * driver's commit routine. ··· 149 148 /* pnfs.c */ 150 149 void get_layout_hdr(struct pnfs_layout_hdr *lo); 151 150 void put_lseg(struct pnfs_layout_segment *lseg); 152 - struct pnfs_layout_segment * 153 - pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 154 - loff_t pos, u64 count, enum pnfs_iomode access_type, 155 - gfp_t gfp_flags); 151 + 152 + bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); 153 + bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); 154 + 156 155 void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 157 156 void unset_pnfs_layoutdriver(struct nfs_server *); 158 - enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 159 - const struct rpc_call_ops *, int); 160 - enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, 161 - const struct rpc_call_ops *); 157 + void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); 158 + int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); 159 + void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); 160 + int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); 162 161 bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 163 162 int pnfs_layout_process(struct nfs4_layoutget *lgp); 164 163 void pnfs_free_lseg_list(struct list_head *tmp_list); ··· 183 182 int _pnfs_return_layout(struct inode *); 184 183 int pnfs_ld_write_done(struct nfs_write_data *); 185 184 int pnfs_ld_read_done(struct 
nfs_read_data *); 185 + struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 186 + struct nfs_open_context *ctx, 187 + loff_t pos, 188 + u64 count, 189 + enum pnfs_iomode iomode, 190 + gfp_t gfp_flags); 191 + 192 + void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 193 + 194 + /* nfs4_deviceid_flags */ 195 + enum { 196 + NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ 197 + }; 186 198 187 199 /* pnfs_dev.c */ 188 200 struct nfs4_deviceid_node { ··· 203 189 struct hlist_node tmpnode; 204 190 const struct pnfs_layoutdriver_type *ld; 205 191 const struct nfs_client *nfs_client; 192 + unsigned long flags; 206 193 struct nfs4_deviceid deviceid; 207 194 atomic_t ref; 208 195 }; 209 196 210 197 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 211 198 struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 212 - struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 213 199 void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 214 200 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, 215 201 const struct pnfs_layoutdriver_type *, ··· 307 293 return 0; 308 294 } 309 295 310 - static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, 311 - struct inode *inode) 312 - { 313 - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 314 - 315 - if (ld) 316 - pgio->pg_test = ld->pg_test; 317 - } 318 - 319 296 #else /* CONFIG_NFS_V4_1 */ 320 297 321 298 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) ··· 325 320 326 321 static inline void put_lseg(struct pnfs_layout_segment *lseg) 327 322 { 328 - } 329 - 330 - static inline struct pnfs_layout_segment * 331 - pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 
332 - loff_t pos, u64 count, enum pnfs_iomode access_type, 333 - gfp_t gfp_flags) 334 - { 335 - return NULL; 336 - } 337 - 338 - static inline enum pnfs_try_status 339 - pnfs_try_to_read_data(struct nfs_read_data *data, 340 - const struct rpc_call_ops *call_ops) 341 - { 342 - return PNFS_NOT_ATTEMPTED; 343 - } 344 - 345 - static inline enum pnfs_try_status 346 - pnfs_try_to_write_data(struct nfs_write_data *data, 347 - const struct rpc_call_ops *call_ops, int how) 348 - { 349 - return PNFS_NOT_ATTEMPTED; 350 323 } 351 324 352 325 static inline int pnfs_return_layout(struct inode *ino) ··· 368 385 { 369 386 } 370 387 371 - static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, 372 - struct inode *inode) 388 + static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 373 389 { 390 + return false; 391 + } 392 + 393 + static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) 394 + { 395 + return false; 374 396 } 375 397 376 398 static inline void
+31 -33
fs/nfs/pnfs_dev.c
··· 100 100 101 101 rcu_read_lock(); 102 102 d = _lookup_deviceid(ld, clp, id, hash); 103 - if (d && !atomic_inc_not_zero(&d->ref)) 104 - d = NULL; 103 + if (d != NULL) 104 + atomic_inc(&d->ref); 105 105 rcu_read_unlock(); 106 106 return d; 107 107 } ··· 115 115 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); 116 116 117 117 /* 118 - * Unhash and put deviceid 118 + * Remove a deviceid from cache 119 119 * 120 120 * @clp nfs_client associated with deviceid 121 121 * @id the deviceid to unhash 122 122 * 123 123 * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. 124 124 */ 125 - struct nfs4_deviceid_node * 126 - nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, 125 + void 126 + nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, 127 127 const struct nfs_client *clp, const struct nfs4_deviceid *id) 128 128 { 129 129 struct nfs4_deviceid_node *d; ··· 134 134 rcu_read_unlock(); 135 135 if (!d) { 136 136 spin_unlock(&nfs4_deviceid_lock); 137 - return NULL; 137 + return; 138 138 } 139 139 hlist_del_init_rcu(&d->node); 140 140 spin_unlock(&nfs4_deviceid_lock); ··· 142 142 143 143 /* balance the initial ref set in pnfs_insert_deviceid */ 144 144 if (atomic_dec_and_test(&d->ref)) 145 - return d; 146 - 147 - return NULL; 148 - } 149 - EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); 150 - 151 - /* 152 - * Delete a deviceid from cache 153 - * 154 - * @clp struct nfs_client qualifying the deviceid 155 - * @id deviceid to delete 156 - */ 157 - void 158 - nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, 159 - const struct nfs_client *clp, const struct nfs4_deviceid *id) 160 - { 161 - struct nfs4_deviceid_node *d; 162 - 163 - d = nfs4_unhash_put_deviceid(ld, clp, id); 164 - if (!d) 165 - return; 166 - d->ld->free_deviceid_node(d); 145 + d->ld->free_deviceid_node(d); 167 146 } 168 147 EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); 169 148 ··· 156 177 INIT_HLIST_NODE(&d->tmpnode); 157 178 d->ld = ld; 158 179 d->nfs_client = 
nfs_client; 180 + d->flags = 0; 159 181 d->deviceid = *id; 160 182 atomic_set(&d->ref, 1); 161 183 } ··· 201 221 * 202 222 * @d deviceid node to put 203 223 * 204 - * @ret true iff the node was deleted 224 + * return true iff the node was deleted 225 + * Note that since the test for d->ref == 0 is sufficient to establish 226 + * that the node is no longer hashed in the global device id cache. 205 227 */ 206 228 bool 207 229 nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) 208 230 { 209 - if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) 231 + if (!atomic_dec_and_test(&d->ref)) 210 232 return false; 211 - hlist_del_init_rcu(&d->node); 212 - spin_unlock(&nfs4_deviceid_lock); 213 - synchronize_rcu(); 214 233 d->ld->free_deviceid_node(d); 215 234 return true; 216 235 } ··· 253 274 return; 254 275 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) 255 276 _deviceid_purge_client(clp, h); 277 + } 278 + 279 + /* 280 + * Stop use of all deviceids associated with an nfs_client 281 + */ 282 + void 283 + nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) 284 + { 285 + struct nfs4_deviceid_node *d; 286 + struct hlist_node *n; 287 + int i; 288 + 289 + rcu_read_lock(); 290 + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ 291 + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) 292 + if (d->nfs_client == clp) 293 + set_bit(NFS_DEVICEID_INVALID, &d->flags); 294 + } 295 + rcu_read_unlock(); 256 296 }
+94 -72
fs/nfs/read.c
··· 30 30 31 31 #define NFSDBG_FACILITY NFSDBG_PAGECACHE 32 32 33 - static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); 34 - static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); 33 + static const struct nfs_pageio_ops nfs_pageio_read_ops; 35 34 static const struct rpc_call_ops nfs_read_partial_ops; 36 35 static const struct rpc_call_ops nfs_read_full_ops; 37 36 ··· 67 68 mempool_free(p, nfs_rdata_mempool); 68 69 } 69 70 70 - static void nfs_readdata_release(struct nfs_read_data *rdata) 71 + void nfs_readdata_release(struct nfs_read_data *rdata) 71 72 { 72 73 put_lseg(rdata->lseg); 73 74 put_nfs_open_context(rdata->args.context); ··· 112 113 } 113 114 } 114 115 116 + static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 117 + struct inode *inode) 118 + { 119 + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 120 + NFS_SERVER(inode)->rsize, 0); 121 + } 122 + 123 + void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) 124 + { 125 + pgio->pg_ops = &nfs_pageio_read_ops; 126 + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; 127 + } 128 + EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 129 + 130 + static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 131 + struct inode *inode) 132 + { 133 + if (!pnfs_pageio_init_read(pgio, inode)) 134 + nfs_pageio_init_read_mds(pgio, inode); 135 + } 136 + 115 137 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 116 138 struct page *page) 117 139 { ··· 151 131 if (len < PAGE_CACHE_SIZE) 152 132 zero_user_segment(page, len, PAGE_CACHE_SIZE); 153 133 154 - nfs_pageio_init(&pgio, inode, NULL, 0, 0); 155 - nfs_list_add_request(new, &pgio.pg_list); 156 - pgio.pg_count = len; 157 - 158 - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 159 - nfs_pagein_multi(&pgio); 160 - else 161 - nfs_pagein_one(&pgio); 134 + nfs_pageio_init_read(&pgio, inode); 135 + nfs_pageio_add_request(&pgio, new); 136 + nfs_pageio_complete(&pgio); 162 137 return 0; 163 138 } 
164 139 ··· 217 202 /* 218 203 * Set up the NFS read request struct 219 204 */ 220 - static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 221 - const struct rpc_call_ops *call_ops, 222 - unsigned int count, unsigned int offset, 223 - struct pnfs_layout_segment *lseg) 205 + static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 206 + unsigned int count, unsigned int offset) 224 207 { 225 208 struct inode *inode = req->wb_context->dentry->d_inode; 226 209 227 210 data->req = req; 228 211 data->inode = inode; 229 212 data->cred = req->wb_context->cred; 230 - data->lseg = get_lseg(lseg); 231 213 232 214 data->args.fh = NFS_FH(inode); 233 215 data->args.offset = req_offset(req) + offset; ··· 238 226 data->res.count = count; 239 227 data->res.eof = 0; 240 228 nfs_fattr_init(&data->fattr); 229 + } 241 230 242 - if (data->lseg && 243 - (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) 244 - return 0; 231 + static int nfs_do_read(struct nfs_read_data *data, 232 + const struct rpc_call_ops *call_ops) 233 + { 234 + struct inode *inode = data->args.context->dentry->d_inode; 245 235 246 236 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 237 + } 238 + 239 + static int 240 + nfs_do_multiple_reads(struct list_head *head, 241 + const struct rpc_call_ops *call_ops) 242 + { 243 + struct nfs_read_data *data; 244 + int ret = 0; 245 + 246 + while (!list_empty(head)) { 247 + int ret2; 248 + 249 + data = list_entry(head->next, struct nfs_read_data, list); 250 + list_del_init(&data->list); 251 + 252 + ret2 = nfs_do_read(data, call_ops); 253 + if (ret == 0) 254 + ret = ret2; 255 + } 256 + return ret; 247 257 } 248 258 249 259 static void ··· 294 260 * won't see the new data until our attribute cache is updated. This is more 295 261 * or less conventional NFS client behavior. 
296 262 */ 297 - static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) 263 + static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 298 264 { 299 265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 300 266 struct page *page = req->wb_page; 301 267 struct nfs_read_data *data; 302 - size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; 268 + size_t rsize = desc->pg_bsize, nbytes; 303 269 unsigned int offset; 304 270 int requests = 0; 305 271 int ret = 0; 306 - struct pnfs_layout_segment *lseg; 307 - LIST_HEAD(list); 308 272 309 273 nfs_list_remove_request(req); 310 274 275 + offset = 0; 311 276 nbytes = desc->pg_count; 312 277 do { 313 278 size_t len = min(nbytes,rsize); ··· 314 281 data = nfs_readdata_alloc(1); 315 282 if (!data) 316 283 goto out_bad; 317 - list_add(&data->pages, &list); 284 + data->pagevec[0] = page; 285 + nfs_read_rpcsetup(req, data, len, offset); 286 + list_add(&data->list, res); 318 287 requests++; 319 288 nbytes -= len; 289 + offset += len; 320 290 } while(nbytes != 0); 321 291 atomic_set(&req->wb_complete, requests); 322 - 323 - BUG_ON(desc->pg_lseg != NULL); 324 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 325 - req_offset(req), desc->pg_count, 326 - IOMODE_READ, GFP_KERNEL); 327 292 ClearPageError(page); 328 - offset = 0; 329 - nbytes = desc->pg_count; 330 - do { 331 - int ret2; 332 - 333 - data = list_entry(list.next, struct nfs_read_data, pages); 334 - list_del_init(&data->pages); 335 - 336 - data->pagevec[0] = page; 337 - 338 - if (nbytes < rsize) 339 - rsize = nbytes; 340 - ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 341 - rsize, offset, lseg); 342 - if (ret == 0) 343 - ret = ret2; 344 - offset += rsize; 345 - nbytes -= rsize; 346 - } while (nbytes != 0); 347 - put_lseg(lseg); 348 - desc->pg_lseg = NULL; 349 - 293 + desc->pg_rpc_callops = &nfs_read_partial_ops; 350 294 return ret; 351 - 352 295 out_bad: 353 - while (!list_empty(&list)) { 354 - data = 
list_entry(list.next, struct nfs_read_data, pages); 355 - list_del(&data->pages); 296 + while (!list_empty(res)) { 297 + data = list_entry(res->next, struct nfs_read_data, list); 298 + list_del(&data->list); 356 299 nfs_readdata_free(data); 357 300 } 358 301 SetPageError(page); ··· 336 327 return -ENOMEM; 337 328 } 338 329 339 - static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) 330 + static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 340 331 { 341 332 struct nfs_page *req; 342 333 struct page **pages; 343 334 struct nfs_read_data *data; 344 335 struct list_head *head = &desc->pg_list; 345 - struct pnfs_layout_segment *lseg = desc->pg_lseg; 346 - int ret = -ENOMEM; 336 + int ret = 0; 347 337 348 338 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 349 339 desc->pg_count)); 350 340 if (!data) { 351 341 nfs_async_read_error(head); 342 + ret = -ENOMEM; 352 343 goto out; 353 344 } 354 345 ··· 361 352 *pages++ = req->wb_page; 362 353 } 363 354 req = nfs_list_entry(data->pages.next); 364 - if ((!lseg) && list_is_singular(&data->pages)) 365 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 366 - req_offset(req), desc->pg_count, 367 - IOMODE_READ, GFP_KERNEL); 368 355 369 - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 370 - 0, lseg); 356 + nfs_read_rpcsetup(req, data, desc->pg_count, 0); 357 + list_add(&data->list, res); 358 + desc->pg_rpc_callops = &nfs_read_full_ops; 371 359 out: 372 - put_lseg(lseg); 373 - desc->pg_lseg = NULL; 374 360 return ret; 375 361 } 362 + 363 + int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 364 + { 365 + if (desc->pg_bsize < PAGE_CACHE_SIZE) 366 + return nfs_pagein_multi(desc, head); 367 + return nfs_pagein_one(desc, head); 368 + } 369 + 370 + static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 371 + { 372 + LIST_HEAD(head); 373 + int ret; 374 + 375 + ret = nfs_generic_pagein(desc, &head); 376 + if 
(ret == 0) 377 + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 378 + return ret; 379 + } 380 + 381 + static const struct nfs_pageio_ops nfs_pageio_read_ops = { 382 + .pg_test = nfs_generic_pg_test, 383 + .pg_doio = nfs_generic_pg_readpages, 384 + }; 376 385 377 386 /* 378 387 * This is the callback from RPC telling us whether a reply was ··· 662 635 .pgio = &pgio, 663 636 }; 664 637 struct inode *inode = mapping->host; 665 - struct nfs_server *server = NFS_SERVER(inode); 666 - size_t rsize = server->rsize; 667 638 unsigned long npages; 668 639 int ret = -ESTALE; 669 640 ··· 689 664 if (ret == 0) 690 665 goto read_complete; /* all pages were read */ 691 666 692 - if (rsize < PAGE_CACHE_SIZE) 693 - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 694 - else 695 - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); 667 + nfs_pageio_init_read(&pgio, inode); 696 668 697 669 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 698 670
+27 -10
fs/nfs/unlink.c
··· 147 147 148 148 alias = d_lookup(parent, &data->args.name); 149 149 if (alias != NULL) { 150 - int ret = 0; 150 + int ret; 151 151 void *devname_garbage = NULL; 152 152 153 153 /* ··· 155 155 * the sillyrename information to the aliased dentry. 156 156 */ 157 157 nfs_free_dname(data); 158 + ret = nfs_copy_dname(alias, data); 158 159 spin_lock(&alias->d_lock); 159 - if (alias->d_inode != NULL && 160 + if (ret == 0 && alias->d_inode != NULL && 160 161 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 161 162 devname_garbage = alias->d_fsdata; 162 163 alias->d_fsdata = data; 163 164 alias->d_flags |= DCACHE_NFSFS_RENAMED; 164 165 ret = 1; 165 - } 166 + } else 167 + ret = 0; 166 168 spin_unlock(&alias->d_lock); 167 169 nfs_dec_sillycount(dir); 168 170 dput(alias); ··· 173 171 * point dentry is definitely not a root, so we won't need 174 172 * that anymore. 175 173 */ 176 - if (devname_garbage) 177 - kfree(devname_garbage); 174 + kfree(devname_garbage); 178 175 return ret; 179 176 } 180 177 data->dir = igrab(dir); ··· 205 204 if (parent == NULL) 206 205 goto out_free; 207 206 dir = parent->d_inode; 208 - if (nfs_copy_dname(dentry, data) != 0) 209 - goto out_dput; 210 207 /* Non-exclusive lock protects against concurrent lookup() calls */ 211 208 spin_lock(&dir->i_lock); 212 209 if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { ··· 365 366 struct nfs_renamedata *data = calldata; 366 367 struct inode *old_dir = data->old_dir; 367 368 struct inode *new_dir = data->new_dir; 369 + struct dentry *old_dentry = data->old_dentry; 370 + struct dentry *new_dentry = data->new_dentry; 368 371 369 372 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { 370 373 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); ··· 374 373 } 375 374 376 375 if (task->tk_status != 0) { 377 - nfs_cancel_async_unlink(data->old_dentry); 376 + nfs_cancel_async_unlink(old_dentry); 378 377 return; 379 378 } 380 379 381 - nfs_set_verifier(data->old_dentry, 
nfs_save_change_attribute(old_dir)); 382 - d_move(data->old_dentry, data->new_dentry); 380 + d_drop(old_dentry); 381 + d_drop(new_dentry); 383 382 } 384 383 385 384 /** ··· 502 501 * and only performs the unlink once the last reference to it is put. 503 502 * 504 503 * The final cleanup is done during dentry_iput. 504 + * 505 + * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server 506 + * could take responsibility for keeping open files referenced. The server 507 + * would also need to ensure that opened-but-deleted files were kept over 508 + * reboots. However, we may not assume a server does so. (RFC 5661 509 + * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can 510 + * use to advertise that it does this; some day we may take advantage of 511 + * it.)) 505 512 */ 506 513 int 507 514 nfs_sillyrename(struct inode *dir, struct dentry *dentry) ··· 568 559 error = nfs_async_unlink(dir, dentry); 569 560 if (error) 570 561 goto out_dput; 562 + 563 + /* populate unlinkdata with the right dname */ 564 + error = nfs_copy_dname(sdentry, 565 + (struct nfs_unlinkdata *)dentry->d_fsdata); 566 + if (error) { 567 + nfs_cancel_async_unlink(dentry); 568 + goto out_dput; 569 + } 571 570 572 571 /* run the rename task, undo unlink if it fails */ 573 572 task = nfs_async_rename(dir, dir, dentry, sdentry);
+92 -64
fs/nfs/write.c
··· 97 97 mempool_free(p, nfs_wdata_mempool); 98 98 } 99 99 100 - static void nfs_writedata_release(struct nfs_write_data *wdata) 100 + void nfs_writedata_release(struct nfs_write_data *wdata) 101 101 { 102 102 put_lseg(wdata->lseg); 103 103 put_nfs_open_context(wdata->args.context); ··· 845 845 /* 846 846 * Set up the argument/result storage required for the RPC call. 847 847 */ 848 - static int nfs_write_rpcsetup(struct nfs_page *req, 848 + static void nfs_write_rpcsetup(struct nfs_page *req, 849 849 struct nfs_write_data *data, 850 - const struct rpc_call_ops *call_ops, 851 850 unsigned int count, unsigned int offset, 852 - struct pnfs_layout_segment *lseg, 853 851 int how) 854 852 { 855 853 struct inode *inode = req->wb_context->dentry->d_inode; ··· 858 860 data->req = req; 859 861 data->inode = inode = req->wb_context->dentry->d_inode; 860 862 data->cred = req->wb_context->cred; 861 - data->lseg = get_lseg(lseg); 862 863 863 864 data->args.fh = NFS_FH(inode); 864 865 data->args.offset = req_offset(req) + offset; ··· 869 872 data->args.context = get_nfs_open_context(req->wb_context); 870 873 data->args.lock_context = req->wb_lock_context; 871 874 data->args.stable = NFS_UNSTABLE; 872 - if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 873 - data->args.stable = NFS_DATA_SYNC; 874 - if (!nfs_need_commit(NFS_I(inode))) 875 - data->args.stable = NFS_FILE_SYNC; 875 + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 876 + case 0: 877 + break; 878 + case FLUSH_COND_STABLE: 879 + if (nfs_need_commit(NFS_I(inode))) 880 + break; 881 + default: 882 + data->args.stable = NFS_FILE_SYNC; 876 883 } 877 884 878 885 data->res.fattr = &data->fattr; 879 886 data->res.count = count; 880 887 data->res.verf = &data->verf; 881 888 nfs_fattr_init(&data->fattr); 889 + } 882 890 883 - if (data->lseg && 884 - (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) 885 - return 0; 891 + static int nfs_do_write(struct nfs_write_data *data, 892 + const struct rpc_call_ops 
*call_ops, 893 + int how) 894 + { 895 + struct inode *inode = data->args.context->dentry->d_inode; 886 896 887 897 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 898 + } 899 + 900 + static int nfs_do_multiple_writes(struct list_head *head, 901 + const struct rpc_call_ops *call_ops, 902 + int how) 903 + { 904 + struct nfs_write_data *data; 905 + int ret = 0; 906 + 907 + while (!list_empty(head)) { 908 + int ret2; 909 + 910 + data = list_entry(head->next, struct nfs_write_data, list); 911 + list_del_init(&data->list); 912 + 913 + ret2 = nfs_do_write(data, call_ops, how); 914 + if (ret == 0) 915 + ret = ret2; 916 + } 917 + return ret; 888 918 } 889 919 890 920 /* If a nfs_flush_* function fails, it should remove reqs from @head and ··· 931 907 * Generate multiple small requests to write out a single 932 908 * contiguous dirty area on one page. 933 909 */ 934 - static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) 910 + static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 935 911 { 936 912 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 937 913 struct page *page = req->wb_page; 938 914 struct nfs_write_data *data; 939 - size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; 915 + size_t wsize = desc->pg_bsize, nbytes; 940 916 unsigned int offset; 941 917 int requests = 0; 942 918 int ret = 0; 943 - struct pnfs_layout_segment *lseg; 944 - LIST_HEAD(list); 945 919 946 920 nfs_list_remove_request(req); 947 921 ··· 949 927 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 950 928 951 929 930 + offset = 0; 952 931 nbytes = desc->pg_count; 953 932 do { 954 933 size_t len = min(nbytes, wsize); ··· 957 934 data = nfs_writedata_alloc(1); 958 935 if (!data) 959 936 goto out_bad; 960 - list_add(&data->pages, &list); 937 + data->pagevec[0] = page; 938 + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); 939 + list_add(&data->list, res); 961 940 requests++; 962 941 nbytes -= len; 942 + offset += len; 963 
943 } while (nbytes != 0); 964 944 atomic_set(&req->wb_complete, requests); 965 - 966 - BUG_ON(desc->pg_lseg); 967 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 968 - req_offset(req), desc->pg_count, 969 - IOMODE_RW, GFP_NOFS); 970 - ClearPageError(page); 971 - offset = 0; 972 - nbytes = desc->pg_count; 973 - do { 974 - int ret2; 975 - 976 - data = list_entry(list.next, struct nfs_write_data, pages); 977 - list_del_init(&data->pages); 978 - 979 - data->pagevec[0] = page; 980 - 981 - if (nbytes < wsize) 982 - wsize = nbytes; 983 - ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 984 - wsize, offset, lseg, desc->pg_ioflags); 985 - if (ret == 0) 986 - ret = ret2; 987 - offset += wsize; 988 - nbytes -= wsize; 989 - } while (nbytes != 0); 990 - 991 - put_lseg(lseg); 992 - desc->pg_lseg = NULL; 945 + desc->pg_rpc_callops = &nfs_write_partial_ops; 993 946 return ret; 994 947 995 948 out_bad: 996 - while (!list_empty(&list)) { 997 - data = list_entry(list.next, struct nfs_write_data, pages); 998 - list_del(&data->pages); 949 + while (!list_empty(res)) { 950 + data = list_entry(res->next, struct nfs_write_data, list); 951 + list_del(&data->list); 999 952 nfs_writedata_free(data); 1000 953 } 1001 954 nfs_redirty_request(req); ··· 986 987 * This is the case if nfs_updatepage detects a conflicting request 987 988 * that has been written but not committed. 
988 989 */ 989 - static int nfs_flush_one(struct nfs_pageio_descriptor *desc) 990 + static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 990 991 { 991 992 struct nfs_page *req; 992 993 struct page **pages; 993 994 struct nfs_write_data *data; 994 995 struct list_head *head = &desc->pg_list; 995 - struct pnfs_layout_segment *lseg = desc->pg_lseg; 996 - int ret; 996 + int ret = 0; 997 997 998 998 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 999 999 desc->pg_count)); ··· 1014 1016 *pages++ = req->wb_page; 1015 1017 } 1016 1018 req = nfs_list_entry(data->pages.next); 1017 - if ((!lseg) && list_is_singular(&data->pages)) 1018 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 1019 - req_offset(req), desc->pg_count, 1020 - IOMODE_RW, GFP_NOFS); 1021 1019 1022 1020 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1023 1021 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1024 1022 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1025 1023 1026 1024 /* Set up the argument struct */ 1027 - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1025 + nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); 1026 + list_add(&data->list, res); 1027 + desc->pg_rpc_callops = &nfs_write_full_ops; 1028 1028 out: 1029 - put_lseg(lseg); /* Cleans any gotten in ->pg_test */ 1030 - desc->pg_lseg = NULL; 1031 1029 return ret; 1032 1030 } 1031 + 1032 + int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) 1033 + { 1034 + if (desc->pg_bsize < PAGE_CACHE_SIZE) 1035 + return nfs_flush_multi(desc, head); 1036 + return nfs_flush_one(desc, head); 1037 + } 1038 + 1039 + static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1040 + { 1041 + LIST_HEAD(head); 1042 + int ret; 1043 + 1044 + ret = nfs_generic_flush(desc, &head); 1045 + if (ret == 0) 1046 + ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, 1047 + desc->pg_ioflags); 1048 + 
return ret; 1049 + } 1050 + 1051 + static const struct nfs_pageio_ops nfs_pageio_write_ops = { 1052 + .pg_test = nfs_generic_pg_test, 1053 + .pg_doio = nfs_generic_pg_writepages, 1054 + }; 1055 + 1056 + static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 1057 + struct inode *inode, int ioflags) 1058 + { 1059 + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, 1060 + NFS_SERVER(inode)->wsize, ioflags); 1061 + } 1062 + 1063 + void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) 1064 + { 1065 + pgio->pg_ops = &nfs_pageio_write_ops; 1066 + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; 1067 + } 1068 + EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); 1033 1069 1034 1070 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1035 1071 struct inode *inode, int ioflags) 1036 1072 { 1037 - size_t wsize = NFS_SERVER(inode)->wsize; 1038 - 1039 - if (wsize < PAGE_CACHE_SIZE) 1040 - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1041 - else 1042 - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); 1073 + if (!pnfs_pageio_init_write(pgio, inode, ioflags)) 1074 + nfs_pageio_init_write_mds(pgio, inode, ioflags); 1043 1075 } 1044 1076 1045 1077 /*
+3
include/linux/nfs4.h
··· 563 563 NFSPROC4_CLNT_GETDEVICEINFO, 564 564 NFSPROC4_CLNT_LAYOUTCOMMIT, 565 565 NFSPROC4_CLNT_LAYOUTRETURN, 566 + NFSPROC4_CLNT_SECINFO_NO_NAME, 567 + NFSPROC4_CLNT_TEST_STATEID, 568 + NFSPROC4_CLNT_FREE_STATEID, 566 569 }; 567 570 568 571 /* nfs41 types */
+4 -1
include/linux/nfs_fs_sb.h
··· 16 16 struct nfs4_sequence_res; 17 17 struct nfs_server; 18 18 struct nfs4_minor_version_ops; 19 + struct server_scope; 19 20 20 21 /* 21 22 * The nfs_client identifies our client state to the server. ··· 78 77 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 79 78 u32 cl_exchange_flags; 80 79 struct nfs4_session *cl_session; /* shared session */ 81 - struct list_head cl_layouts; 82 80 #endif /* CONFIG_NFS_V4 */ 83 81 84 82 #ifdef CONFIG_NFS_FSCACHE 85 83 struct fscache_cookie *fscache; /* client index cache cookie */ 86 84 #endif 85 + 86 + struct server_scope *server_scope; /* from exchange_id */ 87 87 }; 88 88 89 89 /* ··· 151 149 struct rb_root openowner_id; 152 150 struct rb_root lockowner_id; 153 151 #endif 152 + struct list_head layouts; 154 153 struct list_head delegations; 155 154 void (*destroy)(struct nfs_server *); 156 155
+12 -5
include/linux/nfs_page.h
··· 55 55 struct nfs_writeverf wb_verf; /* Commit cookie */ 56 56 }; 57 57 58 + struct nfs_pageio_descriptor; 59 + struct nfs_pageio_ops { 60 + void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); 61 + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 62 + int (*pg_doio)(struct nfs_pageio_descriptor *); 63 + }; 64 + 58 65 struct nfs_pageio_descriptor { 59 66 struct list_head pg_list; 60 67 unsigned long pg_bytes_written; 61 68 size_t pg_count; 62 69 size_t pg_bsize; 63 70 unsigned int pg_base; 64 - char pg_moreio; 71 + unsigned char pg_moreio : 1, 72 + pg_recoalesce : 1; 65 73 66 74 struct inode *pg_inode; 67 - int (*pg_doio)(struct nfs_pageio_descriptor *); 75 + const struct nfs_pageio_ops *pg_ops; 68 76 int pg_ioflags; 69 77 int pg_error; 78 + const struct rpc_call_ops *pg_rpc_callops; 70 79 struct pnfs_layout_segment *pg_lseg; 71 - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 72 80 }; 73 81 74 82 #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) ··· 93 85 pgoff_t idx_start, unsigned int npages, int tag); 94 86 extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 95 87 struct inode *inode, 96 - int (*doio)(struct nfs_pageio_descriptor *desc), 88 + const struct nfs_pageio_ops *pg_ops, 97 89 size_t bsize, 98 90 int how); 99 91 extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, ··· 107 99 extern void nfs_unlock_request(struct nfs_page *req); 108 100 extern int nfs_set_page_tag_locked(struct nfs_page *req); 109 101 extern void nfs_clear_page_tag_locked(struct nfs_page *req); 110 - 111 102 112 103 /* 113 104 * Lock the page of an asynchronous request without getting a new reference
+33 -1
include/linux/nfs_xdr.h
··· 269 269 }; 270 270 271 271 struct nfs4_layoutreturn_args { 272 - __u32 layout_type; 272 + struct pnfs_layout_hdr *layout; 273 273 struct inode *inode; 274 274 nfs4_stateid stateid; 275 + __u32 layout_type; 275 276 struct nfs4_sequence_args seq_args; 276 277 }; 277 278 ··· 1061 1060 struct nfs41_exchange_id_res { 1062 1061 struct nfs_client *client; 1063 1062 u32 flags; 1063 + struct server_scope *server_scope; 1064 1064 }; 1065 1065 1066 1066 struct nfs41_create_session_args { ··· 1085 1083 struct nfs41_reclaim_complete_res { 1086 1084 struct nfs4_sequence_res seq_res; 1087 1085 }; 1086 + 1087 + #define SECINFO_STYLE_CURRENT_FH 0 1088 + #define SECINFO_STYLE_PARENT 1 1089 + struct nfs41_secinfo_no_name_args { 1090 + int style; 1091 + struct nfs4_sequence_args seq_args; 1092 + }; 1093 + 1094 + struct nfs41_test_stateid_args { 1095 + nfs4_stateid *stateid; 1096 + struct nfs4_sequence_args seq_args; 1097 + }; 1098 + 1099 + struct nfs41_test_stateid_res { 1100 + unsigned int status; 1101 + struct nfs4_sequence_res seq_res; 1102 + }; 1103 + 1104 + struct nfs41_free_stateid_args { 1105 + nfs4_stateid *stateid; 1106 + struct nfs4_sequence_args seq_args; 1107 + }; 1108 + 1109 + struct nfs41_free_stateid_res { 1110 + unsigned int status; 1111 + struct nfs4_sequence_res seq_res; 1112 + }; 1113 + 1088 1114 #endif /* CONFIG_NFS_V4_1 */ 1089 1115 1090 1116 struct nfs_page; ··· 1126 1096 struct rpc_cred *cred; 1127 1097 struct nfs_fattr fattr; /* fattr storage */ 1128 1098 struct list_head pages; /* Coalesced read requests */ 1099 + struct list_head list; /* lists of struct nfs_read_data */ 1129 1100 struct nfs_page *req; /* multi ops per nfs_page */ 1130 1101 struct page **pagevec; 1131 1102 unsigned int npages; /* Max length of pagevec */ ··· 1150 1119 struct nfs_fattr fattr; 1151 1120 struct nfs_writeverf verf; 1152 1121 struct list_head pages; /* Coalesced requests we wish to flush */ 1122 + struct list_head list; /* lists of struct nfs_write_data */ 1153 1123 struct 
nfs_page *req; /* multi ops per nfs_page */ 1154 1124 struct page **pagevec; 1155 1125 unsigned int npages; /* Max length of pagevec */
+2 -29
include/linux/pnfs_osd_xdr.h
··· 41 41 42 42 #include <linux/nfs_fs.h> 43 43 #include <linux/nfs_page.h> 44 - #include <scsi/osd_protocol.h> 45 - 46 - #define PNFS_OSD_OSDNAME_MAXSIZE 256 47 44 48 45 /* 49 46 * draft-ietf-nfsv4-minorversion-22 ··· 95 98 96 99 #define _DEVID_HI(oid_device_id) \ 97 100 (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) 98 - 99 - static inline int 100 - pnfs_osd_objid_xdr_sz(void) 101 - { 102 - return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; 103 - } 104 101 105 102 enum pnfs_osd_version { 106 103 PNFS_OSD_MISSING = 0, ··· 180 189 struct nfs4_string oti_scsi_device_id; 181 190 }; 182 191 183 - enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; 184 - 185 192 /* struct netaddr4 { 186 193 * // see struct rpcb in RFC1833 187 194 * string r_netid<>; // network id ··· 196 207 struct pnfs_osd_net_addr ota_netaddr; 197 208 }; 198 209 199 - enum { 200 - NETWORK_ID_MAX = 16 / 4, 201 - UNIVERSAL_ADDRESS_MAX = 64 / 4, 202 - PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, 203 - }; 204 - 205 210 struct pnfs_osd_deviceaddr { 206 211 struct pnfs_osd_targetid oda_targetid; 207 212 struct pnfs_osd_targetaddr oda_targetaddr; ··· 203 220 struct nfs4_string oda_systemid; 204 221 struct pnfs_osd_object_cred oda_root_obj_cred; 205 222 struct nfs4_string oda_osdname; 206 - }; 207 - 208 - enum { 209 - ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, 210 - PNFS_OSD_DEVICEADDR_MAX = 211 - PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + 212 - 2 /*oda_lun*/ + 213 - 1 + OSD_SYSTEMID_LEN + 214 - 1 + ODA_OSDNAME_MAX, 215 223 }; 216 224 217 225 /* LAYOUTCOMMIT: layoutupdate */ ··· 253 279 u32 oer_errno; 254 280 }; 255 281 256 - /* OSD XDR API */ 282 + /* OSD XDR Client API */ 257 283 /* Layout helpers */ 258 284 /* Layout decoding is done in two parts: 259 285 * 1. 
First Call pnfs_osd_xdr_decode_layout_map to read in only the header part ··· 311 337 pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, 312 338 struct pnfs_osd_layoutupdate *lou); 313 339 314 - /* osd_ioerror encoding/decoding (layout_return) */ 315 - /* Client */ 340 + /* osd_ioerror encoding (layout_return) */ 316 341 extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); 317 342 extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); 318 343
+3 -3
include/linux/sunrpc/bc_xprt.h
··· 31 31 #include <linux/sunrpc/xprt.h> 32 32 #include <linux/sunrpc/sched.h> 33 33 34 - #ifdef CONFIG_NFS_V4_1 34 + #ifdef CONFIG_SUNRPC_BACKCHANNEL 35 35 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); 36 36 void xprt_free_bc_request(struct rpc_rqst *req); 37 37 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); ··· 47 47 return 1; 48 48 return 0; 49 49 } 50 - #else /* CONFIG_NFS_V4_1 */ 50 + #else /* CONFIG_SUNRPC_BACKCHANNEL */ 51 51 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, 52 52 unsigned int min_reqs) 53 53 { ··· 62 62 static inline void xprt_free_bc_request(struct rpc_rqst *req) 63 63 { 64 64 } 65 - #endif /* CONFIG_NFS_V4_1 */ 65 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 66 66 #endif /* _LINUX_SUNRPC_BC_XPRT_H */ 67 67
+4
include/linux/sunrpc/sched.h
··· 227 227 void rpc_destroy_wait_queue(struct rpc_wait_queue *); 228 228 void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, 229 229 rpc_action action); 230 + void rpc_sleep_on_priority(struct rpc_wait_queue *, 231 + struct rpc_task *, 232 + rpc_action action, 233 + int priority); 230 234 void rpc_wake_up_queued_task(struct rpc_wait_queue *, 231 235 struct rpc_task *); 232 236 void rpc_wake_up(struct rpc_wait_queue *);
+2 -2
include/linux/sunrpc/svc.h
··· 92 92 struct module * sv_module; /* optional module to count when 93 93 * adding threads */ 94 94 svc_thread_fn sv_function; /* main function for threads */ 95 - #if defined(CONFIG_NFS_V4_1) 95 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 96 96 struct list_head sv_cb_list; /* queue for callback requests 97 97 * that arrive over the same 98 98 * connection */ ··· 100 100 wait_queue_head_t sv_cb_waitq; /* sleep here if there are no 101 101 * entries in the svc_cb_list */ 102 102 struct svc_xprt *sv_bc_xprt; /* callback on fore channel */ 103 - #endif /* CONFIG_NFS_V4_1 */ 103 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 104 104 }; 105 105 106 106 /*
+18 -16
include/linux/sunrpc/xprt.h
··· 22 22 #define RPC_MIN_SLOT_TABLE (2U) 23 23 #define RPC_DEF_SLOT_TABLE (16U) 24 24 #define RPC_MAX_SLOT_TABLE (128U) 25 + #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) 25 26 26 27 /* 27 28 * This describes a timeout strategy ··· 101 100 ktime_t rq_xtime; /* transmit time stamp */ 102 101 int rq_ntrans; 103 102 104 - #if defined(CONFIG_NFS_V4_1) 103 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 105 104 struct list_head rq_bc_list; /* Callback service list */ 106 105 unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ 107 106 struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ 108 - #endif /* CONFIG_NFS_V4_1 */ 107 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 109 108 }; 110 109 #define rq_svec rq_snd_buf.head 111 110 #define rq_slen rq_snd_buf.len 112 111 113 112 struct rpc_xprt_ops { 114 113 void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); 115 - int (*reserve_xprt)(struct rpc_task *task); 114 + int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 116 115 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 117 116 void (*rpcbind)(struct rpc_task *task); 118 117 void (*set_port)(struct rpc_xprt *xprt, unsigned short port); ··· 165 164 166 165 struct rpc_wait_queue binding; /* requests waiting on rpcbind */ 167 166 struct rpc_wait_queue sending; /* requests waiting to send */ 168 - struct rpc_wait_queue resend; /* requests waiting to resend */ 169 167 struct rpc_wait_queue pending; /* requests in flight */ 170 168 struct rpc_wait_queue backlog; /* waiting for slot */ 171 169 struct list_head free; /* free slots */ 172 - struct rpc_rqst * slot; /* slot table storage */ 173 - unsigned int max_reqs; /* total slots */ 170 + unsigned int max_reqs; /* max number of slots */ 171 + unsigned int min_reqs; /* min number of slots */ 172 + atomic_t num_reqs; /* total slots */ 174 173 unsigned long state; /* transport state */ 175 174 unsigned char shutdown : 1, /* being shut down */ 176 175 resvport : 1; /* 
use a reserved port */ ··· 201 200 u32 xid; /* Next XID value to use */ 202 201 struct rpc_task * snd_task; /* Task blocked in send */ 203 202 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 204 - #if defined(CONFIG_NFS_V4_1) 203 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 205 204 struct svc_serv *bc_serv; /* The RPC service which will */ 206 205 /* process the callback */ 207 206 unsigned int bc_alloc_count; /* Total number of preallocs */ ··· 209 208 * items */ 210 209 struct list_head bc_pa_list; /* List of preallocated 211 210 * backchannel rpc_rqst's */ 212 - #endif /* CONFIG_NFS_V4_1 */ 211 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 213 212 struct list_head recv; 214 213 215 214 struct { ··· 229 228 const char *address_strings[RPC_DISPLAY_MAX]; 230 229 }; 231 230 232 - #if defined(CONFIG_NFS_V4_1) 231 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 233 232 /* 234 233 * Backchannel flags 235 234 */ 236 235 #define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */ 237 236 /* buffer in use */ 238 - #endif /* CONFIG_NFS_V4_1 */ 237 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 239 238 240 - #if defined(CONFIG_NFS_V4_1) 239 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 241 240 static inline int bc_prealloc(struct rpc_rqst *req) 242 241 { 243 242 return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); ··· 247 246 { 248 247 return 0; 249 248 } 250 - #endif /* CONFIG_NFS_V4_1 */ 249 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 251 250 252 251 struct xprt_create { 253 252 int ident; /* XPRT_TRANSPORT identifier */ ··· 272 271 struct rpc_xprt *xprt_create_transport(struct xprt_create *args); 273 272 void xprt_connect(struct rpc_task *task); 274 273 void xprt_reserve(struct rpc_task *task); 275 - int xprt_reserve_xprt(struct rpc_task *task); 276 - int xprt_reserve_xprt_cong(struct rpc_task *task); 274 + int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); 275 + int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); 277 276 int 
xprt_prepare_transmit(struct rpc_task *task); 278 277 void xprt_transmit(struct rpc_task *task); 279 278 void xprt_end_transmit(struct rpc_task *task); ··· 283 282 void xprt_release(struct rpc_task *task); 284 283 struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); 285 284 void xprt_put(struct rpc_xprt *xprt); 286 - struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req); 285 + struct rpc_xprt * xprt_alloc(struct net *net, size_t size, 286 + unsigned int num_prealloc, 287 + unsigned int max_req); 287 288 void xprt_free(struct rpc_xprt *); 288 289 289 290 static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) ··· 324 321 #define XPRT_CLOSING (6) 325 322 #define XPRT_CONNECTION_ABORT (7) 326 323 #define XPRT_CONNECTION_CLOSE (8) 327 - #define XPRT_INITIALIZED (9) 328 324 329 325 static inline void xprt_set_connected(struct rpc_xprt *xprt) 330 326 {
+4
net/sunrpc/Kconfig
··· 4 4 config SUNRPC_GSS 5 5 tristate 6 6 7 + config SUNRPC_BACKCHANNEL 8 + bool 9 + depends on SUNRPC 10 + 7 11 config SUNRPC_XPRT_RDMA 8 12 tristate 9 13 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
+1 -1
net/sunrpc/Makefile
··· 13 13 addr.o rpcb_clnt.o timer.o xdr.o \ 14 14 sunrpc_syms.o cache.o rpc_pipe.o \ 15 15 svc_xprt.o 16 - sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o 16 + sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o 17 17 sunrpc-$(CONFIG_PROC_FS) += stats.o 18 18 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+2 -5
net/sunrpc/backchannel_rqst.c
··· 29 29 #define RPCDBG_FACILITY RPCDBG_TRANS 30 30 #endif 31 31 32 - #if defined(CONFIG_NFS_V4_1) 33 - 34 32 /* 35 33 * Helper routines that track the number of preallocation elements 36 34 * on the transport. ··· 172 174 dprintk("RPC: setup backchannel transport failed\n"); 173 175 return -1; 174 176 } 175 - EXPORT_SYMBOL(xprt_setup_backchannel); 177 + EXPORT_SYMBOL_GPL(xprt_setup_backchannel); 176 178 177 179 /* 178 180 * Destroys the backchannel preallocated structures. ··· 202 204 dprintk("RPC: backchannel list empty= %s\n", 203 205 list_empty(&xprt->bc_pa_list) ? "true" : "false"); 204 206 } 205 - EXPORT_SYMBOL(xprt_destroy_backchannel); 207 + EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); 206 208 207 209 /* 208 210 * One or more rpc_rqst structure have been preallocated during the ··· 277 279 spin_unlock_bh(&xprt->bc_pa_lock); 278 280 } 279 281 280 - #endif /* CONFIG_NFS_V4_1 */
-3
net/sunrpc/bc_svc.c
··· 27 27 * reply over an existing open connection previously established by the client. 28 28 */ 29 29 30 - #if defined(CONFIG_NFS_V4_1) 31 - 32 30 #include <linux/module.h> 33 31 34 32 #include <linux/sunrpc/xprt.h> ··· 61 63 return ret; 62 64 } 63 65 64 - #endif /* CONFIG_NFS_V4_1 */
+7 -8
net/sunrpc/clnt.c
··· 64 64 static void call_bind(struct rpc_task *task); 65 65 static void call_bind_status(struct rpc_task *task); 66 66 static void call_transmit(struct rpc_task *task); 67 - #if defined(CONFIG_NFS_V4_1) 67 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 68 68 static void call_bc_transmit(struct rpc_task *task); 69 - #endif /* CONFIG_NFS_V4_1 */ 69 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 70 70 static void call_status(struct rpc_task *task); 71 71 static void call_transmit_status(struct rpc_task *task); 72 72 static void call_refresh(struct rpc_task *task); ··· 715 715 } 716 716 EXPORT_SYMBOL_GPL(rpc_call_async); 717 717 718 - #if defined(CONFIG_NFS_V4_1) 718 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 719 719 /** 720 720 * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run 721 721 * rpc_execute against it ··· 758 758 dprintk("RPC: rpc_run_bc_task: task= %p\n", task); 759 759 return task; 760 760 } 761 - #endif /* CONFIG_NFS_V4_1 */ 761 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 762 762 763 763 void 764 764 rpc_call_start(struct rpc_task *task) ··· 1361 1361 } 1362 1362 } 1363 1363 1364 - #if defined(CONFIG_NFS_V4_1) 1364 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1365 1365 /* 1366 1366 * 5b. Send the backchannel RPC reply. On error, drop the reply. In 1367 1367 * addition, disconnect on connectivity errors. ··· 1425 1425 } 1426 1426 rpc_wake_up_queued_task(&req->rq_xprt->pending, task); 1427 1427 } 1428 - #endif /* CONFIG_NFS_V4_1 */ 1428 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1429 1429 1430 1430 /* 1431 1431 * 6. Sort out the RPC call status ··· 1550 1550 kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; 1551 1551 __be32 *p; 1552 1552 1553 - dprintk("RPC: %5u call_decode (status %d)\n", 1554 - task->tk_pid, task->tk_status); 1553 + dprint_status(task); 1555 1554 1556 1555 if (task->tk_flags & RPC_CALL_MAJORSEEN) { 1557 1556 if (clnt->cl_chatty)
+29 -9
net/sunrpc/sched.c
··· 97 97 /* 98 98 * Add new request to a priority queue. 99 99 */ 100 - static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) 100 + static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, 101 + struct rpc_task *task, 102 + unsigned char queue_priority) 101 103 { 102 104 struct list_head *q; 103 105 struct rpc_task *t; 104 106 105 107 INIT_LIST_HEAD(&task->u.tk_wait.links); 106 - q = &queue->tasks[task->tk_priority]; 107 - if (unlikely(task->tk_priority > queue->maxpriority)) 108 + q = &queue->tasks[queue_priority]; 109 + if (unlikely(queue_priority > queue->maxpriority)) 108 110 q = &queue->tasks[queue->maxpriority]; 109 111 list_for_each_entry(t, q, u.tk_wait.list) { 110 112 if (t->tk_owner == task->tk_owner) { ··· 125 123 * improve overall performance. 126 124 * Everyone else gets appended to the queue to ensure proper FIFO behavior. 127 125 */ 128 - static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) 126 + static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, 127 + struct rpc_task *task, 128 + unsigned char queue_priority) 129 129 { 130 130 BUG_ON (RPC_IS_QUEUED(task)); 131 131 132 132 if (RPC_IS_PRIORITY(queue)) 133 - __rpc_add_wait_queue_priority(queue, task); 133 + __rpc_add_wait_queue_priority(queue, task, queue_priority); 134 134 else if (RPC_IS_SWAPPER(task)) 135 135 list_add(&task->u.tk_wait.list, &queue->tasks[0]); 136 136 else ··· 315 311 * NB: An RPC task will only receive interrupt-driven events as long 316 312 * as it's on a wait queue. 
317 313 */ 318 - static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, 319 - rpc_action action) 314 + static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, 315 + struct rpc_task *task, 316 + rpc_action action, 317 + unsigned char queue_priority) 320 318 { 321 319 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", 322 320 task->tk_pid, rpc_qname(q), jiffies); 323 321 324 - __rpc_add_wait_queue(q, task); 322 + __rpc_add_wait_queue(q, task, queue_priority); 325 323 326 324 BUG_ON(task->tk_callback != NULL); 327 325 task->tk_callback = action; ··· 340 334 * Protect the queue operations. 341 335 */ 342 336 spin_lock_bh(&q->lock); 343 - __rpc_sleep_on(q, task, action); 337 + __rpc_sleep_on_priority(q, task, action, task->tk_priority); 344 338 spin_unlock_bh(&q->lock); 345 339 } 346 340 EXPORT_SYMBOL_GPL(rpc_sleep_on); 341 + 342 + void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, 343 + rpc_action action, int priority) 344 + { 345 + /* We shouldn't ever put an inactive task to sleep */ 346 + BUG_ON(!RPC_IS_ACTIVATED(task)); 347 + 348 + /* 349 + * Protect the queue operations. 350 + */ 351 + spin_lock_bh(&q->lock); 352 + __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); 353 + spin_unlock_bh(&q->lock); 354 + } 347 355 348 356 /** 349 357 * __rpc_do_wake_up_task - wake up a single rpc_task
+3 -3
net/sunrpc/svc.c
··· 1252 1252 } 1253 1253 } 1254 1254 1255 - #if defined(CONFIG_NFS_V4_1) 1255 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1256 1256 /* 1257 1257 * Process a backchannel RPC request that arrived over an existing 1258 1258 * outbound connection ··· 1300 1300 return 0; 1301 1301 } 1302 1302 } 1303 - EXPORT_SYMBOL(bc_svc_process); 1304 - #endif /* CONFIG_NFS_V4_1 */ 1303 + EXPORT_SYMBOL_GPL(bc_svc_process); 1304 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1305 1305 1306 1306 /* 1307 1307 * Return (transport-specific) limit on the rpc payload.
+7 -7
net/sunrpc/svcsock.c
··· 68 68 static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 69 69 struct net *, struct sockaddr *, 70 70 int, int); 71 - #if defined(CONFIG_NFS_V4_1) 71 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 72 72 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, 73 73 struct net *, struct sockaddr *, 74 74 int, int); 75 75 static void svc_bc_sock_free(struct svc_xprt *xprt); 76 - #endif /* CONFIG_NFS_V4_1 */ 76 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 77 77 78 78 #ifdef CONFIG_DEBUG_LOCK_ALLOC 79 79 static struct lock_class_key svc_key[2]; ··· 1243 1243 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); 1244 1244 } 1245 1245 1246 - #if defined(CONFIG_NFS_V4_1) 1246 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1247 1247 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, 1248 1248 struct net *, struct sockaddr *, 1249 1249 int, int); ··· 1284 1284 { 1285 1285 svc_unreg_xprt_class(&svc_tcp_bc_class); 1286 1286 } 1287 - #else /* CONFIG_NFS_V4_1 */ 1287 + #else /* CONFIG_SUNRPC_BACKCHANNEL */ 1288 1288 static void svc_init_bc_xprt_sock(void) 1289 1289 { 1290 1290 } ··· 1292 1292 static void svc_cleanup_bc_xprt_sock(void) 1293 1293 { 1294 1294 } 1295 - #endif /* CONFIG_NFS_V4_1 */ 1295 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1296 1296 1297 1297 static struct svc_xprt_ops svc_tcp_ops = { 1298 1298 .xpo_create = svc_tcp_create, ··· 1623 1623 kfree(svsk); 1624 1624 } 1625 1625 1626 - #if defined(CONFIG_NFS_V4_1) 1626 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1627 1627 /* 1628 1628 * Create a back channel svc_xprt which shares the fore channel socket. 1629 1629 */ ··· 1662 1662 if (xprt) 1663 1663 kfree(container_of(xprt, struct svc_sock, sk_xprt)); 1664 1664 } 1665 - #endif /* CONFIG_NFS_V4_1 */ 1665 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */
+1 -1
net/sunrpc/xdr.c
··· 126 126 kaddr[buf->page_base + len] = '\0'; 127 127 kunmap_atomic(kaddr, KM_USER0); 128 128 } 129 - EXPORT_SYMBOL(xdr_terminate_string); 129 + EXPORT_SYMBOL_GPL(xdr_terminate_string); 130 130 131 131 void 132 132 xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
+170 -87
net/sunrpc/xprt.c
··· 62 62 /* 63 63 * Local functions 64 64 */ 65 + static void xprt_init(struct rpc_xprt *xprt, struct net *net); 65 66 static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 66 67 static void xprt_connect_status(struct rpc_task *task); 67 68 static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); ··· 192 191 * transport connects from colliding with writes. No congestion control 193 192 * is provided. 194 193 */ 195 - int xprt_reserve_xprt(struct rpc_task *task) 194 + int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 196 195 { 197 196 struct rpc_rqst *req = task->tk_rqstp; 198 - struct rpc_xprt *xprt = req->rq_xprt; 197 + int priority; 199 198 200 199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 201 200 if (task == xprt->snd_task) ··· 203 202 goto out_sleep; 204 203 } 205 204 xprt->snd_task = task; 206 - req->rq_bytes_sent = 0; 207 - req->rq_ntrans++; 205 + if (req != NULL) { 206 + req->rq_bytes_sent = 0; 207 + req->rq_ntrans++; 208 + } 208 209 209 210 return 1; 210 211 ··· 215 212 task->tk_pid, xprt); 216 213 task->tk_timeout = 0; 217 214 task->tk_status = -EAGAIN; 218 - if (req->rq_ntrans) 219 - rpc_sleep_on(&xprt->resend, task, NULL); 215 + if (req == NULL) 216 + priority = RPC_PRIORITY_LOW; 217 + else if (!req->rq_ntrans) 218 + priority = RPC_PRIORITY_NORMAL; 220 219 else 221 - rpc_sleep_on(&xprt->sending, task, NULL); 220 + priority = RPC_PRIORITY_HIGH; 221 + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); 222 222 return 0; 223 223 } 224 224 EXPORT_SYMBOL_GPL(xprt_reserve_xprt); ··· 245 239 * integrated into the decision of whether a request is allowed to be 246 240 * woken up and given access to the transport. 
247 241 */ 248 - int xprt_reserve_xprt_cong(struct rpc_task *task) 242 + int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 249 243 { 250 - struct rpc_xprt *xprt = task->tk_xprt; 251 244 struct rpc_rqst *req = task->tk_rqstp; 245 + int priority; 252 246 253 247 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 254 248 if (task == xprt->snd_task) 255 249 return 1; 256 250 goto out_sleep; 257 251 } 252 + if (req == NULL) { 253 + xprt->snd_task = task; 254 + return 1; 255 + } 258 256 if (__xprt_get_cong(xprt, task)) { 259 257 xprt->snd_task = task; 260 - if (req) { 261 - req->rq_bytes_sent = 0; 262 - req->rq_ntrans++; 263 - } 258 + req->rq_bytes_sent = 0; 259 + req->rq_ntrans++; 264 260 return 1; 265 261 } 266 262 xprt_clear_locked(xprt); ··· 270 262 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 271 263 task->tk_timeout = 0; 272 264 task->tk_status = -EAGAIN; 273 - if (req && req->rq_ntrans) 274 - rpc_sleep_on(&xprt->resend, task, NULL); 265 + if (req == NULL) 266 + priority = RPC_PRIORITY_LOW; 267 + else if (!req->rq_ntrans) 268 + priority = RPC_PRIORITY_NORMAL; 275 269 else 276 - rpc_sleep_on(&xprt->sending, task, NULL); 270 + priority = RPC_PRIORITY_HIGH; 271 + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); 277 272 return 0; 278 273 } 279 274 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); ··· 286 275 int retval; 287 276 288 277 spin_lock_bh(&xprt->transport_lock); 289 - retval = xprt->ops->reserve_xprt(task); 278 + retval = xprt->ops->reserve_xprt(xprt, task); 290 279 spin_unlock_bh(&xprt->transport_lock); 291 280 return retval; 292 281 } ··· 299 288 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 300 289 return; 301 290 302 - task = rpc_wake_up_next(&xprt->resend); 303 - if (!task) { 304 - task = rpc_wake_up_next(&xprt->sending); 305 - if (!task) 306 - goto out_unlock; 307 - } 291 + task = rpc_wake_up_next(&xprt->sending); 292 + if (task == NULL) 293 + goto out_unlock; 308 294 309 295 req = task->tk_rqstp; 
310 296 xprt->snd_task = task; ··· 318 310 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) 319 311 { 320 312 struct rpc_task *task; 313 + struct rpc_rqst *req; 321 314 322 315 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 323 316 return; 324 317 if (RPCXPRT_CONGESTED(xprt)) 325 318 goto out_unlock; 326 - task = rpc_wake_up_next(&xprt->resend); 327 - if (!task) { 328 - task = rpc_wake_up_next(&xprt->sending); 329 - if (!task) 330 - goto out_unlock; 319 + task = rpc_wake_up_next(&xprt->sending); 320 + if (task == NULL) 321 + goto out_unlock; 322 + 323 + req = task->tk_rqstp; 324 + if (req == NULL) { 325 + xprt->snd_task = task; 326 + return; 331 327 } 332 328 if (__xprt_get_cong(xprt, task)) { 333 - struct rpc_rqst *req = task->tk_rqstp; 334 329 xprt->snd_task = task; 335 - if (req) { 336 - req->rq_bytes_sent = 0; 337 - req->rq_ntrans++; 338 - } 330 + req->rq_bytes_sent = 0; 331 + req->rq_ntrans++; 339 332 return; 340 333 } 341 334 out_unlock: ··· 861 852 err = req->rq_reply_bytes_recvd; 862 853 goto out_unlock; 863 854 } 864 - if (!xprt->ops->reserve_xprt(task)) 855 + if (!xprt->ops->reserve_xprt(xprt, task)) 865 856 err = -EAGAIN; 866 857 out_unlock: 867 858 spin_unlock_bh(&xprt->transport_lock); ··· 937 928 spin_unlock_bh(&xprt->transport_lock); 938 929 } 939 930 931 + static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) 932 + { 933 + struct rpc_rqst *req = ERR_PTR(-EAGAIN); 934 + 935 + if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) 936 + goto out; 937 + req = kzalloc(sizeof(struct rpc_rqst), gfp_flags); 938 + if (req != NULL) 939 + goto out; 940 + atomic_dec(&xprt->num_reqs); 941 + req = ERR_PTR(-ENOMEM); 942 + out: 943 + return req; 944 + } 945 + 946 + static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 947 + { 948 + if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { 949 + kfree(req); 950 + return true; 951 + } 952 + return false; 953 + } 954 + 940 955 static 
void xprt_alloc_slot(struct rpc_task *task) 941 956 { 942 957 struct rpc_xprt *xprt = task->tk_xprt; 958 + struct rpc_rqst *req; 943 959 944 - task->tk_status = 0; 945 - if (task->tk_rqstp) 946 - return; 947 960 if (!list_empty(&xprt->free)) { 948 - struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); 949 - list_del_init(&req->rq_list); 950 - task->tk_rqstp = req; 951 - xprt_request_init(task, xprt); 952 - return; 961 + req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); 962 + list_del(&req->rq_list); 963 + goto out_init_req; 953 964 } 954 - dprintk("RPC: waiting for request slot\n"); 965 + req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); 966 + if (!IS_ERR(req)) 967 + goto out_init_req; 968 + switch (PTR_ERR(req)) { 969 + case -ENOMEM: 970 + rpc_delay(task, HZ >> 2); 971 + dprintk("RPC: dynamic allocation of request slot " 972 + "failed! Retrying\n"); 973 + break; 974 + case -EAGAIN: 975 + rpc_sleep_on(&xprt->backlog, task, NULL); 976 + dprintk("RPC: waiting for request slot\n"); 977 + } 955 978 task->tk_status = -EAGAIN; 956 - task->tk_timeout = 0; 957 - rpc_sleep_on(&xprt->backlog, task, NULL); 979 + return; 980 + out_init_req: 981 + task->tk_status = 0; 982 + task->tk_rqstp = req; 983 + xprt_request_init(task, xprt); 958 984 } 959 985 960 986 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 961 987 { 988 + if (xprt_dynamic_free_slot(xprt, req)) 989 + return; 990 + 962 991 memset(req, 0, sizeof(*req)); /* mark unused */ 963 992 964 993 spin_lock(&xprt->reserve_lock); ··· 1005 958 spin_unlock(&xprt->reserve_lock); 1006 959 } 1007 960 1008 - struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) 961 + static void xprt_free_all_slots(struct rpc_xprt *xprt) 962 + { 963 + struct rpc_rqst *req; 964 + while (!list_empty(&xprt->free)) { 965 + req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); 966 + list_del(&req->rq_list); 967 + kfree(req); 968 + } 969 + } 970 + 971 + struct rpc_xprt 
*xprt_alloc(struct net *net, size_t size, 972 + unsigned int num_prealloc, 973 + unsigned int max_alloc) 1009 974 { 1010 975 struct rpc_xprt *xprt; 976 + struct rpc_rqst *req; 977 + int i; 1011 978 1012 979 xprt = kzalloc(size, GFP_KERNEL); 1013 980 if (xprt == NULL) 1014 981 goto out; 1015 - atomic_set(&xprt->count, 1); 1016 982 1017 - xprt->max_reqs = max_req; 1018 - xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); 1019 - if (xprt->slot == NULL) 983 + xprt_init(xprt, net); 984 + 985 + for (i = 0; i < num_prealloc; i++) { 986 + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); 987 + if (!req) 988 + break; 989 + list_add(&req->rq_list, &xprt->free); 990 + } 991 + if (i < num_prealloc) 1020 992 goto out_free; 993 + if (max_alloc > num_prealloc) 994 + xprt->max_reqs = max_alloc; 995 + else 996 + xprt->max_reqs = num_prealloc; 997 + xprt->min_reqs = num_prealloc; 998 + atomic_set(&xprt->num_reqs, num_prealloc); 1021 999 1022 - xprt->xprt_net = get_net(net); 1023 1000 return xprt; 1024 1001 1025 1002 out_free: 1026 - kfree(xprt); 1003 + xprt_free(xprt); 1027 1004 out: 1028 1005 return NULL; 1029 1006 } ··· 1056 985 void xprt_free(struct rpc_xprt *xprt) 1057 986 { 1058 987 put_net(xprt->xprt_net); 1059 - kfree(xprt->slot); 988 + xprt_free_all_slots(xprt); 1060 989 kfree(xprt); 1061 990 } 1062 991 EXPORT_SYMBOL_GPL(xprt_free); ··· 1072 1001 { 1073 1002 struct rpc_xprt *xprt = task->tk_xprt; 1074 1003 1075 - task->tk_status = -EIO; 1004 + task->tk_status = 0; 1005 + if (task->tk_rqstp != NULL) 1006 + return; 1007 + 1008 + /* Note: grabbing the xprt_lock_write() here is not strictly needed, 1009 + * but ensures that we throttle new slot allocation if the transport 1010 + * is congested (e.g. if reconnecting or if we're out of socket 1011 + * write buffer space). 
1012 + */ 1013 + task->tk_timeout = 0; 1014 + task->tk_status = -EAGAIN; 1015 + if (!xprt_lock_write(xprt, task)) 1016 + return; 1017 + 1076 1018 spin_lock(&xprt->reserve_lock); 1077 1019 xprt_alloc_slot(task); 1078 1020 spin_unlock(&xprt->reserve_lock); 1021 + xprt_release_write(xprt, task); 1079 1022 } 1080 1023 1081 1024 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) ··· 1106 1021 { 1107 1022 struct rpc_rqst *req = task->tk_rqstp; 1108 1023 1024 + INIT_LIST_HEAD(&req->rq_list); 1109 1025 req->rq_timeout = task->tk_client->cl_timeout->to_initval; 1110 1026 req->rq_task = task; 1111 1027 req->rq_xprt = xprt; ··· 1159 1073 xprt_free_bc_request(req); 1160 1074 } 1161 1075 1076 + static void xprt_init(struct rpc_xprt *xprt, struct net *net) 1077 + { 1078 + atomic_set(&xprt->count, 1); 1079 + 1080 + spin_lock_init(&xprt->transport_lock); 1081 + spin_lock_init(&xprt->reserve_lock); 1082 + 1083 + INIT_LIST_HEAD(&xprt->free); 1084 + INIT_LIST_HEAD(&xprt->recv); 1085 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1086 + spin_lock_init(&xprt->bc_pa_lock); 1087 + INIT_LIST_HEAD(&xprt->bc_pa_list); 1088 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1089 + 1090 + xprt->last_used = jiffies; 1091 + xprt->cwnd = RPC_INITCWND; 1092 + xprt->bind_index = 0; 1093 + 1094 + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); 1095 + rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 1096 + rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending"); 1097 + rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); 1098 + 1099 + xprt_init_xid(xprt); 1100 + 1101 + xprt->xprt_net = get_net(net); 1102 + } 1103 + 1162 1104 /** 1163 1105 * xprt_create_transport - create an RPC transport 1164 1106 * @args: rpc transport creation arguments ··· 1195 1081 struct rpc_xprt *xprt_create_transport(struct xprt_create *args) 1196 1082 { 1197 1083 struct rpc_xprt *xprt; 1198 - struct rpc_rqst *req; 1199 1084 struct xprt_class *t; 1200 1085 1201 1086 spin_lock(&xprt_list_lock); ··· 1213 1100 if 
(IS_ERR(xprt)) { 1214 1101 dprintk("RPC: xprt_create_transport: failed, %ld\n", 1215 1102 -PTR_ERR(xprt)); 1216 - return xprt; 1103 + goto out; 1217 1104 } 1218 - if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) 1219 - /* ->setup returned a pre-initialized xprt: */ 1220 - return xprt; 1221 - 1222 - spin_lock_init(&xprt->transport_lock); 1223 - spin_lock_init(&xprt->reserve_lock); 1224 - 1225 - INIT_LIST_HEAD(&xprt->free); 1226 - INIT_LIST_HEAD(&xprt->recv); 1227 - #if defined(CONFIG_NFS_V4_1) 1228 - spin_lock_init(&xprt->bc_pa_lock); 1229 - INIT_LIST_HEAD(&xprt->bc_pa_list); 1230 - #endif /* CONFIG_NFS_V4_1 */ 1231 - 1232 1105 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1233 1106 if (xprt_has_timer(xprt)) 1234 1107 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1235 1108 (unsigned long)xprt); 1236 1109 else 1237 1110 init_timer(&xprt->timer); 1238 - xprt->last_used = jiffies; 1239 - xprt->cwnd = RPC_INITCWND; 1240 - xprt->bind_index = 0; 1241 - 1242 - rpc_init_wait_queue(&xprt->binding, "xprt_binding"); 1243 - rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 1244 - rpc_init_wait_queue(&xprt->sending, "xprt_sending"); 1245 - rpc_init_wait_queue(&xprt->resend, "xprt_resend"); 1246 - rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); 1247 - 1248 - /* initialize free list */ 1249 - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) 1250 - list_add(&req->rq_list, &xprt->free); 1251 - 1252 - xprt_init_xid(xprt); 1253 - 1254 1111 dprintk("RPC: created transport %p with %u slots\n", xprt, 1255 1112 xprt->max_reqs); 1113 + out: 1256 1114 return xprt; 1257 1115 } 1258 1116 ··· 1241 1157 rpc_destroy_wait_queue(&xprt->binding); 1242 1158 rpc_destroy_wait_queue(&xprt->pending); 1243 1159 rpc_destroy_wait_queue(&xprt->sending); 1244 - rpc_destroy_wait_queue(&xprt->resend); 1245 1160 rpc_destroy_wait_queue(&xprt->backlog); 1246 1161 cancel_work_sync(&xprt->task_cleanup); 1247 1162 /*
+3 -3
net/sunrpc/xprtrdma/transport.c
··· 283 283 } 284 284 285 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 286 + xprt_rdma_slot_table_entries, 286 287 xprt_rdma_slot_table_entries); 287 288 if (xprt == NULL) { 288 289 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", ··· 453 452 } 454 453 455 454 static int 456 - xprt_rdma_reserve_xprt(struct rpc_task *task) 455 + xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 457 456 { 458 - struct rpc_xprt *xprt = task->tk_xprt; 459 457 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 460 458 int credits = atomic_read(&r_xprt->rx_buf.rb_credits); 461 459 ··· 466 466 BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); 467 467 } 468 468 xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; 469 - return xprt_reserve_xprt_cong(task); 469 + return xprt_reserve_xprt_cong(xprt, task); 470 470 } 471 471 472 472 /*
+1 -1
net/sunrpc/xprtrdma/xprt_rdma.h
··· 109 109 */ 110 110 111 111 /* temporary static scatter/gather max */ 112 - #define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ 112 + #define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ 113 113 #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ 114 114 #define MAX_RPCRDMAHDR (\ 115 115 /* max supported RPC/RDMA header */ \
+47 -10
net/sunrpc/xprtsock.c
··· 37 37 #include <linux/sunrpc/svcsock.h> 38 38 #include <linux/sunrpc/xprtsock.h> 39 39 #include <linux/file.h> 40 - #ifdef CONFIG_NFS_V4_1 40 + #ifdef CONFIG_SUNRPC_BACKCHANNEL 41 41 #include <linux/sunrpc/bc_xprt.h> 42 42 #endif 43 43 ··· 54 54 * xprtsock tunables 55 55 */ 56 56 unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 57 - unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; 57 + unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; 58 + unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; 58 59 59 60 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 60 61 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; ··· 76 75 77 76 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; 78 77 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; 78 + static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; 79 79 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; 80 80 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; 81 81 ··· 104 102 .proc_handler = proc_dointvec_minmax, 105 103 .extra1 = &min_slot_table_size, 106 104 .extra2 = &max_slot_table_size 105 + }, 106 + { 107 + .procname = "tcp_max_slot_table_entries", 108 + .data = &xprt_max_tcp_slot_table_entries, 109 + .maxlen = sizeof(unsigned int), 110 + .mode = 0644, 111 + .proc_handler = proc_dointvec_minmax, 112 + .extra1 = &min_slot_table_size, 113 + .extra2 = &max_tcp_slot_table_limit 107 114 }, 108 115 { 109 116 .procname = "min_resvport", ··· 766 755 if (task == NULL) 767 756 goto out_release; 768 757 req = task->tk_rqstp; 758 + if (req == NULL) 759 + goto out_release; 769 760 if (req->rq_bytes_sent == 0) 770 761 goto out_release; 771 762 if (req->rq_bytes_sent == req->rq_snd_buf.len) ··· 1249 1236 return 0; 1250 1237 } 1251 1238 1252 - #if defined(CONFIG_NFS_V4_1) 1239 + #if defined(CONFIG_SUNRPC_BACKCHANNEL) 1253 1240 /* 1254 1241 * Obtains an rpc_rqst previously allocated and invokes the 
common 1255 1242 * tcp read code to read the data. The result is placed in the callback ··· 1312 1299 { 1313 1300 return xs_tcp_read_reply(xprt, desc); 1314 1301 } 1315 - #endif /* CONFIG_NFS_V4_1 */ 1302 + #endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1316 1303 1317 1304 /* 1318 1305 * Read data off the transport. This can be either an RPC_CALL or an ··· 2502 2489 } 2503 2490 2504 2491 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2505 - unsigned int slot_table_size) 2492 + unsigned int slot_table_size, 2493 + unsigned int max_slot_table_size) 2506 2494 { 2507 2495 struct rpc_xprt *xprt; 2508 2496 struct sock_xprt *new; ··· 2513 2499 return ERR_PTR(-EBADF); 2514 2500 } 2515 2501 2516 - xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); 2502 + xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size, 2503 + max_slot_table_size); 2517 2504 if (xprt == NULL) { 2518 2505 dprintk("RPC: xs_setup_xprt: couldn't allocate " 2519 2506 "rpc_xprt\n"); ··· 2556 2541 struct rpc_xprt *xprt; 2557 2542 struct rpc_xprt *ret; 2558 2543 2559 - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2544 + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, 2545 + xprt_max_tcp_slot_table_entries); 2560 2546 if (IS_ERR(xprt)) 2561 2547 return xprt; 2562 2548 transport = container_of(xprt, struct sock_xprt, xprt); ··· 2621 2605 struct sock_xprt *transport; 2622 2606 struct rpc_xprt *ret; 2623 2607 2624 - xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); 2608 + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries, 2609 + xprt_udp_slot_table_entries); 2625 2610 if (IS_ERR(xprt)) 2626 2611 return xprt; 2627 2612 transport = container_of(xprt, struct sock_xprt, xprt); ··· 2698 2681 struct sock_xprt *transport; 2699 2682 struct rpc_xprt *ret; 2700 2683 2701 - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2684 + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, 2685 + xprt_max_tcp_slot_table_entries); 2702 2686 if (IS_ERR(xprt)) 2703 2687 
return xprt; 2704 2688 transport = container_of(xprt, struct sock_xprt, xprt); ··· 2778 2760 */ 2779 2761 return args->bc_xprt->xpt_bc_xprt; 2780 2762 } 2781 - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2763 + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, 2764 + xprt_tcp_slot_table_entries); 2782 2765 if (IS_ERR(xprt)) 2783 2766 return xprt; 2784 2767 transport = container_of(xprt, struct sock_xprt, xprt); ··· 2966 2947 #define param_check_slot_table_size(name, p) \ 2967 2948 __param_check(name, p, unsigned int); 2968 2949 2950 + static int param_set_max_slot_table_size(const char *val, 2951 + const struct kernel_param *kp) 2952 + { 2953 + return param_set_uint_minmax(val, kp, 2954 + RPC_MIN_SLOT_TABLE, 2955 + RPC_MAX_SLOT_TABLE_LIMIT); 2956 + } 2957 + 2958 + static struct kernel_param_ops param_ops_max_slot_table_size = { 2959 + .set = param_set_max_slot_table_size, 2960 + .get = param_get_uint, 2961 + }; 2962 + 2963 + #define param_check_max_slot_table_size(name, p) \ 2964 + __param_check(name, p, unsigned int); 2965 + 2969 2966 module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, 2970 2967 slot_table_size, 0644); 2968 + module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries, 2969 + max_slot_table_size, 0644); 2971 2970 module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, 2972 2971 slot_table_size, 0644); 2973 2972