Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSv4.1: filelayout async error handler

Use our own async error handler.
Mark the layout as failed and retry i/o through the MDS on specified errors.

Update the mds_offset in nfs_readpage_retry so that a failed short-read retry
to a DS gets correctly resent through the MDS.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

Authored by Andy Adamson; committed by Trond Myklebust.
cbdabc7f dc70d7b3

+123 -6
+1
fs/nfs/internal.h
··· 285 285 #endif 286 286 287 287 /* nfs4proc.c */ 288 + extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); 288 289 extern int nfs4_init_client(struct nfs_client *clp, 289 290 const struct rpc_timeout *timeparms, 290 291 const char *ip_addr,
+81
fs/nfs/nfs4filelayout.c
··· 40 40 MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); 41 41 MODULE_DESCRIPTION("The NFSv4 file layout driver"); 42 42 43 + #define FILELAYOUT_POLL_RETRY_MAX (15*HZ) 44 + 43 45 static int 44 46 filelayout_set_layoutdriver(struct nfs_server *nfss) 45 47 { ··· 102 100 BUG(); 103 101 } 104 102 103 + /* For data server errors we don't recover from */ 104 + static void 105 + filelayout_set_lo_fail(struct pnfs_layout_segment *lseg) 106 + { 107 + if (lseg->pls_range.iomode == IOMODE_RW) { 108 + dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); 109 + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); 110 + } else { 111 + dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); 112 + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 113 + } 114 + } 115 + 116 + static int filelayout_async_handle_error(struct rpc_task *task, 117 + struct nfs4_state *state, 118 + struct nfs_client *clp, 119 + int *reset) 120 + { 121 + if (task->tk_status >= 0) 122 + return 0; 123 + 124 + *reset = 0; 125 + 126 + switch (task->tk_status) { 127 + case -NFS4ERR_BADSESSION: 128 + case -NFS4ERR_BADSLOT: 129 + case -NFS4ERR_BAD_HIGH_SLOT: 130 + case -NFS4ERR_DEADSESSION: 131 + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 132 + case -NFS4ERR_SEQ_FALSE_RETRY: 133 + case -NFS4ERR_SEQ_MISORDERED: 134 + dprintk("%s ERROR %d, Reset session. Exchangeid " 135 + "flags 0x%x\n", __func__, task->tk_status, 136 + clp->cl_exchange_flags); 137 + nfs4_schedule_session_recovery(clp->cl_session); 138 + break; 139 + case -NFS4ERR_DELAY: 140 + case -NFS4ERR_GRACE: 141 + case -EKEYEXPIRED: 142 + rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); 143 + break; 144 + default: 145 + dprintk("%s DS error. Retry through MDS %d\n", __func__, 146 + task->tk_status); 147 + *reset = 1; 148 + break; 149 + } 150 + task->tk_status = 0; 151 + return -EAGAIN; 152 + } 153 + 154 + /* NFS_PROTO call done callback routines */ 155 + 156 + static int filelayout_read_done_cb(struct rpc_task *task, 157 + struct nfs_read_data *data) 158 + { 159 + struct nfs_client *clp = data->ds_clp; 160 + int reset = 0; 161 + 162 + dprintk("%s DS read\n", __func__); 163 + 164 + if (filelayout_async_handle_error(task, data->args.context->state, 165 + data->ds_clp, &reset) == -EAGAIN) { 166 + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", 167 + __func__, data->ds_clp, data->ds_clp->cl_session); 168 + if (reset) { 169 + filelayout_set_lo_fail(data->lseg); 170 + nfs4_reset_read(task, data); 171 + clp = NFS_SERVER(data->inode)->nfs_client; 172 + } 173 + nfs_restart_rpc(task, clp); 174 + return -EAGAIN; 175 + } 176 + 177 + return 0; 178 + } 179 + 105 180 /* 106 181 * Call ops for the async read/write cases 107 182 * In the case of dense layouts, the offset needs to be reset to its ··· 187 108 static void filelayout_read_prepare(struct rpc_task *task, void *data) 188 109 { 189 110 struct nfs_read_data *rdata = (struct nfs_read_data *)data; 111 + 112 + rdata->read_done_cb = filelayout_read_done_cb; 190 113 191 114 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 192 115 &rdata->args.seq_args, &rdata->res.seq_res,
+29 -6
fs/nfs/nfs4proc.c
··· 3074 3074 return err; 3075 3075 } 3076 3076 3077 - static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3077 + static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) 3078 3078 { 3079 3079 struct nfs_server *server = NFS_SERVER(data->inode); 3080 - 3081 - dprintk("--> %s\n", __func__); 3082 - 3083 - if (!nfs4_sequence_done(task, &data->res.seq_res)) 3084 - return -EAGAIN; 3085 3080 3086 3081 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3087 3082 nfs_restart_rpc(task, server->nfs_client); ··· 3089 3094 return 0; 3090 3095 } 3091 3096 3097 + static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3098 + { 3099 + 3100 + dprintk("--> %s\n", __func__); 3101 + 3102 + if (!nfs4_sequence_done(task, &data->res.seq_res)) 3103 + return -EAGAIN; 3104 + 3105 + return data->read_done_cb(task, data); 3106 + } 3107 + 3092 3108 static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 3093 3109 { 3094 3110 data->timestamp = jiffies; 3111 + data->read_done_cb = nfs4_read_done_cb; 3095 3112 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3096 3113 } 3114 + 3115 + /* Reset the the nfs_read_data to send the read to the MDS. */ 3116 + void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) 3117 + { 3118 + dprintk("%s Reset task for i/o through\n", __func__); 3119 + put_lseg(data->lseg); 3120 + data->lseg = NULL; 3121 + /* offsets will differ in the dense stripe case */ 3122 + data->args.offset = data->mds_offset; 3123 + data->ds_clp = NULL; 3124 + data->args.fh = NFS_FH(data->inode); 3125 + data->read_done_cb = nfs4_read_done_cb; 3126 + task->tk_ops = data->mds_ops; 3127 + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); 3128 + } 3129 + EXPORT_SYMBOL_GPL(nfs4_reset_read); 3097 3130 3098 3131 static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 3099 3132 {
+1
fs/nfs/nfs4state.c
··· 1453 1453 { 1454 1454 nfs4_schedule_lease_recovery(session->clp); 1455 1455 } 1456 + EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); 1456 1457 1457 1458 void nfs41_handle_recall_slot(struct nfs_client *clp) 1458 1459 {
+1
fs/nfs/read.c
··· 391 391 return; 392 392 393 393 /* Yes, so retry the read at the end of the data */ 394 + data->mds_offset += resp->count; 394 395 argp->offset += resp->count; 395 396 argp->pgbase += resp->count; 396 397 argp->count -= resp->count;
+1
include/linux/nfs_xdr.h
··· 1020 1020 struct pnfs_layout_segment *lseg; 1021 1021 struct nfs_client *ds_clp; /* pNFS data server */ 1022 1022 const struct rpc_call_ops *mds_ops; 1023 + int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); 1023 1024 __u64 mds_offset; 1024 1025 struct page *page_array[NFS_PAGEVEC_SIZE]; 1025 1026 };
+1
include/linux/sunrpc/clnt.h
··· 129 129 struct rpc_clnt *rpc_create(struct rpc_create_args *args); 130 130 struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, 131 131 struct rpc_program *, u32); 132 + void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); 132 133 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); 133 134 void rpc_shutdown_client(struct rpc_clnt *); 134 135 void rpc_release_client(struct rpc_clnt *);
+8
net/sunrpc/clnt.c
··· 597 597 } 598 598 } 599 599 600 + void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) 601 + { 602 + rpc_task_release_client(task); 603 + rpc_task_set_client(task, clnt); 604 + } 605 + EXPORT_SYMBOL_GPL(rpc_task_reset_client); 606 + 607 + 600 608 static void 601 609 rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) 602 610 {