Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSD introduce async copy feature

Upon receiving a request for async copy, create a new kthread. If we
get an asynchronous request, make sure to copy the needed arguments/state
from the stack before starting the copy. Then start the thread and reply
back to the client indicating the copy is asynchronous.

nfsd_copy_file_range() will copy in a loop over the total number of
bytes that need to be copied. In case a failure happens in the middle, we
ignore the error and return how much we copied so far. Once done, we
create a work item for the callback workqueue and send CB_OFFLOAD with
the results.

The lifetime of the copy stateid is bound to the vfs copy. This way we
don't need to keep the nfsd_net structure for the callback. We could
keep it around longer so that an OFFLOAD_STATUS that came late would
still get results, but clients should be able to deal without that.

We handle OFFLOAD_CANCEL by sending a signal to the copy thread and
calling kthread_stop.

A client should cancel any ongoing copies before calling DESTROY_CLIENT;
if not, we return a CLIENT_BUSY error.

If the client is destroyed for some other reason (lease expiration, or
server shutdown), we must clean up any ongoing copies ourselves.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
[colin.king@canonical.com: fix leak in error case]
[bfields@fieldses.org: remove signalling, merge patches]
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

authored by

Olga Kornievskaia and committed by
J. Bruce Fields
e0639dc5 885e2bf3

+327 -25
+8
fs/nfsd/netns.h
··· 123 123 124 124 wait_queue_head_t ntf_wq; 125 125 atomic_t ntf_refcnt; 126 + 127 + /* 128 + * clientid and stateid data for construction of net unique COPY 129 + * stateids. 130 + */ 131 + u32 s2s_cp_cl_id; 132 + struct idr s2s_cp_stateids; 133 + spinlock_t s2s_cp_lock; 126 134 }; 127 135 128 136 /* Simple check to find out if a given net was properly initialized */
+242 -19
fs/nfsd/nfs4proc.c
··· 36 36 #include <linux/file.h> 37 37 #include <linux/falloc.h> 38 38 #include <linux/slab.h> 39 + #include <linux/kthread.h> 39 40 40 41 #include "idmap.h" 41 42 #include "cache.h" ··· 1090 1089 return status; 1091 1090 } 1092 1091 1092 + void nfs4_put_copy(struct nfsd4_copy *copy) 1093 + { 1094 + if (!refcount_dec_and_test(&copy->refcount)) 1095 + return; 1096 + kfree(copy); 1097 + } 1098 + 1099 + static bool 1100 + check_and_set_stop_copy(struct nfsd4_copy *copy) 1101 + { 1102 + bool value; 1103 + 1104 + spin_lock(&copy->cp_clp->async_lock); 1105 + value = copy->stopped; 1106 + if (!copy->stopped) 1107 + copy->stopped = true; 1108 + spin_unlock(&copy->cp_clp->async_lock); 1109 + return value; 1110 + } 1111 + 1112 + static void nfsd4_stop_copy(struct nfsd4_copy *copy) 1113 + { 1114 + /* only 1 thread should stop the copy */ 1115 + if (!check_and_set_stop_copy(copy)) 1116 + kthread_stop(copy->copy_task); 1117 + nfs4_put_copy(copy); 1118 + } 1119 + 1120 + static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) 1121 + { 1122 + struct nfsd4_copy *copy = NULL; 1123 + 1124 + spin_lock(&clp->async_lock); 1125 + if (!list_empty(&clp->async_copies)) { 1126 + copy = list_first_entry(&clp->async_copies, struct nfsd4_copy, 1127 + copies); 1128 + refcount_inc(&copy->refcount); 1129 + } 1130 + spin_unlock(&clp->async_lock); 1131 + return copy; 1132 + } 1133 + 1134 + void nfsd4_shutdown_copy(struct nfs4_client *clp) 1135 + { 1136 + struct nfsd4_copy *copy; 1137 + 1138 + while ((copy = nfsd4_get_copy(clp)) != NULL) 1139 + nfsd4_stop_copy(copy); 1140 + } 1141 + 1142 + static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) 1143 + { 1144 + struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); 1145 + 1146 + nfs4_put_copy(copy); 1147 + } 1148 + 1149 + static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, 1150 + struct rpc_task *task) 1151 + { 1152 + return 1; 1153 + } 1154 + 1155 + static const struct nfsd4_callback_ops nfsd4_cb_offload_ops 
= { 1156 + .release = nfsd4_cb_offload_release, 1157 + .done = nfsd4_cb_offload_done 1158 + }; 1159 + 1160 + static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) 1161 + { 1162 + copy->cp_res.wr_stable_how = NFS_UNSTABLE; 1163 + copy->cp_synchronous = sync; 1164 + gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net); 1165 + } 1166 + 1167 + static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) 1168 + { 1169 + ssize_t bytes_copied = 0; 1170 + size_t bytes_total = copy->cp_count; 1171 + u64 src_pos = copy->cp_src_pos; 1172 + u64 dst_pos = copy->cp_dst_pos; 1173 + 1174 + do { 1175 + if (kthread_should_stop()) 1176 + break; 1177 + bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos, 1178 + copy->file_dst, dst_pos, bytes_total); 1179 + if (bytes_copied <= 0) 1180 + break; 1181 + bytes_total -= bytes_copied; 1182 + copy->cp_res.wr_bytes_written += bytes_copied; 1183 + src_pos += bytes_copied; 1184 + dst_pos += bytes_copied; 1185 + } while (bytes_total > 0 && !copy->cp_synchronous); 1186 + return bytes_copied; 1187 + } 1188 + 1189 + static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) 1190 + { 1191 + __be32 status; 1192 + ssize_t bytes; 1193 + 1194 + bytes = _nfsd_copy_file_range(copy); 1195 + /* for async copy, we ignore the error, client can always retry 1196 + * to get the error 1197 + */ 1198 + if (bytes < 0 && !copy->cp_res.wr_bytes_written) 1199 + status = nfserrno(bytes); 1200 + else { 1201 + nfsd4_init_copy_res(copy, sync); 1202 + status = nfs_ok; 1203 + } 1204 + 1205 + fput(copy->file_src); 1206 + fput(copy->file_dst); 1207 + return status; 1208 + } 1209 + 1210 + static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) 1211 + { 1212 + dst->cp_src_pos = src->cp_src_pos; 1213 + dst->cp_dst_pos = src->cp_dst_pos; 1214 + dst->cp_count = src->cp_count; 1215 + dst->cp_synchronous = src->cp_synchronous; 1216 + memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); 1217 + memcpy(&dst->fh, &src->fh, 
sizeof(src->fh)); 1218 + dst->cp_clp = src->cp_clp; 1219 + dst->file_dst = get_file(src->file_dst); 1220 + dst->file_src = get_file(src->file_src); 1221 + memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); 1222 + } 1223 + 1224 + static void cleanup_async_copy(struct nfsd4_copy *copy) 1225 + { 1226 + nfs4_free_cp_state(copy); 1227 + fput(copy->file_dst); 1228 + fput(copy->file_src); 1229 + spin_lock(&copy->cp_clp->async_lock); 1230 + list_del(&copy->copies); 1231 + spin_unlock(&copy->cp_clp->async_lock); 1232 + nfs4_put_copy(copy); 1233 + } 1234 + 1235 + static int nfsd4_do_async_copy(void *data) 1236 + { 1237 + struct nfsd4_copy *copy = (struct nfsd4_copy *)data; 1238 + struct nfsd4_copy *cb_copy; 1239 + 1240 + copy->nfserr = nfsd4_do_copy(copy, 0); 1241 + cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); 1242 + if (!cb_copy) 1243 + goto out; 1244 + memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res)); 1245 + cb_copy->cp_clp = copy->cp_clp; 1246 + cb_copy->nfserr = copy->nfserr; 1247 + memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh)); 1248 + nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, 1249 + &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); 1250 + nfsd4_run_cb(&cb_copy->cp_cb); 1251 + out: 1252 + cleanup_async_copy(copy); 1253 + return 0; 1254 + } 1255 + 1093 1256 static __be32 1094 1257 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1095 1258 union nfsd4_op_u *u) 1096 1259 { 1097 1260 struct nfsd4_copy *copy = &u->copy; 1098 - struct file *src, *dst; 1099 1261 __be32 status; 1100 - ssize_t bytes; 1262 + struct nfsd4_copy *async_copy = NULL; 1101 1263 1102 - status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, &src, 1103 - &copy->cp_dst_stateid, &dst); 1264 + status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid, 1265 + &copy->file_src, &copy->cp_dst_stateid, 1266 + &copy->file_dst); 1104 1267 if (status) 1105 1268 goto out; 1106 1269 1107 - bytes = nfsd_copy_file_range(src, 
copy->cp_src_pos, 1108 - dst, copy->cp_dst_pos, copy->cp_count); 1270 + copy->cp_clp = cstate->clp; 1271 + memcpy(&copy->fh, &cstate->current_fh.fh_handle, 1272 + sizeof(struct knfsd_fh)); 1273 + if (!copy->cp_synchronous) { 1274 + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 1109 1275 1110 - if (bytes < 0) 1111 - status = nfserrno(bytes); 1112 - else { 1113 - copy->cp_res.wr_bytes_written = bytes; 1114 - copy->cp_res.wr_stable_how = NFS_UNSTABLE; 1115 - copy->cp_synchronous = 1; 1116 - gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp)); 1276 + status = nfserrno(-ENOMEM); 1277 + async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); 1278 + if (!async_copy) 1279 + goto out; 1280 + if (!nfs4_init_cp_state(nn, copy)) { 1281 + kfree(async_copy); 1282 + goto out; 1283 + } 1284 + refcount_set(&async_copy->refcount, 1); 1285 + memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid, 1286 + sizeof(copy->cp_stateid)); 1287 + dup_copy_fields(copy, async_copy); 1288 + async_copy->copy_task = kthread_create(nfsd4_do_async_copy, 1289 + async_copy, "%s", "copy thread"); 1290 + if (IS_ERR(async_copy->copy_task)) 1291 + goto out_err; 1292 + spin_lock(&async_copy->cp_clp->async_lock); 1293 + list_add(&async_copy->copies, 1294 + &async_copy->cp_clp->async_copies); 1295 + spin_unlock(&async_copy->cp_clp->async_lock); 1296 + wake_up_process(async_copy->copy_task); 1117 1297 status = nfs_ok; 1118 - } 1119 - 1120 - fput(src); 1121 - fput(dst); 1298 + } else 1299 + status = nfsd4_do_copy(copy, 1); 1122 1300 out: 1123 1301 return status; 1302 + out_err: 1303 + cleanup_async_copy(async_copy); 1304 + goto out; 1305 + } 1306 + 1307 + struct nfsd4_copy * 1308 + find_async_copy(struct nfs4_client *clp, stateid_t *stateid) 1309 + { 1310 + struct nfsd4_copy *copy; 1311 + 1312 + spin_lock(&clp->async_lock); 1313 + list_for_each_entry(copy, &clp->async_copies, copies) { 1314 + if (memcmp(&copy->cp_stateid, stateid, NFS4_STATEID_SIZE)) 1315 + continue; 1316 + 
refcount_inc(&copy->refcount); 1317 + spin_unlock(&clp->async_lock); 1318 + return copy; 1319 + } 1320 + spin_unlock(&clp->async_lock); 1321 + return NULL; 1124 1322 } 1125 1323 1126 1324 static __be32 ··· 1327 1127 struct nfsd4_compound_state *cstate, 1328 1128 union nfsd4_op_u *u) 1329 1129 { 1330 - return 0; 1130 + struct nfsd4_offload_status *os = &u->offload_status; 1131 + __be32 status = 0; 1132 + struct nfsd4_copy *copy; 1133 + struct nfs4_client *clp = cstate->clp; 1134 + 1135 + copy = find_async_copy(clp, &os->stateid); 1136 + if (copy) 1137 + nfsd4_stop_copy(copy); 1138 + else 1139 + status = nfserr_bad_stateid; 1140 + 1141 + return status; 1331 1142 } 1332 1143 1333 1144 static __be32 ··· 1368 1157 struct nfsd4_compound_state *cstate, 1369 1158 union nfsd4_op_u *u) 1370 1159 { 1371 - return nfserr_notsupp; 1160 + struct nfsd4_offload_status *os = &u->offload_status; 1161 + __be32 status = 0; 1162 + struct nfsd4_copy *copy; 1163 + struct nfs4_client *clp = cstate->clp; 1164 + 1165 + copy = find_async_copy(clp, &os->stateid); 1166 + if (copy) { 1167 + os->count = copy->cp_res.wr_bytes_written; 1168 + nfs4_put_copy(copy); 1169 + } else 1170 + status = nfserr_bad_stateid; 1171 + 1172 + return status; 1372 1173 } 1373 1174 1374 1175 static __be32
+37 -1
fs/nfsd/nfs4state.c
··· 713 713 return NULL; 714 714 } 715 715 716 + /* 717 + * Create a unique stateid_t to represent each COPY. 718 + */ 719 + int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy) 720 + { 721 + int new_id; 722 + 723 + idr_preload(GFP_KERNEL); 724 + spin_lock(&nn->s2s_cp_lock); 725 + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT); 726 + spin_unlock(&nn->s2s_cp_lock); 727 + idr_preload_end(); 728 + if (new_id < 0) 729 + return 0; 730 + copy->cp_stateid.si_opaque.so_id = new_id; 731 + copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; 732 + copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; 733 + return 1; 734 + } 735 + 736 + void nfs4_free_cp_state(struct nfsd4_copy *copy) 737 + { 738 + struct nfsd_net *nn; 739 + 740 + nn = net_generic(copy->cp_clp->net, nfsd_net_id); 741 + spin_lock(&nn->s2s_cp_lock); 742 + idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id); 743 + spin_unlock(&nn->s2s_cp_lock); 744 + } 745 + 716 746 static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) 717 747 { 718 748 struct nfs4_stid *stid; ··· 1857 1827 #ifdef CONFIG_NFSD_PNFS 1858 1828 INIT_LIST_HEAD(&clp->cl_lo_states); 1859 1829 #endif 1830 + INIT_LIST_HEAD(&clp->async_copies); 1831 + spin_lock_init(&clp->async_lock); 1860 1832 spin_lock_init(&clp->cl_lock); 1861 1833 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1862 1834 return clp; ··· 1974 1942 } 1975 1943 } 1976 1944 nfsd4_return_all_client_layouts(clp); 1945 + nfsd4_shutdown_copy(clp); 1977 1946 nfsd4_shutdown_callback(clp); 1978 1947 if (clp->cl_cb_conn.cb_xprt) 1979 1948 svc_xprt_put(clp->cl_cb_conn.cb_xprt); ··· 2508 2475 || !list_empty(&clp->cl_lo_states) 2509 2476 #endif 2510 2477 || !list_empty(&clp->cl_delegations) 2511 - || !list_empty(&clp->cl_sessions); 2478 + || !list_empty(&clp->cl_sessions) 2479 + || !list_empty(&clp->async_copies); 2512 2480 } 2513 2481 2514 2482 __be32 ··· 7195 7161 
INIT_LIST_HEAD(&nn->close_lru); 7196 7162 INIT_LIST_HEAD(&nn->del_recall_lru); 7197 7163 spin_lock_init(&nn->client_lock); 7164 + spin_lock_init(&nn->s2s_cp_lock); 7165 + idr_init(&nn->s2s_cp_stateids); 7198 7166 7199 7167 spin_lock_init(&nn->blocked_locks_lock); 7200 7168 INIT_LIST_HEAD(&nn->blocked_locks_lru);
+18 -5
fs/nfsd/nfs4xdr.c
··· 4231 4231 #endif /* CONFIG_NFSD_PNFS */ 4232 4232 4233 4233 static __be32 4234 - nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write) 4234 + nfsd42_encode_write_res(struct nfsd4_compoundres *resp, 4235 + struct nfsd42_write_res *write, bool sync) 4235 4236 { 4236 4237 __be32 *p; 4237 - 4238 - p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); 4238 + p = xdr_reserve_space(&resp->xdr, 4); 4239 4239 if (!p) 4240 4240 return nfserr_resource; 4241 4241 4242 - *p++ = cpu_to_be32(0); 4242 + if (sync) 4243 + *p++ = cpu_to_be32(0); 4244 + else { 4245 + __be32 nfserr; 4246 + *p++ = cpu_to_be32(1); 4247 + nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid); 4248 + if (nfserr) 4249 + return nfserr; 4250 + } 4251 + p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); 4252 + if (!p) 4253 + return nfserr_resource; 4254 + 4243 4255 p = xdr_encode_hyper(p, write->wr_bytes_written); 4244 4256 *p++ = cpu_to_be32(write->wr_stable_how); 4245 4257 p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, ··· 4265 4253 { 4266 4254 __be32 *p; 4267 4255 4268 - nfserr = nfsd42_encode_write_res(resp, &copy->cp_res); 4256 + nfserr = nfsd42_encode_write_res(resp, &copy->cp_res, 4257 + copy->cp_synchronous); 4269 4258 if (nfserr) 4270 4259 return nfserr; 4271 4260
+1
fs/nfsd/nfsctl.c
··· 1242 1242 nn->somebody_reclaimed = false; 1243 1243 nn->clverifier_counter = prandom_u32(); 1244 1244 nn->clientid_counter = prandom_u32(); 1245 + nn->s2s_cp_cl_id = nn->clientid_counter++; 1245 1246 1246 1247 atomic_set(&nn->ntf_refcnt, 0); 1247 1248 init_waitqueue_head(&nn->ntf_wq);
+9
fs/nfsd/state.h
··· 355 355 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 356 356 /* wait here for slots */ 357 357 struct net *net; 358 + struct list_head async_copies; /* list of async copies */ 359 + spinlock_t async_lock; /* lock for async copies */ 358 360 }; 359 361 360 362 /* struct nfs4_client_reset ··· 602 600 603 601 struct nfsd4_compound_state; 604 602 struct nfsd_net; 603 + struct nfsd4_copy; 605 604 606 605 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 607 606 struct nfsd4_compound_state *cstate, struct svc_fh *fhp, ··· 612 609 struct nfs4_stid **s, struct nfsd_net *nn); 613 610 struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, 614 611 void (*sc_free)(struct nfs4_stid *)); 612 + int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy); 613 + void nfs4_free_cp_state(struct nfsd4_copy *copy); 615 614 void nfs4_unhash_stid(struct nfs4_stid *s); 616 615 void nfs4_put_stid(struct nfs4_stid *s); 617 616 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); ··· 632 627 extern int nfsd4_create_callback_queue(void); 633 628 extern void nfsd4_destroy_callback_queue(void); 634 629 extern void nfsd4_shutdown_callback(struct nfs4_client *); 630 + extern void nfsd4_shutdown_copy(struct nfs4_client *clp); 635 631 extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); 636 632 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, 637 633 struct nfsd_net *nn); ··· 640 634 641 635 struct nfs4_file *find_file(struct knfsd_fh *fh); 642 636 void put_nfs4_file(struct nfs4_file *fi); 637 + extern void nfs4_put_copy(struct nfsd4_copy *copy); 638 + extern struct nfsd4_copy * 639 + find_async_copy(struct nfs4_client *clp, stateid_t *staetid); 643 640 static inline void get_nfs4_file(struct nfs4_file *fi) 644 641 { 645 642 refcount_inc(&fi->fi_ref);
+12
fs/nfsd/xdr4.h
··· 532 532 struct nfsd4_callback cp_cb; 533 533 __be32 nfserr; 534 534 struct knfsd_fh fh; 535 + 536 + struct nfs4_client *cp_clp; 537 + 538 + struct file *file_src; 539 + struct file *file_dst; 540 + 541 + stateid_t cp_stateid; 542 + 543 + struct list_head copies; 544 + struct task_struct *copy_task; 545 + refcount_t refcount; 546 + bool stopped; 535 547 }; 536 548 537 549 struct nfsd4_seek {