Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'pnfs_generic'

* pnfs_generic:
NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
NFSv4.1/pNFS: Fix a race in initiate_file_draining()
NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
NFS: Relax requirements in nfs_flush_incompatible
NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
NFS: Allow multiple commit requests in flight per file
NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
NFSv4: List stateid information in the callback tracepoints
NFSv4.1/pNFS: Don't return NFS4ERR_DELAY unnecessarily in CB_LAYOUTRECALL
NFSv4.1/pNFS: Ensure we enforce RFC5661 Section 12.5.5.2.1
pNFS: If we have to delay the layout callback, mark the layout for return
NFSv4.1/pNFS: Add a helper to mark the layout as returned
pNFS: Ensure nfs4_layoutget_prepare returns the correct error

+298 -119
+45 -7
fs/nfs/callback_proc.c
··· 83 83 84 84 res = htonl(NFS4ERR_BADHANDLE); 85 85 inode = nfs_delegation_find_inode(cps->clp, &args->fh); 86 - if (inode == NULL) 86 + if (inode == NULL) { 87 + trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, 88 + &args->stateid, -ntohl(res)); 87 89 goto out; 90 + } 88 91 /* Set up a helper thread to actually return the delegation */ 89 92 switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { 90 93 case 0: ··· 99 96 default: 100 97 res = htonl(NFS4ERR_RESOURCE); 101 98 } 102 - trace_nfs4_recall_delegation(inode, -ntohl(res)); 99 + trace_nfs4_cb_recall(cps->clp, &args->fh, inode, 100 + &args->stateid, -ntohl(res)); 103 101 iput(inode); 104 102 out: 105 103 dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); ··· 164 160 return lo; 165 161 } 166 162 163 + /* 164 + * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) 165 + */ 166 + static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo, 167 + const nfs4_stateid *new) 168 + { 169 + u32 oldseq, newseq; 170 + 171 + oldseq = be32_to_cpu(lo->plh_stateid.seqid); 172 + newseq = be32_to_cpu(new->seqid); 173 + 174 + if (newseq > oldseq + 1) 175 + return false; 176 + return true; 177 + } 178 + 167 179 static u32 initiate_file_draining(struct nfs_client *clp, 168 180 struct cb_layoutrecallargs *args) 169 181 { ··· 189 169 LIST_HEAD(free_me_list); 190 170 191 171 lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); 192 - if (!lo) 172 + if (!lo) { 173 + trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, 174 + &args->cbl_stateid, -rv); 193 175 goto out; 176 + } 194 177 195 178 ino = lo->plh_inode; 196 179 197 180 spin_lock(&ino->i_lock); 181 + if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) { 182 + rv = NFS4ERR_DELAY; 183 + goto unlock; 184 + } 198 185 pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); 199 186 spin_unlock(&ino->i_lock); 200 187 201 188 pnfs_layoutcommit_inode(ino, false); 202 189 203 190 spin_lock(&ino->i_lock); 204 - if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 205 - pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, 206 - &args->cbl_range)) { 191 + /* 192 + * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return) 193 + */ 194 + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 207 195 rv = NFS4ERR_DELAY; 196 + goto unlock; 197 + } 198 + 199 + if (pnfs_mark_matching_lsegs_return(lo, &free_me_list, 200 + &args->cbl_range)) { 201 + rv = NFS4_OK; 208 202 goto unlock; 209 203 } 210 204 ··· 226 192 NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, 227 193 &args->cbl_range); 228 194 } 195 + pnfs_mark_layout_returned_if_empty(lo); 229 196 unlock: 230 197 spin_unlock(&ino->i_lock); 231 198 pnfs_free_lseg_list(&free_me_list); 199 + /* Free all lsegs that are attached to commit buckets */ 200 + nfs_commit_inode(ino, 0); 232 201 pnfs_put_layout_hdr(lo); 233 - trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv); 202 + trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, 203 + &args->cbl_stateid, -rv); 234 204 iput(ino); 235 205 out: 236 206 return rv;
+24 -9
fs/nfs/direct.c
··· 117 117 return atomic_dec_and_test(&dreq->io_count); 118 118 } 119 119 120 - void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) 121 - { 122 - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 123 - } 124 - EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); 125 - 126 120 static void 127 121 nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) 128 122 { ··· 729 735 nfs_direct_write_complete(dreq, data->inode); 730 736 } 731 737 732 - static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) 738 + static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, 739 + struct nfs_page *req) 733 740 { 734 - /* There is no lock to clear */ 741 + struct nfs_direct_req *dreq = cinfo->dreq; 742 + 743 + spin_lock(&dreq->lock); 744 + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 745 + spin_unlock(&dreq->lock); 746 + nfs_mark_request_commit(req, NULL, cinfo, 0); 735 747 } 736 748 737 749 static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { 738 750 .completion = nfs_direct_commit_complete, 739 - .error_cleanup = nfs_direct_error_cleanup, 751 + .resched_write = nfs_direct_resched_write, 740 752 }; 741 753 742 754 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) ··· 847 847 } 848 848 } 849 849 850 + static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) 851 + { 852 + struct nfs_direct_req *dreq = hdr->dreq; 853 + 854 + spin_lock(&dreq->lock); 855 + if (dreq->error == 0) { 856 + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 857 + /* fake unstable write to let common nfs resend pages */ 858 + hdr->verf.committed = NFS_UNSTABLE; 859 + hdr->good_bytes = hdr->args.count; 860 + } 861 + spin_unlock(&dreq->lock); 862 + } 863 + 850 864 static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { 851 865 .error_cleanup = nfs_write_sync_pgio_error, 852 866 .init_hdr = nfs_direct_pgio_init, 853 867 .completion = nfs_direct_write_completion, 868 + .reschedule_io = nfs_direct_write_reschedule_io, 854 869 }; 855 870 856 871
+1 -1
fs/nfs/file.c
··· 514 514 * so it will not block due to pages that will shortly be freeable. 515 515 */ 516 516 nfsi = NFS_I(mapping->host); 517 - if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { 517 + if (atomic_read(&nfsi->commit_info.rpcs_out)) { 518 518 *writeback = true; 519 519 return; 520 520 }
+1 -12
fs/nfs/flexfilelayout/flexfilelayout.c
··· 941 941 hdr->args.count, 942 942 (unsigned long long)hdr->args.offset); 943 943 944 - if (!hdr->dreq) { 945 - struct nfs_open_context *ctx; 946 - 947 - ctx = nfs_list_entry(hdr->pages.next)->wb_context; 948 - set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); 949 - hdr->completion_ops->error_cleanup(&hdr->pages); 950 - } else { 951 - nfs_direct_set_resched_writes(hdr->dreq); 952 - /* fake unstable write to let common nfs resend pages */ 953 - hdr->verf.committed = NFS_UNSTABLE; 954 - hdr->good_bytes = hdr->args.count; 955 - } 944 + hdr->completion_ops->reschedule_io(hdr); 956 945 return; 957 946 } 958 947
+6 -1
fs/nfs/internal.h
··· 264 264 return desc->pg_mirror_count > 1; 265 265 } 266 266 267 + static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, 268 + const struct nfs_open_context *ctx2) 269 + { 270 + return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; 271 + } 272 + 267 273 /* nfs2xdr.c */ 268 274 extern struct rpc_procinfo nfs_procedures[]; 269 275 extern int nfs2_decode_dirent(struct xdr_stream *, ··· 525 519 inode_dio_wait(inode); 526 520 } 527 521 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 528 - extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); 529 522 530 523 /* nfs4proc.c */ 531 524 extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
+7 -5
fs/nfs/nfs4proc.c
··· 7776 7776 struct nfs4_layoutget *lgp = calldata; 7777 7777 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 7778 7778 struct nfs4_session *session = nfs4_get_session(server); 7779 + int ret; 7779 7780 7780 7781 dprintk("--> %s\n", __func__); 7781 7782 /* Note the is a race here, where a CB_LAYOUTRECALL can come in ··· 7787 7786 if (nfs41_setup_sequence(session, &lgp->args.seq_args, 7788 7787 &lgp->res.seq_res, task)) 7789 7788 return; 7790 - if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, 7789 + ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid, 7791 7790 NFS_I(lgp->args.inode)->layout, 7792 7791 &lgp->args.range, 7793 - lgp->args.ctx->state)) { 7794 - rpc_exit(task, NFS4_OK); 7795 - } 7792 + lgp->args.ctx->state); 7793 + if (ret < 0) 7794 + rpc_exit(task, ret); 7796 7795 } 7797 7796 7798 7797 static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) ··· 8074 8073 8075 8074 dprintk("--> %s\n", __func__); 8076 8075 spin_lock(&lo->plh_inode->i_lock); 8076 + pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); 8077 + pnfs_mark_layout_returned_if_empty(lo); 8077 8078 if (lrp->res.lrs_present) 8078 8079 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); 8079 - pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); 8080 8080 pnfs_clear_layoutreturn_waitbit(lo); 8081 8081 lo->plh_block_lgets--; 8082 8082 spin_unlock(&lo->plh_inode->i_lock);
+67 -2
fs/nfs/nfs4trace.h
··· 982 982 DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); 983 983 DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); 984 984 #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ 985 - DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); 986 985 987 986 DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, 988 987 TP_PROTO( ··· 1144 1145 ), \ 1145 1146 TP_ARGS(clp, fhandle, inode, error)) 1146 1147 DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); 1147 - DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode); 1148 1148 1149 + DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, 1150 + TP_PROTO( 1151 + const struct nfs_client *clp, 1152 + const struct nfs_fh *fhandle, 1153 + const struct inode *inode, 1154 + const nfs4_stateid *stateid, 1155 + int error 1156 + ), 1157 + 1158 + TP_ARGS(clp, fhandle, inode, stateid, error), 1159 + 1160 + TP_STRUCT__entry( 1161 + __field(int, error) 1162 + __field(dev_t, dev) 1163 + __field(u32, fhandle) 1164 + __field(u64, fileid) 1165 + __string(dstaddr, clp ? 1166 + rpc_peeraddr2str(clp->cl_rpcclient, 1167 + RPC_DISPLAY_ADDR) : "unknown") 1168 + __field(int, stateid_seq) 1169 + __field(u32, stateid_hash) 1170 + ), 1171 + 1172 + TP_fast_assign( 1173 + __entry->error = error; 1174 + __entry->fhandle = nfs_fhandle_hash(fhandle); 1175 + if (inode != NULL) { 1176 + __entry->fileid = NFS_FILEID(inode); 1177 + __entry->dev = inode->i_sb->s_dev; 1178 + } else { 1179 + __entry->fileid = 0; 1180 + __entry->dev = 0; 1181 + } 1182 + __assign_str(dstaddr, clp ? 1183 + rpc_peeraddr2str(clp->cl_rpcclient, 1184 + RPC_DISPLAY_ADDR) : "unknown") 1185 + __entry->stateid_seq = 1186 + be32_to_cpu(stateid->seqid); 1187 + __entry->stateid_hash = 1188 + nfs_stateid_hash(stateid); 1189 + ), 1190 + 1191 + TP_printk( 1192 + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1193 + "stateid=%d:0x%08x dstaddr=%s", 1194 + __entry->error, 1195 + show_nfsv4_errors(__entry->error), 1196 + MAJOR(__entry->dev), MINOR(__entry->dev), 1197 + (unsigned long long)__entry->fileid, 1198 + __entry->fhandle, 1199 + __entry->stateid_seq, __entry->stateid_hash, 1200 + __get_str(dstaddr) 1201 + ) 1202 + ); 1203 + 1204 + #define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \ 1205 + DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \ 1206 + TP_PROTO( \ 1207 + const struct nfs_client *clp, \ 1208 + const struct nfs_fh *fhandle, \ 1209 + const struct inode *inode, \ 1210 + const nfs4_stateid *stateid, \ 1211 + int error \ 1212 + ), \ 1213 + TP_ARGS(clp, fhandle, inode, stateid, error)) 1214 + DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall); 1215 + DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file); 1149 1216 1150 1217 DECLARE_EVENT_CLASS(nfs4_idmap_event, 1151 1218 TP_PROTO(
-1
fs/nfs/nfstrace.h
··· 39 39 { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ 40 40 { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ 41 41 { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ 42 - { 1 << NFS_INO_COMMIT, "COMMIT" }, \ 43 42 { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ 44 43 { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) 45 44
-6
fs/nfs/pagelist.c
··· 899 899 pgio->pg_mirrors_dynamic = NULL; 900 900 } 901 901 902 - static bool nfs_match_open_context(const struct nfs_open_context *ctx1, 903 - const struct nfs_open_context *ctx2) 904 - { 905 - return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; 906 - } 907 - 908 902 static bool nfs_match_lock_context(const struct nfs_lock_context *l1, 909 903 const struct nfs_lock_context *l2) 910 904 {
+55 -27
fs/nfs/pnfs.c
··· 53 53 static LIST_HEAD(pnfs_modules_tbl); 54 54 55 55 static int 56 - pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 56 + pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, 57 57 enum pnfs_iomode iomode, bool sync); 58 58 59 59 /* Return the registered pnfs layout driver module matching given id */ ··· 385 385 enum pnfs_iomode iomode; 386 386 bool send; 387 387 388 - stateid = lo->plh_stateid; 388 + nfs4_stateid_copy(&stateid, &lo->plh_stateid); 389 389 iomode = lo->plh_return_iomode; 390 390 send = pnfs_prepare_layoutreturn(lo); 391 391 spin_unlock(&inode->i_lock); 392 392 if (send) { 393 393 /* Send an async layoutreturn so we dont deadlock */ 394 - pnfs_send_layoutreturn(lo, stateid, iomode, false); 394 + pnfs_send_layoutreturn(lo, &stateid, iomode, false); 395 395 } 396 396 } else 397 397 spin_unlock(&inode->i_lock); ··· 566 566 int 567 567 pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 568 568 struct list_head *tmp_list, 569 - struct pnfs_layout_range *recall_range) 569 + const struct pnfs_layout_range *recall_range) 570 570 { 571 571 struct pnfs_layout_segment *lseg, *next; 572 - int invalid = 0, removed = 0; 572 + int remaining = 0; 573 573 574 574 dprintk("%s:Begin lo %p\n", __func__, lo); 575 575 ··· 582 582 "offset %llu length %llu\n", __func__, 583 583 lseg, lseg->pls_range.iomode, lseg->pls_range.offset, 584 584 lseg->pls_range.length); 585 - invalid++; 586 - removed += mark_lseg_invalid(lseg, tmp_list); 585 + if (!mark_lseg_invalid(lseg, tmp_list)) 586 + remaining++; 587 587 } 588 - dprintk("%s:Return %i\n", __func__, invalid - removed); 589 - return invalid - removed; 588 + dprintk("%s:Return %i\n", __func__, remaining); 589 + return remaining; 590 590 } 591 591 592 592 /* note free_me must contain lsegs from a single layout_hdr */ ··· 702 702 ret = -EAGAIN; 703 703 spin_unlock(&inode->i_lock); 704 704 pnfs_free_lseg_list(&lseg_list); 705 + /* Free all lsegs that are attached to commit buckets */ 706 + nfs_commit_inode(inode, 0); 705 707 pnfs_put_layout_hdr(lo); 706 708 iput(inode); 707 709 } ··· 827 825 828 826 int 829 827 pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, 830 - struct pnfs_layout_range *range, 828 + const struct pnfs_layout_range *range, 831 829 struct nfs4_state *open_state) 832 830 { 833 831 int status = 0; ··· 862 860 static struct pnfs_layout_segment * 863 861 send_layoutget(struct pnfs_layout_hdr *lo, 864 862 struct nfs_open_context *ctx, 865 - struct pnfs_layout_range *range, 863 + const struct pnfs_layout_range *range, 866 864 gfp_t gfp_flags) 867 865 { 868 866 struct inode *ino = lo->plh_inode; ··· 895 893 lgp->args.minlength = i_size - range->offset; 896 894 } 897 895 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 898 - lgp->args.range = *range; 896 + pnfs_copy_range(&lgp->args.range, range); 899 897 lgp->args.type = server->pnfs_curr_ld->id; 900 898 lgp->args.inode = ino; 901 899 lgp->args.ctx = get_nfs_open_context(ctx); ··· 938 936 } 939 937 940 938 static int 941 - pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, 939 + pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, 942 940 enum pnfs_iomode iomode, bool sync) 943 941 { 944 942 struct inode *ino = lo->plh_inode; ··· 955 953 goto out; 956 954 } 957 955 958 - lrp->args.stateid = stateid; 956 + nfs4_stateid_copy(&lrp->args.stateid, stateid); 959 957 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 960 958 lrp->args.inode = ino; 961 959 lrp->args.range.iomode = iomode; ··· 998 996 dprintk("NFS: %s no layout to return\n", __func__); 999 997 goto out; 1000 998 } 1001 - stateid = nfsi->layout->plh_stateid; 999 + nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid); 1002 1000 /* Reference matched in nfs4_layoutreturn_release */ 1003 1001 pnfs_get_layout_hdr(lo); 1004 1002 empty = list_empty(&lo->plh_segs); ··· 1026 1024 spin_unlock(&ino->i_lock); 1027 1025 pnfs_free_lseg_list(&tmp_list); 1028 1026 if (send) 1029 - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1027 + status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1030 1028 out_put_layout_hdr: 1031 1029 pnfs_put_layout_hdr(lo); 1032 1030 out: ··· 1089 1087 goto out_noroc; 1090 1088 } 1091 1089 1092 - stateid = lo->plh_stateid; 1090 + nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1093 1091 /* always send layoutreturn if being marked so */ 1094 1092 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1095 1093 &lo->plh_flags)) ··· 1116 1114 pnfs_free_lseg_list(&tmp_list); 1117 1115 pnfs_layoutcommit_inode(ino, true); 1118 1116 if (layoutreturn) 1119 - pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1117 + pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); 1120 1118 return roc; 1121 1119 } 1122 1120 ··· 1141 1139 1142 1140 spin_lock(&ino->i_lock); 1143 1141 lo = NFS_I(ino)->layout; 1142 + pnfs_mark_layout_returned_if_empty(lo); 1144 1143 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) 1145 1144 lo->plh_barrier = barrier; 1146 1145 spin_unlock(&ino->i_lock); ··· 1737 1734 } 1738 1735 1739 1736 static void 1737 + pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) 1738 + { 1739 + if (lo->plh_return_iomode == iomode) 1740 + return; 1741 + if (lo->plh_return_iomode != 0) 1742 + iomode = IOMODE_ANY; 1743 + lo->plh_return_iomode = iomode; 1744 + } 1745 + 1746 + int 1740 1747 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1741 1748 struct list_head *tmp_list, 1742 - struct pnfs_layout_range *return_range) 1749 + const struct pnfs_layout_range *return_range) 1743 1750 { 1744 1751 struct pnfs_layout_segment *lseg, *next; 1752 + int remaining = 0; 1745 1753 1746 1754 dprintk("%s:Begin lo %p\n", __func__, lo); 1747 1755 1748 1756 if (list_empty(&lo->plh_segs)) 1749 - return; 1757 + return 0; 1758 + 1759 + assert_spin_locked(&lo->plh_inode->i_lock); 1750 1760 1751 1761 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 1752 1762 if (should_free_lseg(&lseg->pls_range, return_range)) { ··· 1769 1753 lseg->pls_range.offset, 1770 1754 lseg->pls_range.length); 1771 1755 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1772 - mark_lseg_invalid(lseg, tmp_list); 1756 + pnfs_set_plh_return_iomode(lo, return_range->iomode); 1757 + if (!mark_lseg_invalid(lseg, tmp_list)) 1758 + remaining++; 1773 1759 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1774 1760 &lo->plh_flags); 1775 1761 } 1762 + return remaining; 1776 1763 } 1777 1764 1778 1765 void pnfs_error_mark_layout_for_return(struct inode *inode, ··· 1788 1769 .length = NFS4_MAX_UINT64, 1789 1770 }; 1790 1771 LIST_HEAD(free_me); 1772 + bool return_now = false; 1791 1773 1792 1774 spin_lock(&inode->i_lock); 1793 - if (lo->plh_return_iomode == 0) 1794 - lo->plh_return_iomode = range.iomode; 1795 - else if (lo->plh_return_iomode != range.iomode) 1796 - lo->plh_return_iomode = IOMODE_ANY; 1775 + pnfs_set_plh_return_iomode(lo, range.iomode); 1797 1776 /* 1798 1777 * mark all matching lsegs so that we are sure to have no live 1799 1778 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 1800 1779 * for how it works. 1801 1780 */ 1802 - pnfs_mark_matching_lsegs_return(lo, &free_me, &range); 1803 - spin_unlock(&inode->i_lock); 1781 + if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) { 1782 + nfs4_stateid stateid; 1783 + enum pnfs_iomode iomode = lo->plh_return_iomode; 1784 + 1785 + nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1786 + return_now = pnfs_prepare_layoutreturn(lo); 1787 + spin_unlock(&inode->i_lock); 1788 + if (return_now) 1789 + pnfs_send_layoutreturn(lo, &stateid, iomode, false); 1790 + } else { 1791 + spin_unlock(&inode->i_lock); 1792 + nfs_commit_inode(inode, 0); 1793 + } 1804 1794 pnfs_free_lseg_list(&free_me); 1805 1795 } 1806 1796 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
+31 -2
fs/nfs/pnfs.h
··· 260 260 bool update_barrier); 261 261 int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, 262 262 struct pnfs_layout_hdr *lo, 263 - struct pnfs_layout_range *range, 263 + const struct pnfs_layout_range *range, 264 264 struct nfs4_state *open_state); 265 265 int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 266 266 struct list_head *tmp_list, 267 - struct pnfs_layout_range *recall_range); 267 + const struct pnfs_layout_range *recall_range); 268 + int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 269 + struct list_head *tmp_list, 270 + const struct pnfs_layout_range *recall_range); 268 271 bool pnfs_roc(struct inode *ino); 269 272 void pnfs_roc_release(struct inode *ino); 270 273 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); ··· 389 386 smp_mb__after_atomic(); 390 387 } 391 388 return lseg; 389 + } 390 + 391 + static inline bool 392 + pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg) 393 + { 394 + return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0; 392 395 } 393 396 394 397 /* Return true if a layout driver is being used for this mountpoint */ ··· 542 533 if (end == NFS4_MAX_UINT64 || end <= offset) 543 534 return NFS4_MAX_UINT64; 544 535 return 1 + end - offset; 536 + } 537 + 538 + /** 539 + * pnfs_mark_layout_returned_if_empty - marks the layout as returned 540 + * @lo: layout header 541 + * 542 + * Note: Caller must hold inode->i_lock 543 + */ 544 + static inline void 545 + pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo) 546 + { 547 + if (list_empty(&lo->plh_segs)) 548 + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 549 + } 550 + 551 + static inline void 552 + pnfs_copy_range(struct pnfs_layout_range *dst, 553 + const struct pnfs_layout_range *src) 554 + { 555 + memcpy(dst, src, sizeof(*dst)); 545 556 } 546 557 547 558 extern unsigned int layoutstats_timer;
+6 -4
fs/nfs/pnfs_nfs.c
··· 266 266 } else { 267 267 nfs_retry_commit(mds_pages, NULL, cinfo, 0); 268 268 pnfs_generic_retry_commit(cinfo, 0); 269 - cinfo->completion_ops->error_cleanup(NFS_I(inode)); 270 269 return -ENOMEM; 271 270 } 272 271 } 273 272 274 273 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); 275 274 276 - if (nreq == 0) { 277 - cinfo->completion_ops->error_cleanup(NFS_I(inode)); 275 + if (nreq == 0) 278 276 goto out; 279 - } 280 277 281 278 atomic_add(nreq, &cinfo->mds->rpcs_out); 282 279 ··· 868 871 buckets = cinfo->ds->buckets; 869 872 list = &buckets[ds_commit_idx].written; 870 873 if (list_empty(list)) { 874 + if (!pnfs_is_valid_lseg(lseg)) { 875 + spin_unlock(cinfo->lock); 876 + cinfo->completion_ops->resched_write(cinfo, req); 877 + return; 878 + } 871 879 /* Non-empty buckets hold a reference on the lseg. That ref 872 880 * is normally transferred to the COMMIT call and released 873 881 * there. It could also be released if the last req is pulled
+52 -40
fs/nfs/write.c
··· 21 21 #include <linux/nfs_page.h> 22 22 #include <linux/backing-dev.h> 23 23 #include <linux/export.h> 24 + #include <linux/freezer.h> 25 + #include <linux/wait.h> 24 26 25 27 #include <asm/uaccess.h> 26 28 ··· 1157 1155 if (req == NULL) 1158 1156 return 0; 1159 1157 l_ctx = req->wb_lock_context; 1160 - do_flush = req->wb_page != page || req->wb_context != ctx; 1158 + do_flush = req->wb_page != page || 1159 + !nfs_match_open_context(req->wb_context, ctx); 1161 1160 /* for now, flush if more than 1 request in page_group */ 1162 1161 do_flush |= req->wb_this_page != req; 1163 1162 if (l_ctx && flctx && ··· 1356 1353 } 1357 1354 } 1358 1355 1356 + static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) 1357 + { 1358 + nfs_async_write_error(&hdr->pages); 1359 + } 1360 + 1359 1361 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { 1360 1362 .error_cleanup = nfs_async_write_error, 1361 1363 .completion = nfs_write_completion, 1364 + .reschedule_io = nfs_async_write_reschedule_io, 1362 1365 }; 1363 1366 1364 1367 void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, ··· 1565 1556 } 1566 1557 } 1567 1558 1568 - 1569 - static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1559 + static int nfs_wait_atomic_killable(atomic_t *key) 1570 1560 { 1571 - int ret; 1572 - 1573 - if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1574 - return 1; 1575 - if (!may_wait) 1576 - return 0; 1577 - ret = out_of_line_wait_on_bit_lock(&nfsi->flags, 1578 - NFS_INO_COMMIT, 1579 - nfs_wait_bit_killable, 1580 - TASK_KILLABLE); 1581 - return (ret < 0) ? ret : 1; 1561 + if (fatal_signal_pending(current)) 1562 + return -ERESTARTSYS; 1563 + freezable_schedule_unsafe(); 1564 + return 0; 1582 1565 } 1583 1566 1584 - static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1567 + static int wait_on_commit(struct nfs_mds_commit_info *cinfo) 1585 1568 { 1586 - clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1587 - smp_mb__after_atomic(); 1588 - wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1569 + return wait_on_atomic_t(&cinfo->rpcs_out, 1570 + nfs_wait_atomic_killable, TASK_KILLABLE); 1571 + } 1572 + 1573 + static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) 1574 + { 1575 + atomic_inc(&cinfo->rpcs_out); 1576 + } 1577 + 1578 + static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) 1579 + { 1580 + if (atomic_dec_and_test(&cinfo->rpcs_out)) 1581 + wake_up_atomic_t(&cinfo->rpcs_out); 1589 1582 } 1590 1583 1591 1584 void nfs_commitdata_release(struct nfs_commit_data *data) ··· 1704 1693 } 1705 1694 EXPORT_SYMBOL_GPL(nfs_retry_commit); 1706 1695 1696 + static void 1697 + nfs_commit_resched_write(struct nfs_commit_info *cinfo, 1698 + struct nfs_page *req) 1699 + { 1700 + __set_page_dirty_nobuffers(req->wb_page); 1701 + } 1702 + 1707 1703 /* 1708 1704 * Commit dirty pages 1709 1705 */ ··· 1732 1714 data->mds_ops, how, 0); 1733 1715 out_bad: 1734 1716 nfs_retry_commit(head, NULL, cinfo, 0); 1735 - cinfo->completion_ops->error_cleanup(NFS_I(inode)); 1736 1717 return -ENOMEM; 1737 1718 } 1738 1719 ··· 1793 1776 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 1794 1777 1795 1778 nfs_init_cinfo(&cinfo, data->inode, data->dreq); 1796 - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) 1797 - nfs_commit_clear_lock(NFS_I(data->inode)); 1779 + nfs_commit_end(cinfo.mds); 1798 1780 } 1799 1781 1800 1782 static void nfs_commit_release(void *calldata) ··· 1812 1796 1813 1797 static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { 1814 1798 .completion = nfs_commit_release_pages, 1815 - .error_cleanup = nfs_commit_clear_lock, 1799 + .resched_write = nfs_commit_resched_write, 1816 1800 }; 1817 1801 1818 1802 int nfs_generic_commit_list(struct inode *inode, struct list_head *head, ··· 1831 1815 LIST_HEAD(head); 1832 1816 struct nfs_commit_info cinfo; 1833 1817 int may_wait = how & FLUSH_SYNC; 1818 + int error = 0; 1834 1819 int res; 1835 1820 1836 - res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1837 - if (res <= 0) 1838 - goto out_mark_dirty; 1839 1821 nfs_init_cinfo_from_inode(&cinfo, inode); 1822 + nfs_commit_begin(cinfo.mds); 1840 1823 res = nfs_scan_commit(inode, &head, &cinfo); 1841 - if (res) { 1842 - int error; 1843 - 1824 + if (res) 1844 1825 error = nfs_generic_commit_list(inode, &head, how, &cinfo); 1845 - if (error < 0) 1846 - return error; 1847 - if (!may_wait) 1848 - goto out_mark_dirty; 1849 - error = wait_on_bit_action(&NFS_I(inode)->flags, 1850 - NFS_INO_COMMIT, 1851 - nfs_wait_bit_killable, 1852 - TASK_KILLABLE); 1853 - if (error < 0) 1854 - return error; 1855 - } else 1856 - nfs_commit_clear_lock(NFS_I(inode)); 1826 + nfs_commit_end(cinfo.mds); 1827 + if (error < 0) 1828 + goto out_error; 1829 + if (!may_wait) 1830 + goto out_mark_dirty; 1831 + error = wait_on_commit(cinfo.mds); 1832 + if (error < 0) 1833 + return error; 1857 1834 return res; 1835 + out_error: 1836 + res = error; 1858 1837 /* Note: If we exit without ensuring that the commit is complete, 1859 1838 * we must mark the inode as dirty. Otherwise, future calls to 1860 1839 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure ··· 1859 1848 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1860 1849 return res; 1861 1850 } 1851 + EXPORT_SYMBOL_GPL(nfs_commit_inode); 1862 1852 1863 1853 int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1864 1854 {
-1
include/linux/nfs_fs.h
··· 216 216 #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ 217 217 #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ 218 218 #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ 219 - #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ 220 219 #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ 221 220 #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ 222 221 #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
+3 -1
include/linux/nfs_xdr.h
··· 1421 1421 struct list_head list; 1422 1422 }; 1423 1423 1424 + struct nfs_commit_info; 1424 1425 struct nfs_commit_data; 1425 1426 struct nfs_inode; 1426 1427 struct nfs_commit_completion_ops { 1427 - void (*error_cleanup) (struct nfs_inode *nfsi); 1428 1428 void (*completion) (struct nfs_commit_data *data); 1429 + void (*resched_write) (struct nfs_commit_info *, struct nfs_page *); 1429 1430 }; 1430 1431 1431 1432 struct nfs_commit_info { ··· 1463 1462 void (*error_cleanup)(struct list_head *head); 1464 1463 void (*init_hdr)(struct nfs_pgio_header *hdr); 1465 1464 void (*completion)(struct nfs_pgio_header *hdr); 1465 + void (*reschedule_io)(struct nfs_pgio_header *hdr); 1466 1466 }; 1467 1467 1468 1468 struct nfs_unlinkdata {