Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

NFSv4.1: Fix up replays of interrupted requests

If the previous request on a slot was interrupted before the server
processed it, then our slot sequence number may be out of sync with the
server's, and so we try the next operation using the old sequence number.

The problem with this is that not all servers check whether the client
is replaying the same operations as before when they decide to go to
the replay cache. So instead of the expected NFS4ERR_SEQ_FALSE_RETRY
error, we get a replay of the old reply, which could (if the operations
match up) be mistaken by the client for a new reply.

To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op
in order to resync our slot sequence number.

Cc: Olga Kornievskaia <olga.kornievskaia@gmail.com>
[olga.kornievskaia@gmail.com: fix an Oops]
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

Authored by Trond Myklebust and committed by Anna Schumaker
3be0f80b 6f0afc28

+103 -47
+1 -1
fs/nfs/nfs4_fs.h
··· 464 464 extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 465 465 extern void nfs_release_seqid(struct nfs_seqid *seqid); 466 466 extern void nfs_free_seqid(struct nfs_seqid *seqid); 467 - extern int nfs4_setup_sequence(const struct nfs_client *client, 467 + extern int nfs4_setup_sequence(struct nfs_client *client, 468 468 struct nfs4_sequence_args *args, 469 469 struct nfs4_sequence_res *res, 470 470 struct rpc_task *task);
+102 -46
fs/nfs/nfs4proc.c
··· 96 96 struct nfs_open_context *ctx, struct nfs4_label *ilabel, 97 97 struct nfs4_label *olabel); 98 98 #ifdef CONFIG_NFS_V4_1 99 + static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, 100 + struct rpc_cred *cred, 101 + struct nfs4_slot *slot, 102 + bool is_privileged); 99 103 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, 100 104 struct rpc_cred *); 101 105 static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, ··· 648 644 649 645 #if defined(CONFIG_NFS_V4_1) 650 646 651 - static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) 647 + static void nfs41_release_slot(struct nfs4_slot *slot) 652 648 { 653 649 struct nfs4_session *session; 654 650 struct nfs4_slot_table *tbl; 655 - struct nfs4_slot *slot = res->sr_slot; 656 651 bool send_new_highest_used_slotid = false; 657 652 653 + if (!slot) 654 + return; 658 655 tbl = slot->table; 659 656 session = tbl->session; 660 657 ··· 681 676 send_new_highest_used_slotid = false; 682 677 out_unlock: 683 678 spin_unlock(&tbl->slot_tbl_lock); 684 - res->sr_slot = NULL; 685 679 if (send_new_highest_used_slotid) 686 680 nfs41_notify_server(session->clp); 687 681 if (waitqueue_active(&tbl->slot_waitq)) 688 682 wake_up_all(&tbl->slot_waitq); 683 + } 684 + 685 + static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) 686 + { 687 + nfs41_release_slot(res->sr_slot); 688 + res->sr_slot = NULL; 689 689 } 690 690 691 691 static int nfs41_sequence_process(struct rpc_task *task, ··· 720 710 /* Check the SEQUENCE operation status */ 721 711 switch (res->sr_status) { 722 712 case 0: 723 - /* If previous op on slot was interrupted and we reused 724 - * the seq# and got a reply from the cache, then retry 725 - */ 726 - if (task->tk_status == -EREMOTEIO && interrupted) { 727 - ++slot->seq_nr; 728 - goto retry_nowait; 729 - } 730 713 /* Update the slot's sequence and clientid lease timer */ 731 714 slot->seq_done = 1; 732 715 clp = session->clp; ··· 753 750 * The 
slot id we used was probably retired. Try again 754 751 * using a different slot id. 755 752 */ 753 + if (slot->seq_nr < slot->table->target_highest_slotid) 754 + goto session_recover; 756 755 goto retry_nowait; 757 756 case -NFS4ERR_SEQ_MISORDERED: 758 757 /* 759 758 * Was the last operation on this sequence interrupted? 760 759 * If so, retry after bumping the sequence number. 761 760 */ 762 - if (interrupted) { 763 - ++slot->seq_nr; 764 - goto retry_nowait; 765 - } 761 + if (interrupted) 762 + goto retry_new_seq; 766 763 /* 767 764 * Could this slot have been previously retired? 768 765 * If so, then the server may be expecting seq_nr = 1! ··· 771 768 slot->seq_nr = 1; 772 769 goto retry_nowait; 773 770 } 774 - break; 771 + goto session_recover; 775 772 case -NFS4ERR_SEQ_FALSE_RETRY: 776 - ++slot->seq_nr; 777 - goto retry_nowait; 773 + if (interrupted) 774 + goto retry_new_seq; 775 + goto session_recover; 778 776 default: 779 777 /* Just update the slot sequence no. */ 780 778 slot->seq_done = 1; ··· 785 781 dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); 786 782 out_noaction: 787 783 return ret; 784 + session_recover: 785 + nfs4_schedule_session_recovery(session, res->sr_status); 786 + goto retry_nowait; 787 + retry_new_seq: 788 + ++slot->seq_nr; 788 789 retry_nowait: 789 790 if (rpc_restart_call_prepare(task)) { 790 791 nfs41_sequence_free_slot(res); ··· 866 857 .rpc_call_done = nfs41_call_sync_done, 867 858 }; 868 859 860 + static void 861 + nfs4_sequence_process_interrupted(struct nfs_client *client, 862 + struct nfs4_slot *slot, struct rpc_cred *cred) 863 + { 864 + struct rpc_task *task; 865 + 866 + task = _nfs41_proc_sequence(client, cred, slot, true); 867 + if (!IS_ERR(task)) 868 + rpc_put_task_async(task); 869 + } 870 + 869 871 #else /* !CONFIG_NFS_V4_1 */ 870 872 871 873 static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) ··· 897 877 } 898 878 EXPORT_SYMBOL_GPL(nfs4_sequence_done); 899 879 880 + 
static void 881 + nfs4_sequence_process_interrupted(struct nfs_client *client, 882 + struct nfs4_slot *slot, struct rpc_cred *cred) 883 + { 884 + WARN_ON_ONCE(1); 885 + slot->interrupted = 0; 886 + } 887 + 900 888 #endif /* !CONFIG_NFS_V4_1 */ 901 889 902 - int nfs4_setup_sequence(const struct nfs_client *client, 890 + static 891 + void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, 892 + struct nfs4_sequence_res *res, 893 + struct nfs4_slot *slot) 894 + { 895 + if (!slot) 896 + return; 897 + slot->privileged = args->sa_privileged ? 1 : 0; 898 + args->sa_slot = slot; 899 + 900 + res->sr_slot = slot; 901 + res->sr_timestamp = jiffies; 902 + res->sr_status_flags = 0; 903 + res->sr_status = 1; 904 + 905 + } 906 + 907 + int nfs4_setup_sequence(struct nfs_client *client, 903 908 struct nfs4_sequence_args *args, 904 909 struct nfs4_sequence_res *res, 905 910 struct rpc_task *task) ··· 942 897 task->tk_timeout = 0; 943 898 } 944 899 945 - spin_lock(&tbl->slot_tbl_lock); 946 - /* The state manager will wait until the slot table is empty */ 947 - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) 948 - goto out_sleep; 900 + for (;;) { 901 + spin_lock(&tbl->slot_tbl_lock); 902 + /* The state manager will wait until the slot table is empty */ 903 + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) 904 + goto out_sleep; 949 905 950 - slot = nfs4_alloc_slot(tbl); 951 - if (IS_ERR(slot)) { 952 - /* Try again in 1/4 second */ 953 - if (slot == ERR_PTR(-ENOMEM)) 954 - task->tk_timeout = HZ >> 2; 955 - goto out_sleep; 906 + slot = nfs4_alloc_slot(tbl); 907 + if (IS_ERR(slot)) { 908 + /* Try again in 1/4 second */ 909 + if (slot == ERR_PTR(-ENOMEM)) 910 + task->tk_timeout = HZ >> 2; 911 + goto out_sleep; 912 + } 913 + spin_unlock(&tbl->slot_tbl_lock); 914 + 915 + if (likely(!slot->interrupted)) 916 + break; 917 + nfs4_sequence_process_interrupted(client, 918 + slot, task->tk_msg.rpc_cred); 956 919 } 957 - spin_unlock(&tbl->slot_tbl_lock); 958 920 959 - 
slot->privileged = args->sa_privileged ? 1 : 0; 960 - args->sa_slot = slot; 961 - 962 - res->sr_slot = slot; 963 - if (session) { 964 - res->sr_timestamp = jiffies; 965 - res->sr_status_flags = 0; 966 - res->sr_status = 1; 967 - } 921 + nfs4_sequence_attach_slot(args, res, slot); 968 922 969 923 trace_nfs4_setup_sequence(session, args); 970 924 out_start: ··· 8162 8118 8163 8119 static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, 8164 8120 struct rpc_cred *cred, 8121 + struct nfs4_slot *slot, 8165 8122 bool is_privileged) 8166 8123 { 8167 8124 struct nfs4_sequence_data *calldata; ··· 8176 8131 .callback_ops = &nfs41_sequence_ops, 8177 8132 .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, 8178 8133 }; 8134 + struct rpc_task *ret; 8179 8135 8136 + ret = ERR_PTR(-EIO); 8180 8137 if (!atomic_inc_not_zero(&clp->cl_count)) 8181 - return ERR_PTR(-EIO); 8138 + goto out_err; 8139 + 8140 + ret = ERR_PTR(-ENOMEM); 8182 8141 calldata = kzalloc(sizeof(*calldata), GFP_NOFS); 8183 - if (calldata == NULL) { 8184 - nfs_put_client(clp); 8185 - return ERR_PTR(-ENOMEM); 8186 - } 8142 + if (calldata == NULL) 8143 + goto out_put_clp; 8187 8144 nfs4_init_sequence(&calldata->args, &calldata->res, 0); 8145 + nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); 8188 8146 if (is_privileged) 8189 8147 nfs4_set_sequence_privileged(&calldata->args); 8190 8148 msg.rpc_argp = &calldata->args; ··· 8195 8147 calldata->clp = clp; 8196 8148 task_setup_data.callback_data = calldata; 8197 8149 8198 - return rpc_run_task(&task_setup_data); 8150 + ret = rpc_run_task(&task_setup_data); 8151 + if (IS_ERR(ret)) 8152 + goto out_err; 8153 + return ret; 8154 + out_put_clp: 8155 + nfs_put_client(clp); 8156 + out_err: 8157 + nfs41_release_slot(slot); 8158 + return ret; 8199 8159 } 8200 8160 8201 8161 static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) ··· 8213 8157 8214 8158 if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) 8215 8159 
return -EAGAIN; 8216 - task = _nfs41_proc_sequence(clp, cred, false); 8160 + task = _nfs41_proc_sequence(clp, cred, NULL, false); 8217 8161 if (IS_ERR(task)) 8218 8162 ret = PTR_ERR(task); 8219 8163 else ··· 8227 8171 struct rpc_task *task; 8228 8172 int ret; 8229 8173 8230 - task = _nfs41_proc_sequence(clp, cred, true); 8174 + task = _nfs41_proc_sequence(clp, cred, NULL, true); 8231 8175 if (IS_ERR(task)) { 8232 8176 ret = PTR_ERR(task); 8233 8177 goto out;