Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nfsd: fix callback restarts

Checking the rpc_client pointer is not a reliable way to detect
backchannel changes: cl_cb_client is changed only after shutting down
the rpc client, so the condition cl_cb_client == tk_client will always be
true.

Check the RPC_TASK_KILLED flag instead. Also rewrite the code to avoid
the buggy cl_callbacks list, and fix the lifetime rules, which were broken
because the ->prepare callback operation was called twice in the retry case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

Authored by Christoph Hellwig and committed by J. Bruce Fields
cba5f62b ef2a1b3e

+24 -33
+23 -29
fs/nfsd/nfs4callback.c
··· 879 879 if (!nfsd41_cb_get_slot(clp, task)) 880 880 return; 881 881 } 882 - spin_lock(&clp->cl_lock); 883 - if (list_empty(&cb->cb_per_client)) { 884 - /* This is the first call, not a restart */ 885 - cb->cb_done = false; 886 - list_add(&cb->cb_per_client, &clp->cl_callbacks); 887 - } 888 - spin_unlock(&clp->cl_lock); 889 882 rpc_call_start(task); 890 883 } 891 884 ··· 900 907 clp->cl_cb_session->se_cb_seq_nr); 901 908 } 902 909 903 - if (clp->cl_cb_client != task->tk_client) { 904 - /* We're shutting down or changing cl_cb_client; leave 905 - * it to nfsd4_process_cb_update to restart the call if 906 - * necessary. */ 910 + /* 911 + * If the backchannel connection was shut down while this 912 + * task was queued, we need to resubmit it after setting up 913 + * a new backchannel connection. 914 + * 915 + * Note that if we lost our callback connection permanently 916 + * the submission code will error out, so we don't need to 917 + * handle that case here. 918 + */ 919 + if (task->tk_flags & RPC_TASK_KILLED) { 920 + task->tk_status = 0; 921 + cb->cb_need_restart = true; 907 922 return; 908 923 } 909 - 910 - if (cb->cb_done) 911 - return; 912 924 913 925 if (cb->cb_status) { 914 926 WARN_ON_ONCE(task->tk_status); ··· 934 936 default: 935 937 BUG(); 936 938 } 937 - cb->cb_done = true; 938 939 } 939 940 940 941 static void nfsd4_cb_release(void *calldata) 941 942 { 942 943 struct nfsd4_callback *cb = calldata; 943 - struct nfs4_client *clp = cb->cb_clp; 944 944 945 - if (cb->cb_done) { 946 - spin_lock(&clp->cl_lock); 947 - list_del(&cb->cb_per_client); 948 - spin_unlock(&clp->cl_lock); 949 - 945 + if (cb->cb_need_restart) 946 + nfsd4_run_cb(cb); 947 + else 950 948 cb->cb_ops->release(cb); 951 - } 949 + 952 950 } 953 951 954 952 static const struct rpc_call_ops nfsd4_cb_ops = { ··· 1039 1045 nfsd4_mark_cb_down(clp, err); 1040 1046 return; 1041 1047 } 1042 - /* Yay, the callback channel's back! 
Restart any callbacks: */ 1043 - list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) 1044 - queue_work(callback_wq, &cb->cb_work); 1045 1048 } 1046 1049 1047 1050 static void ··· 1049 1058 struct nfs4_client *clp = cb->cb_clp; 1050 1059 struct rpc_clnt *clnt; 1051 1060 1052 - if (cb->cb_ops && cb->cb_ops->prepare) 1053 - cb->cb_ops->prepare(cb); 1061 + if (cb->cb_need_restart) { 1062 + cb->cb_need_restart = false; 1063 + } else { 1064 + if (cb->cb_ops && cb->cb_ops->prepare) 1065 + cb->cb_ops->prepare(cb); 1066 + } 1054 1067 1055 1068 if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) 1056 1069 nfsd4_process_cb_update(cb); ··· 1080 1085 cb->cb_msg.rpc_resp = cb; 1081 1086 cb->cb_ops = ops; 1082 1087 INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); 1083 - INIT_LIST_HEAD(&cb->cb_per_client); 1084 1088 cb->cb_status = 0; 1085 - cb->cb_done = true; 1089 + cb->cb_need_restart = false; 1086 1090 } 1087 1091 1088 1092 void nfsd4_run_cb(struct nfsd4_callback *cb)
-1
fs/nfsd/nfs4state.c
··· 1626 1626 INIT_LIST_HEAD(&clp->cl_openowners); 1627 1627 INIT_LIST_HEAD(&clp->cl_delegations); 1628 1628 INIT_LIST_HEAD(&clp->cl_lru); 1629 - INIT_LIST_HEAD(&clp->cl_callbacks); 1630 1629 INIT_LIST_HEAD(&clp->cl_revoked); 1631 1630 #ifdef CONFIG_NFSD_PNFS 1632 1631 INIT_LIST_HEAD(&clp->cl_lo_states);
+1 -3
fs/nfsd/state.h
··· 63 63 64 64 struct nfsd4_callback { 65 65 struct nfs4_client *cb_clp; 66 - struct list_head cb_per_client; 67 66 u32 cb_minorversion; 68 67 struct rpc_message cb_msg; 69 68 struct nfsd4_callback_ops *cb_ops; 70 69 struct work_struct cb_work; 71 70 int cb_status; 72 - bool cb_done; 71 + bool cb_need_restart; 73 72 }; 74 73 75 74 struct nfsd4_callback_ops { ··· 333 334 int cl_cb_state; 334 335 struct nfsd4_callback cl_cb_null; 335 336 struct nfsd4_session *cl_cb_session; 336 - struct list_head cl_callbacks; /* list of in-progress callbacks */ 337 337 338 338 /* for all client information that callback code might need: */ 339 339 spinlock_t cl_lock;