Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] RPC: parametrize various transport connect timeouts

Each transport implementation can now set its own bind, connect,
reestablishment, and idle timeout values. These are stored as
per-transport variables rather than compile-time constants, so the
values can be modified dynamically. This permits exponential backoff
of any of these values, for instance.

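As an example, we implement exponential backoff for the TCP connection
reestablishment timeout: the delay starts at 3 seconds and doubles each
time a reconnect is delayed, up to a maximum of 5 minutes.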

Test-plan:
Destructive testing (unplugging the network temporarily). Connectathon
with UDP and TCP.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

Authored by Chuck Lever and committed by Trond Myklebust
03bf4b70 3167e12c

+84 -34
+5 -5
fs/nfs/inode.c
··· 369 369 case IPPROTO_TCP: 370 370 if (!to->to_initval) 371 371 to->to_initval = 60 * HZ; 372 - if (to->to_initval > RPC_MAX_TCP_TIMEOUT) 373 - to->to_initval = RPC_MAX_TCP_TIMEOUT; 372 + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) 373 + to->to_initval = NFS_MAX_TCP_TIMEOUT; 374 374 to->to_increment = to->to_initval; 375 375 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); 376 376 to->to_exponential = 0; ··· 379 379 default: 380 380 if (!to->to_initval) 381 381 to->to_initval = 11 * HZ / 10; 382 - if (to->to_initval > RPC_MAX_UDP_TIMEOUT) 383 - to->to_initval = RPC_MAX_UDP_TIMEOUT; 384 - to->to_maxval = RPC_MAX_UDP_TIMEOUT; 382 + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) 383 + to->to_initval = NFS_MAX_UDP_TIMEOUT; 384 + to->to_maxval = NFS_MAX_UDP_TIMEOUT; 385 385 to->to_exponential = 1; 386 386 break; 387 387 }
+4
include/linux/nfs_fs.h
··· 41 41 #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 42 42 #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 43 43 44 + /* Default timeout values */ 45 + #define NFS_MAX_UDP_TIMEOUT (60*HZ) 46 + #define NFS_MAX_TCP_TIMEOUT (600*HZ) 47 + 44 48 /* 45 49 * superblock magic number for NFS 46 50 */
+6 -23
include/linux/sunrpc/xprt.h
··· 22 22 #define RPC_DEF_SLOT_TABLE (16U) 23 23 #define RPC_MAX_SLOT_TABLE (128U) 24 24 25 - /* Default timeout values */ 26 - #define RPC_MAX_UDP_TIMEOUT (60*HZ) 27 - #define RPC_MAX_TCP_TIMEOUT (600*HZ) 28 - 29 - /* 30 - * Wait duration for an RPC TCP connection to be established. Solaris 31 - * NFS over TCP uses 60 seconds, for example, which is in line with how 32 - * long a server takes to reboot. 33 - */ 34 - #define RPC_CONNECT_TIMEOUT (60*HZ) 35 - 36 - /* 37 - * Delay an arbitrary number of seconds before attempting to reconnect 38 - * after an error. 39 - */ 40 - #define RPC_REESTABLISH_TIMEOUT (15*HZ) 41 - 42 - /* 43 - * RPC transport idle timeout. 44 - */ 45 - #define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ) 46 - 47 25 /* 48 26 * RPC call and reply header size as number of 32bit words (verifier 49 27 * size computed separately) ··· 160 182 /* 161 183 * Connection of transports 162 184 */ 185 + unsigned long connect_timeout, 186 + bind_timeout, 187 + reestablish_timeout; 163 188 struct work_struct connect_worker; 164 189 unsigned short port; 190 + 165 191 /* 166 192 * Disconnection of idle transports 167 193 */ 168 194 struct work_struct task_cleanup; 169 195 struct timer_list timer; 170 - unsigned long last_used; 196 + unsigned long last_used, 197 + idle_timeout; 171 198 172 199 /* 173 200 * Send stuff
+1 -1
net/sunrpc/clnt.c
··· 740 740 task->tk_action = call_connect; 741 741 if (!clnt->cl_port) { 742 742 task->tk_action = call_bind_status; 743 - task->tk_timeout = RPC_CONNECT_TIMEOUT; 743 + task->tk_timeout = task->tk_xprt->bind_timeout; 744 744 rpc_getport(task, clnt); 745 745 } 746 746 }
+2 -3
net/sunrpc/xprt.c
··· 551 551 if (task->tk_rqstp) 552 552 task->tk_rqstp->rq_bytes_sent = 0; 553 553 554 - task->tk_timeout = RPC_CONNECT_TIMEOUT; 554 + task->tk_timeout = xprt->connect_timeout; 555 555 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); 556 556 xprt->ops->connect(task); 557 557 } ··· 763 763 764 764 switch (status) { 765 765 case -ECONNREFUSED: 766 - task->tk_timeout = RPC_REESTABLISH_TIMEOUT; 767 766 rpc_sleep_on(&xprt->sending, task, NULL, NULL); 768 767 case -EAGAIN: 769 768 case -ENOTCONN: ··· 856 857 xprt->last_used = jiffies; 857 858 if (list_empty(&xprt->recv) && !xprt->shutdown) 858 859 mod_timer(&xprt->timer, 859 - xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT); 860 + xprt->last_used + xprt->idle_timeout); 860 861 spin_unlock_bh(&xprt->transport_lock); 861 862 task->tk_rqstp = NULL; 862 863 memset(req, 0, sizeof(*req)); /* mark unused */
+66 -2
net/sunrpc/xprtsock.c
··· 41 41 */ 42 42 #define XS_SENDMSG_RETRY (10U) 43 43 44 + /* 45 + * Time out for an RPC UDP socket connect. UDP socket connects are 46 + * synchronous, but we set a timeout anyway in case of resource 47 + * exhaustion on the local host. 48 + */ 49 + #define XS_UDP_CONN_TO (5U * HZ) 50 + 51 + /* 52 + * Wait duration for an RPC TCP connection to be established. Solaris 53 + * NFS over TCP uses 60 seconds, for example, which is in line with how 54 + * long a server takes to reboot. 55 + */ 56 + #define XS_TCP_CONN_TO (60U * HZ) 57 + 58 + /* 59 + * Wait duration for a reply from the RPC portmapper. 60 + */ 61 + #define XS_BIND_TO (60U * HZ) 62 + 63 + /* 64 + * Delay if a UDP socket connect error occurs. This is most likely some 65 + * kind of resource problem on the local host. 66 + */ 67 + #define XS_UDP_REEST_TO (2U * HZ) 68 + 69 + /* 70 + * The reestablish timeout allows clients to delay for a bit before attempting 71 + * to reconnect to a server that just dropped our connection. 72 + * 73 + * We implement an exponential backoff when trying to reestablish a TCP 74 + * transport connection with the server. Some servers like to drop a TCP 75 + * connection when they are overworked, so we start with a short timeout and 76 + * increase over time if the server is down or not responding. 77 + */ 78 + #define XS_TCP_INIT_REEST_TO (3U * HZ) 79 + #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) 80 + 81 + /* 82 + * TCP idle timeout; client drops the transport socket if it is idle 83 + * for this long. Note that we also timeout UDP sockets to prevent 84 + * holding port numbers when there is no RPC traffic. 
85 + */ 86 + #define XS_IDLE_DISC_TO (5U * 60 * HZ) 87 + 44 88 #ifdef RPC_DEBUG 45 89 # undef RPC_DEBUG_DATA 46 90 # define RPCDBG_FACILITY RPCDBG_TRANS ··· 783 739 xprt->tcp_reclen = 0; 784 740 xprt->tcp_copied = 0; 785 741 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; 742 + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 786 743 xprt_wake_pending_tasks(xprt, 0); 787 744 } 788 745 spin_unlock_bh(&xprt->transport_lock); ··· 1111 1066 * @task: address of RPC task that manages state of connect request 1112 1067 * 1113 1068 * TCP: If the remote end dropped the connection, delay reconnecting. 1069 + * 1070 + * UDP socket connects are synchronous, but we use a work queue anyway 1071 + * to guarantee that even unprivileged user processes can set up a 1072 + * socket on a privileged port. 1073 + * 1074 + * If a UDP socket connect fails, the delay behavior here prevents 1075 + * retry floods (hard mounts). 1114 1076 */ 1115 1077 static void xs_connect(struct rpc_task *task) 1116 1078 { ··· 1127 1075 return; 1128 1076 1129 1077 if (xprt->sock != NULL) { 1130 - dprintk("RPC: xs_connect delayed xprt %p\n", xprt); 1078 + dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", 1079 + xprt, xprt->reestablish_timeout / HZ); 1131 1080 schedule_delayed_work(&xprt->connect_worker, 1132 - RPC_REESTABLISH_TIMEOUT); 1081 + xprt->reestablish_timeout); 1082 + xprt->reestablish_timeout <<= 1; 1083 + if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) 1084 + xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; 1133 1085 } else { 1134 1086 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); 1135 1087 schedule_work(&xprt->connect_worker); ··· 1195 1139 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 1196 1140 1197 1141 INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); 1142 + xprt->bind_timeout = XS_BIND_TO; 1143 + xprt->connect_timeout = XS_UDP_CONN_TO; 1144 + xprt->reestablish_timeout = XS_UDP_REEST_TO; 1145 + xprt->idle_timeout = XS_IDLE_DISC_TO; 1198 1146 1199 
1147 xprt->ops = &xs_udp_ops; 1200 1148 ··· 1236 1176 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 1237 1177 1238 1178 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); 1179 + xprt->bind_timeout = XS_BIND_TO; 1180 + xprt->connect_timeout = XS_TCP_CONN_TO; 1181 + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 1182 + xprt->idle_timeout = XS_IDLE_DISC_TO; 1239 1183 1240 1184 xprt->ops = &xs_tcp_ops; 1241 1185