Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

SUNRPC: Add basic load balancing to the transport switch

For now, just count the queue length. It is less accurate than counting
the number of bytes queued, but easier to implement.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

authored by

Trond Myklebust and committed by
Trond Myklebust
21f0ffaf 44942b4e

+59 -4
+1
include/linux/sunrpc/xprt.h
··· 238 238 /* 239 239 * Send stuff 240 240 */ 241 + atomic_long_t queuelen; 241 242 spinlock_t transport_lock; /* lock transport info */ 242 243 spinlock_t reserve_lock; /* lock slot table */ 243 244 spinlock_t queue_lock; /* send/receive queue lock */
+2
include/linux/sunrpc/xprtmultipath.h
··· 15 15 struct kref xps_kref; 16 16 17 17 unsigned int xps_nxprts; 18 + unsigned int xps_nactive; 19 + atomic_long_t xps_queuelen; 18 20 struct list_head xps_xprt_list; 19 21 20 22 struct net * xps_net;
+37 -3
net/sunrpc/clnt.c
··· 968 968 } 969 969 EXPORT_SYMBOL_GPL(rpc_bind_new_program); 970 970 971 + static struct rpc_xprt * 972 + rpc_task_get_xprt(struct rpc_clnt *clnt) 973 + { 974 + struct rpc_xprt_switch *xps; 975 + struct rpc_xprt *xprt= xprt_iter_get_next(&clnt->cl_xpi); 976 + 977 + if (!xprt) 978 + return NULL; 979 + rcu_read_lock(); 980 + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); 981 + atomic_long_inc(&xps->xps_queuelen); 982 + rcu_read_unlock(); 983 + atomic_long_inc(&xprt->queuelen); 984 + 985 + return xprt; 986 + } 987 + 988 + static void 989 + rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) 990 + { 991 + struct rpc_xprt_switch *xps; 992 + 993 + atomic_long_dec(&xprt->queuelen); 994 + rcu_read_lock(); 995 + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); 996 + atomic_long_dec(&xps->xps_queuelen); 997 + rcu_read_unlock(); 998 + 999 + xprt_put(xprt); 1000 + } 1001 + 971 1002 void rpc_task_release_transport(struct rpc_task *task) 972 1003 { 973 1004 struct rpc_xprt *xprt = task->tk_xprt; 974 1005 975 1006 if (xprt) { 976 1007 task->tk_xprt = NULL; 977 - xprt_put(xprt); 1008 + if (task->tk_client) 1009 + rpc_task_release_xprt(task->tk_client, xprt); 1010 + else 1011 + xprt_put(xprt); 978 1012 } 979 1013 } 980 1014 EXPORT_SYMBOL_GPL(rpc_task_release_transport); ··· 1017 983 { 1018 984 struct rpc_clnt *clnt = task->tk_client; 1019 985 986 + rpc_task_release_transport(task); 1020 987 if (clnt != NULL) { 1021 988 /* Remove from client task list */ 1022 989 spin_lock(&clnt->cl_lock); ··· 1027 992 1028 993 rpc_release_client(clnt); 1029 994 } 1030 - rpc_task_release_transport(task); 1031 995 } 1032 996 1033 997 static 1034 998 void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) 1035 999 { 1036 1000 if (!task->tk_xprt) 1037 - task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); 1001 + task->tk_xprt = rpc_task_get_xprt(clnt); 1038 1002 } 1039 1003 1040 1004 static
+19 -1
net/sunrpc/xprtmultipath.c
··· 36 36 if (xps->xps_nxprts == 0) 37 37 xps->xps_net = xprt->xprt_net; 38 38 xps->xps_nxprts++; 39 + xps->xps_nactive++; 39 40 } 40 41 41 42 /** ··· 63 62 { 64 63 if (unlikely(xprt == NULL)) 65 64 return; 65 + xps->xps_nactive--; 66 66 xps->xps_nxprts--; 67 67 if (xps->xps_nxprts == 0) 68 68 xps->xps_net = NULL; ··· 319 317 static 320 318 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi) 321 319 { 322 - return xprt_iter_next_entry_multiple(xpi, 320 + struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch); 321 + struct rpc_xprt *xprt; 322 + unsigned long xprt_queuelen; 323 + unsigned long xps_queuelen; 324 + unsigned long xps_avglen; 325 + 326 + do { 327 + xprt = xprt_iter_next_entry_multiple(xpi, 323 328 xprt_switch_find_next_entry_roundrobin); 329 + if (xprt == NULL) 330 + break; 331 + xprt_queuelen = atomic_long_read(&xprt->queuelen); 332 + if (xprt_queuelen <= 2) 333 + break; 334 + xps_queuelen = atomic_long_read(&xps->xps_queuelen); 335 + xps_avglen = DIV_ROUND_UP(xps_queuelen, xps->xps_nactive); 336 + } while (xprt_queuelen > xps_avglen); 337 + return xprt; 324 338 } 325 339 326 340 static