Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipvs: sloppy TCP and SCTP

This adds support for sloppy TCP and SCTP modes to IPVS.

When enabled (via the sysctls net.ipv4.vs.sloppy_tcp and
net.ipv4.vs.sloppy_sctp), these modes allow IPVS to create connection
state on any packet, not just a TCP SYN (or SCTP INIT).

This allows connections to fail over from one IPVS director to another
mid-flight.

Signed-off-by: Alexander Frolkin <avf@eldamar.org.uk>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>

Authored by Alexander Frolkin and committed by Simon Horman
c6c96c18 bba54de5

+56 -14
+24
include/net/ip_vs.h
··· 978 978 int sysctl_sync_sock_size; 979 979 int sysctl_cache_bypass; 980 980 int sysctl_expire_nodest_conn; 981 + int sysctl_sloppy_tcp; 982 + int sysctl_sloppy_sctp; 981 983 int sysctl_expire_quiescent_template; 982 984 int sysctl_sync_threshold[2]; 983 985 unsigned int sysctl_sync_refresh_period; ··· 1022 1020 #define DEFAULT_SYNC_THRESHOLD 3 1023 1021 #define DEFAULT_SYNC_PERIOD 50 1024 1022 #define DEFAULT_SYNC_VER 1 1023 + #define DEFAULT_SLOPPY_TCP 0 1024 + #define DEFAULT_SLOPPY_SCTP 0 1025 1025 #define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ) 1026 1026 #define DEFAULT_SYNC_RETRIES 0 1027 1027 #define IPVS_SYNC_WAKEUP_RATE 8 ··· 1058 1054 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) 1059 1055 { 1060 1056 return ipvs->sysctl_sync_ver; 1057 + } 1058 + 1059 + static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs) 1060 + { 1061 + return ipvs->sysctl_sloppy_tcp; 1062 + } 1063 + 1064 + static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs) 1065 + { 1066 + return ipvs->sysctl_sloppy_sctp; 1061 1067 } 1062 1068 1063 1069 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) ··· 1121 1107 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) 1122 1108 { 1123 1109 return DEFAULT_SYNC_VER; 1110 + } 1111 + 1112 + static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs) 1113 + { 1114 + return DEFAULT_SLOPPY_TCP; 1115 + } 1116 + 1117 + static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs) 1118 + { 1119 + return DEFAULT_SLOPPY_SCTP; 1124 1120 } 1125 1121 1126 1122 static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+14
net/netfilter/ipvs/ip_vs_ctl.c
··· 1739 1739 .proc_handler = proc_dointvec, 1740 1740 }, 1741 1741 { 1742 + .procname = "sloppy_tcp", 1743 + .maxlen = sizeof(int), 1744 + .mode = 0644, 1745 + .proc_handler = proc_dointvec, 1746 + }, 1747 + { 1748 + .procname = "sloppy_sctp", 1749 + .maxlen = sizeof(int), 1750 + .mode = 0644, 1751 + .proc_handler = proc_dointvec, 1752 + }, 1753 + { 1742 1754 .procname = "expire_quiescent_template", 1743 1755 .maxlen = sizeof(int), 1744 1756 .mode = 0644, ··· 3735 3723 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 3736 3724 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3737 3725 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3726 + tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; 3727 + tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; 3738 3728 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3739 3729 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 3740 3730 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
+10 -8
net/netfilter/ipvs/ip_vs_proto_sctp.c
··· 15 15 { 16 16 struct net *net; 17 17 struct ip_vs_service *svc; 18 + struct netns_ipvs *ipvs; 18 19 sctp_chunkhdr_t _schunkh, *sch; 19 20 sctp_sctphdr_t *sh, _sctph; 20 21 ··· 28 27 if (sch == NULL) 29 28 return 0; 30 29 net = skb_net(skb); 30 + ipvs = net_ipvs(net); 31 31 rcu_read_lock(); 32 - if ((sch->type == SCTP_CID_INIT) && 32 + if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && 33 33 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, 34 34 &iph->daddr, sh->dest))) { 35 35 int ignored; 36 36 37 - if (ip_vs_todrop(net_ipvs(net))) { 37 + if (ip_vs_todrop(ipvs)) { 38 38 /* 39 39 * It seems that we are very loaded. 40 40 * We have to drop this packet :( ··· 234 232 * STATE : IP_VS_SCTP_S_NONE 235 233 */ 236 234 /*next state *//*event */ 237 - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, 235 + {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, 238 236 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, 239 237 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, 240 238 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, 241 - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, 239 + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, 242 240 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, 243 - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, 241 + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, 244 242 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, 245 - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, 243 + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, 246 244 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, 247 245 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, 248 246 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, 249 - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, 247 + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, 250 248 {IP_VS_SCTP_S_CLOSED /* 
IP_VS_SCTP_EVE_SHUT_SER */ }, 251 - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, 249 + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, 252 250 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, 253 251 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, 254 252 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ },
+8 -6
net/netfilter/ipvs/ip_vs_proto_tcp.c
··· 39 39 struct net *net; 40 40 struct ip_vs_service *svc; 41 41 struct tcphdr _tcph, *th; 42 + struct netns_ipvs *ipvs; 42 43 43 44 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); 44 45 if (th == NULL) { ··· 47 46 return 0; 48 47 } 49 48 net = skb_net(skb); 49 + ipvs = net_ipvs(net); 50 50 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 51 51 rcu_read_lock(); 52 - if (th->syn && 52 + if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && 53 53 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, 54 54 &iph->daddr, th->dest))) { 55 55 int ignored; 56 56 57 - if (ip_vs_todrop(net_ipvs(net))) { 57 + if (ip_vs_todrop(ipvs)) { 58 58 /* 59 59 * It seems that we are very loaded. 60 60 * We have to drop this packet :( ··· 403 401 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 404 402 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, 405 403 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, 406 - /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 404 + /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 407 405 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, 408 406 409 407 /* OUTPUT */ ··· 417 415 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 418 416 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, 419 417 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, 420 - /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 418 + /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 421 419 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 422 420 }; 423 421 ··· 426 424 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 427 425 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, 428 426 /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, 429 - /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, 
sLI, sSA }}, 427 + /*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, 430 428 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 431 429 432 430 /* OUTPUT */ ··· 440 438 /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ 441 439 /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, 442 440 /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, 443 - /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 441 + /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, 444 442 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 445 443 }; 446 444