Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipvs: Remove all remaining references to rt->rt_{src,dst}

Remove all remaining references to rt->rt_{src,dst}
by using dest->dst_saddr to cache saddr (used for TUN mode).
For ICMP in the FORWARD hook, just restrict the rt_mode for NAT
to disable LOCALNODE. All other modes do not allow
IP_VS_RT_MODE_RDR, so we should be safe with the ICMP
forwarding. Using cp->daddr as a replacement for rt_dst
is safe for all modes except BYPASS, even when cp->dest is
NULL, because it is cp->daddr that is used to assign cp->dest
for sync-ed connections.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Julian Anastasov and committed by
David S. Miller
c92f5ca2 44e3125c

+54 -51
+4 -5
include/net/ip_vs.h
··· 665 665 struct dst_entry *dst_cache; /* destination cache entry */ 666 666 u32 dst_rtos; /* RT_TOS(tos) for dst */ 667 667 u32 dst_cookie; 668 - #ifdef CONFIG_IP_VS_IPV6 669 - struct in6_addr dst_saddr; 670 - #endif 668 + union nf_inet_addr dst_saddr; 671 669 672 670 /* for virtual service */ 673 671 struct ip_vs_service *svc; /* service it belongs to */ ··· 1251 1253 extern int ip_vs_dr_xmit 1252 1254 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1253 1255 extern int ip_vs_icmp_xmit 1254 - (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset); 1256 + (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, 1257 + int offset, unsigned int hooknum); 1255 1258 extern void ip_vs_dst_reset(struct ip_vs_dest *dest); 1256 1259 1257 1260 #ifdef CONFIG_IP_VS_IPV6 ··· 1266 1267 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 1267 1268 extern int ip_vs_icmp_xmit_v6 1268 1269 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, 1269 - int offset); 1270 + int offset, unsigned int hooknum); 1270 1271 #endif 1271 1272 1272 1273 #ifdef CONFIG_SYSCTL
+2 -22
net/netfilter/ipvs/ip_vs_core.c
··· 1382 1382 ip_vs_in_stats(cp, skb); 1383 1383 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1384 1384 offset += 2 * sizeof(__u16); 1385 - verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); 1386 - /* LOCALNODE from FORWARD hook is not supported */ 1387 - if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD && 1388 - skb_rtable(skb)->rt_flags & RTCF_LOCAL) { 1389 - IP_VS_DBG(1, "%s(): " 1390 - "local delivery to %pI4 but in FORWARD\n", 1391 - __func__, &skb_rtable(skb)->rt_dst); 1392 - verdict = NF_DROP; 1393 - } 1385 + verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); 1394 1386 1395 1387 out: 1396 1388 __ip_vs_conn_put(cp); ··· 1404 1412 struct ip_vs_protocol *pp; 1405 1413 struct ip_vs_proto_data *pd; 1406 1414 unsigned int offset, verdict; 1407 - struct rt6_info *rt; 1408 1415 1409 1416 *related = 1; 1410 1417 ··· 1465 1474 if (!cp) 1466 1475 return NF_ACCEPT; 1467 1476 1468 - verdict = NF_DROP; 1469 - 1470 1477 /* do the statistics and put it back */ 1471 1478 ip_vs_in_stats(cp, skb); 1472 1479 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || 1473 1480 IPPROTO_SCTP == cih->nexthdr) 1474 1481 offset += 2 * sizeof(__u16); 1475 - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); 1476 - /* LOCALNODE from FORWARD hook is not supported */ 1477 - if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD && 1478 - (rt = (struct rt6_info *) skb_dst(skb)) && 1479 - rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) { 1480 - IP_VS_DBG(1, "%s(): " 1481 - "local delivery to %pI6 but in FORWARD\n", 1482 - __func__, &rt->rt6i_dst); 1483 - verdict = NF_DROP; 1484 - } 1482 + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); 1485 1483 1486 1484 __ip_vs_conn_put(cp); 1487 1485
+48 -24
net/netfilter/ipvs/ip_vs_xmit.c
··· 87 87 /* Get route to destination or remote server */ 88 88 static struct rtable * 89 89 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, 90 - __be32 daddr, u32 rtos, int rt_mode) 90 + __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) 91 91 { 92 92 struct net *net = dev_net(skb_dst(skb)->dev); 93 93 struct rtable *rt; /* Route to the other host */ ··· 98 98 spin_lock(&dest->dst_lock); 99 99 if (!(rt = (struct rtable *) 100 100 __ip_vs_dst_check(dest, rtos))) { 101 - rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0); 101 + struct flowi4 fl4; 102 + 103 + memset(&fl4, 0, sizeof(fl4)); 104 + fl4.daddr = dest->addr.ip; 105 + fl4.flowi4_tos = rtos; 106 + rt = ip_route_output_key(net, &fl4); 102 107 if (IS_ERR(rt)) { 103 108 spin_unlock(&dest->dst_lock); 104 109 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", ··· 111 106 return NULL; 112 107 } 113 108 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); 114 - IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", 115 - &dest->addr.ip, 109 + dest->dst_saddr.ip = fl4.saddr; 110 + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " 111 + "rtos=%X\n", 112 + &dest->addr.ip, &dest->dst_saddr.ip, 116 113 atomic_read(&rt->dst.__refcnt), rtos); 117 114 } 118 115 daddr = dest->addr.ip; 116 + if (ret_saddr) 117 + *ret_saddr = dest->dst_saddr.ip; 119 118 spin_unlock(&dest->dst_lock); 120 119 } else { 121 - rt = ip_route_output(net, daddr, 0, rtos, 0); 120 + struct flowi4 fl4; 121 + 122 + memset(&fl4, 0, sizeof(fl4)); 123 + fl4.daddr = daddr; 124 + fl4.flowi4_tos = rtos; 125 + rt = ip_route_output_key(net, &fl4); 122 126 if (IS_ERR(rt)) { 123 127 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 124 128 &daddr); 125 129 return NULL; 126 130 } 131 + if (ret_saddr) 132 + *ret_saddr = fl4.saddr; 127 133 } 128 134 129 135 local = rt->rt_flags & RTCF_LOCAL; ··· 265 249 u32 cookie; 266 250 267 251 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 268 - &dest->dst_saddr, 252 + &dest->dst_saddr.in6, 269 253 
do_xfrm); 270 254 if (!dst) { 271 255 spin_unlock(&dest->dst_lock); ··· 275 259 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 276 260 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); 277 261 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 278 - &dest->addr.in6, &dest->dst_saddr, 262 + &dest->addr.in6, &dest->dst_saddr.in6, 279 263 atomic_read(&rt->dst.__refcnt)); 280 264 } 281 265 if (ret_saddr) 282 - ipv6_addr_copy(ret_saddr, &dest->dst_saddr); 266 + ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6); 283 267 spin_unlock(&dest->dst_lock); 284 268 } else { 285 269 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); ··· 402 386 EnterFunction(10); 403 387 404 388 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), 405 - IP_VS_RT_MODE_NON_LOCAL))) 389 + IP_VS_RT_MODE_NON_LOCAL, NULL))) 406 390 goto tx_error_icmp; 407 391 408 392 /* MTU checking */ ··· 534 518 RT_TOS(iph->tos), 535 519 IP_VS_RT_MODE_LOCAL | 536 520 IP_VS_RT_MODE_NON_LOCAL | 537 - IP_VS_RT_MODE_RDR))) 521 + IP_VS_RT_MODE_RDR, NULL))) 538 522 goto tx_error_icmp; 539 523 local = rt->rt_flags & RTCF_LOCAL; 540 524 /* ··· 556 540 #endif 557 541 558 542 /* From world but DNAT to loopback address? 
*/ 559 - if (local && ipv4_is_loopback(rt->rt_dst) && 543 + if (local && ipv4_is_loopback(cp->daddr.ip) && 560 544 rt_is_input_route(skb_rtable(skb))) { 561 545 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 562 546 "stopping DNAT to loopback address"); ··· 767 751 struct ip_vs_protocol *pp) 768 752 { 769 753 struct rtable *rt; /* Route to the other host */ 754 + __be32 saddr; /* Source for tunnel */ 770 755 struct net_device *tdev; /* Device to other host */ 771 756 struct iphdr *old_iph = ip_hdr(skb); 772 757 u8 tos = old_iph->tos; ··· 781 764 782 765 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 783 766 RT_TOS(tos), IP_VS_RT_MODE_LOCAL | 784 - IP_VS_RT_MODE_NON_LOCAL))) 767 + IP_VS_RT_MODE_NON_LOCAL, 768 + &saddr))) 785 769 goto tx_error_icmp; 786 770 if (rt->rt_flags & RTCF_LOCAL) { 787 771 ip_rt_put(rt); ··· 850 832 iph->frag_off = df; 851 833 iph->protocol = IPPROTO_IPIP; 852 834 iph->tos = tos; 853 - iph->daddr = rt->rt_dst; 854 - iph->saddr = rt->rt_src; 835 + iph->daddr = cp->daddr.ip; 836 + iph->saddr = saddr; 855 837 iph->ttl = old_iph->ttl; 856 838 ip_select_ident(iph, &rt->dst, NULL); 857 839 ··· 1014 996 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1015 997 RT_TOS(iph->tos), 1016 998 IP_VS_RT_MODE_LOCAL | 1017 - IP_VS_RT_MODE_NON_LOCAL))) 999 + IP_VS_RT_MODE_NON_LOCAL, NULL))) 1018 1000 goto tx_error_icmp; 1019 1001 if (rt->rt_flags & RTCF_LOCAL) { 1020 1002 ip_rt_put(rt); ··· 1132 1114 */ 1133 1115 int 1134 1116 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1135 - struct ip_vs_protocol *pp, int offset) 1117 + struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1136 1118 { 1137 1119 struct rtable *rt; /* Route to the other host */ 1138 1120 int mtu; 1139 1121 int rc; 1140 1122 int local; 1123 + int rt_mode; 1141 1124 1142 1125 EnterFunction(10); 1143 1126 ··· 1159 1140 * mangle and send the packet here (only for VS/NAT) 1160 1141 */ 1161 1142 1143 + /* LOCALNODE from FORWARD hook is not 
supported */ 1144 + rt_mode = (hooknum != NF_INET_FORWARD) ? 1145 + IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1146 + IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1162 1147 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1163 1148 RT_TOS(ip_hdr(skb)->tos), 1164 - IP_VS_RT_MODE_LOCAL | 1165 - IP_VS_RT_MODE_NON_LOCAL | 1166 - IP_VS_RT_MODE_RDR))) 1149 + rt_mode, NULL))) 1167 1150 goto tx_error_icmp; 1168 1151 local = rt->rt_flags & RTCF_LOCAL; 1169 1152 ··· 1188 1167 #endif 1189 1168 1190 1169 /* From world but DNAT to loopback address? */ 1191 - if (local && ipv4_is_loopback(rt->rt_dst) && 1170 + if (local && ipv4_is_loopback(cp->daddr.ip) && 1192 1171 rt_is_input_route(skb_rtable(skb))) { 1193 1172 IP_VS_DBG(1, "%s(): " 1194 1173 "stopping DNAT to loopback %pI4\n", ··· 1253 1232 #ifdef CONFIG_IP_VS_IPV6 1254 1233 int 1255 1234 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1256 - struct ip_vs_protocol *pp, int offset) 1235 + struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1257 1236 { 1258 1237 struct rt6_info *rt; /* Route to the other host */ 1259 1238 int mtu; 1260 1239 int rc; 1261 1240 int local; 1241 + int rt_mode; 1262 1242 1263 1243 EnterFunction(10); 1264 1244 ··· 1280 1258 * mangle and send the packet here (only for VS/NAT) 1281 1259 */ 1282 1260 1261 + /* LOCALNODE from FORWARD hook is not supported */ 1262 + rt_mode = (hooknum != NF_INET_FORWARD) ? 1263 + IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1264 + IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1283 1265 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1284 - 0, (IP_VS_RT_MODE_LOCAL | 1285 - IP_VS_RT_MODE_NON_LOCAL | 1286 - IP_VS_RT_MODE_RDR)))) 1266 + 0, rt_mode))) 1287 1267 goto tx_error_icmp; 1288 1268 1289 1269 local = __ip_vs_is_local_route6(rt);