Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: ipv6: add second dif to inet6 socket lookups

Add a second device index, sdif, to inet6 socket lookups. sdif is the
index for ingress devices enslaved to an l3mdev. It allows the lookups
to consider the enslaved device as well as the L3 domain when searching
for a socket.

TCP moves the data in the cb. Prior to tcp_v6_rcv (e.g., early demux) the
ingress index is obtained from the skb's IPv6 control block using inet6_sdif,
and after tcp_v6_rcv has moved the cb, tcp_v6_sdif is used.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

David Ahern and committed by
David S. Miller
4297a0ef 1801b570

+56 -32
+13 -9
include/net/inet6_hashtables.h
··· 49 49 const struct in6_addr *saddr, 50 50 const __be16 sport, 51 51 const struct in6_addr *daddr, 52 - const u16 hnum, const int dif); 52 + const u16 hnum, const int dif, 53 + const int sdif); 53 54 54 55 struct sock *inet6_lookup_listener(struct net *net, 55 56 struct inet_hashinfo *hashinfo, ··· 58 57 const struct in6_addr *saddr, 59 58 const __be16 sport, 60 59 const struct in6_addr *daddr, 61 - const unsigned short hnum, const int dif); 60 + const unsigned short hnum, 61 + const int dif, const int sdif); 62 62 63 63 static inline struct sock *__inet6_lookup(struct net *net, 64 64 struct inet_hashinfo *hashinfo, ··· 68 66 const __be16 sport, 69 67 const struct in6_addr *daddr, 70 68 const u16 hnum, 71 - const int dif, 69 + const int dif, const int sdif, 72 70 bool *refcounted) 73 71 { 74 72 struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, 75 - sport, daddr, hnum, dif); 73 + sport, daddr, hnum, 74 + dif, sdif); 76 75 *refcounted = true; 77 76 if (sk) 78 77 return sk; 79 78 *refcounted = false; 80 79 return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, 81 - daddr, hnum, dif); 80 + daddr, hnum, dif, sdif); 82 81 } 83 82 84 83 static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, 85 84 struct sk_buff *skb, int doff, 86 85 const __be16 sport, 87 86 const __be16 dport, 88 - int iif, 87 + int iif, int sdif, 89 88 bool *refcounted) 90 89 { 91 90 struct sock *sk = skb_steal_sock(skb); ··· 98 95 return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, 99 96 doff, &ipv6_hdr(skb)->saddr, sport, 100 97 &ipv6_hdr(skb)->daddr, ntohs(dport), 101 - iif, refcounted); 98 + iif, sdif, refcounted); 102 99 } 103 100 104 101 struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, ··· 110 107 int inet6_hash(struct sock *sk); 111 108 #endif /* IS_ENABLED(CONFIG_IPV6) */ 112 109 113 - #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ 110 + #define INET6_MATCH(__sk, __net, __saddr, 
__daddr, __ports, __dif, __sdif) \ 114 111 (((__sk)->sk_portpair == (__ports)) && \ 115 112 ((__sk)->sk_family == AF_INET6) && \ 116 113 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ 117 114 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ 118 115 (!(__sk)->sk_bound_dev_if || \ 119 - ((__sk)->sk_bound_dev_if == (__dif))) && \ 116 + ((__sk)->sk_bound_dev_if == (__dif)) || \ 117 + ((__sk)->sk_bound_dev_if == (__sdif))) && \ 120 118 net_eq(sock_net(__sk), (__net))) 121 119 122 120 #endif /* _INET6_HASHTABLES_H */
+10
include/net/tcp.h
··· 827 827 828 828 return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; 829 829 } 830 + 831 + /* TCP_SKB_CB reference means this can not be used from early demux */ 832 + static inline int tcp_v6_sdif(const struct sk_buff *skb) 833 + { 834 + #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 835 + if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) 836 + return TCP_SKB_CB(skb)->header.h6.iif; 837 + #endif 838 + return 0; 839 + } 830 840 #endif 831 841 832 842 /* TCP_SKB_CB reference means this can not be used from early demux */
+2 -2
net/dccp/ipv6.c
··· 89 89 sk = __inet6_lookup_established(net, &dccp_hashinfo, 90 90 &hdr->daddr, dh->dccph_dport, 91 91 &hdr->saddr, ntohs(dh->dccph_sport), 92 - inet6_iif(skb)); 92 + inet6_iif(skb), 0); 93 93 94 94 if (!sk) { 95 95 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ··· 687 687 lookup: 688 688 sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), 689 689 dh->dccph_sport, dh->dccph_dport, 690 - inet6_iif(skb), &refcounted); 690 + inet6_iif(skb), 0, &refcounted); 691 691 if (!sk) { 692 692 dccp_pr_debug("failed to look up flow ID in table and " 693 693 "get corresponding socket\n");
+17 -11
net/ipv6/inet6_hashtables.c
··· 56 56 const __be16 sport, 57 57 const struct in6_addr *daddr, 58 58 const u16 hnum, 59 - const int dif) 59 + const int dif, const int sdif) 60 60 { 61 61 struct sock *sk; 62 62 const struct hlist_nulls_node *node; ··· 73 73 sk_nulls_for_each_rcu(sk, node, &head->chain) { 74 74 if (sk->sk_hash != hash) 75 75 continue; 76 - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) 76 + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) 77 77 continue; 78 78 if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) 79 79 goto out; 80 80 81 - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { 81 + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { 82 82 sock_gen_put(sk); 83 83 goto begin; 84 84 } ··· 96 96 static inline int compute_score(struct sock *sk, struct net *net, 97 97 const unsigned short hnum, 98 98 const struct in6_addr *daddr, 99 - const int dif, bool exact_dif) 99 + const int dif, const int sdif, bool exact_dif) 100 100 { 101 101 int score = -1; 102 102 ··· 110 110 score++; 111 111 } 112 112 if (sk->sk_bound_dev_if || exact_dif) { 113 - if (sk->sk_bound_dev_if != dif) 113 + bool dev_match = (sk->sk_bound_dev_if == dif || 114 + sk->sk_bound_dev_if == sdif); 115 + 116 + if (exact_dif && !dev_match) 114 117 return -1; 115 - score++; 118 + if (sk->sk_bound_dev_if && dev_match) 119 + score++; 116 120 } 117 121 if (sk->sk_incoming_cpu == raw_smp_processor_id()) 118 122 score++; ··· 130 126 struct sk_buff *skb, int doff, 131 127 const struct in6_addr *saddr, 132 128 const __be16 sport, const struct in6_addr *daddr, 133 - const unsigned short hnum, const int dif) 129 + const unsigned short hnum, const int dif, const int sdif) 134 130 { 135 131 unsigned int hash = inet_lhashfn(net, hnum); 136 132 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; ··· 140 136 u32 phash = 0; 141 137 142 138 sk_for_each(sk, &ilb->head) { 143 - score = compute_score(sk, net, hnum, daddr, dif, exact_dif); 139 + score = 
compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); 144 140 if (score > hiscore) { 145 141 reuseport = sk->sk_reuseport; 146 142 if (reuseport) { ··· 175 171 bool refcounted; 176 172 177 173 sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, 178 - ntohs(dport), dif, &refcounted); 174 + ntohs(dport), dif, 0, &refcounted); 179 175 if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) 180 176 sk = NULL; 181 177 return sk; ··· 191 187 const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; 192 188 const struct in6_addr *saddr = &sk->sk_v6_daddr; 193 189 const int dif = sk->sk_bound_dev_if; 194 - const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); 195 190 struct net *net = sock_net(sk); 191 + const int sdif = l3mdev_master_ifindex_by_index(net, dif); 192 + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); 196 193 const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, 197 194 inet->inet_dport); 198 195 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); ··· 208 203 if (sk2->sk_hash != hash) 209 204 continue; 210 205 211 - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { 206 + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, 207 + dif, sdif))) { 212 208 if (sk2->sk_state == TCP_TIME_WAIT) { 213 209 tw = inet_twsk(sk2); 214 210 if (twsk_unique(sk, sk2, twp))
+8 -5
net/ipv6/tcp_ipv6.c
··· 350 350 sk = __inet6_lookup_established(net, &tcp_hashinfo, 351 351 &hdr->daddr, th->dest, 352 352 &hdr->saddr, ntohs(th->source), 353 - skb->dev->ifindex); 353 + skb->dev->ifindex, inet6_sdif(skb)); 354 354 355 355 if (!sk) { 356 356 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ··· 918 918 &tcp_hashinfo, NULL, 0, 919 919 &ipv6h->saddr, 920 920 th->source, &ipv6h->daddr, 921 - ntohs(th->source), tcp_v6_iif(skb)); 921 + ntohs(th->source), tcp_v6_iif(skb), 922 + tcp_v6_sdif(skb)); 922 923 if (!sk1) 923 924 goto out; 924 925 ··· 1398 1397 1399 1398 static int tcp_v6_rcv(struct sk_buff *skb) 1400 1399 { 1400 + int sdif = inet6_sdif(skb); 1401 1401 const struct tcphdr *th; 1402 1402 const struct ipv6hdr *hdr; 1403 1403 bool refcounted; ··· 1432 1430 1433 1431 lookup: 1434 1432 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1435 - th->source, th->dest, inet6_iif(skb), 1433 + th->source, th->dest, inet6_iif(skb), sdif, 1436 1434 &refcounted); 1437 1435 if (!sk) 1438 1436 goto no_tcp_socket; ··· 1565 1563 skb, __tcp_hdrlen(th), 1566 1564 &ipv6_hdr(skb)->saddr, th->source, 1567 1565 &ipv6_hdr(skb)->daddr, 1568 - ntohs(th->dest), tcp_v6_iif(skb)); 1566 + ntohs(th->dest), tcp_v6_iif(skb), 1567 + sdif); 1569 1568 if (sk2) { 1570 1569 struct inet_timewait_sock *tw = inet_twsk(sk); 1571 1570 inet_twsk_deschedule_put(tw); ··· 1613 1610 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1614 1611 &hdr->saddr, th->source, 1615 1612 &hdr->daddr, ntohs(th->dest), 1616 - inet6_iif(skb)); 1613 + inet6_iif(skb), inet6_sdif(skb)); 1617 1614 if (sk) { 1618 1615 skb->sk = sk; 1619 1616 skb->destructor = sock_edemux;
+4 -3
net/ipv6/udp.c
··· 897 897 static struct sock *__udp6_lib_demux_lookup(struct net *net, 898 898 __be16 loc_port, const struct in6_addr *loc_addr, 899 899 __be16 rmt_port, const struct in6_addr *rmt_addr, 900 - int dif) 900 + int dif, int sdif) 901 901 { 902 902 unsigned short hnum = ntohs(loc_port); 903 903 unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); ··· 908 908 909 909 udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { 910 910 if (sk->sk_state == TCP_ESTABLISHED && 911 - INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) 911 + INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif)) 912 912 return sk; 913 913 /* Only check first socket in chain */ 914 914 break; ··· 923 923 struct sock *sk; 924 924 struct dst_entry *dst; 925 925 int dif = skb->dev->ifindex; 926 + int sdif = inet6_sdif(skb); 926 927 927 928 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 928 929 sizeof(struct udphdr))) ··· 935 934 sk = __udp6_lib_demux_lookup(net, uh->dest, 936 935 &ipv6_hdr(skb)->daddr, 937 936 uh->source, &ipv6_hdr(skb)->saddr, 938 - dif); 937 + dif, sdif); 939 938 else 940 939 return; 941 940
+2 -2
net/netfilter/xt_TPROXY.c
··· 195 195 thoff + __tcp_hdrlen(tcph), 196 196 saddr, sport, 197 197 daddr, ntohs(dport), 198 - in->ifindex); 198 + in->ifindex, 0); 199 199 200 200 if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) 201 201 sk = NULL; ··· 208 208 case NFT_LOOKUP_ESTABLISHED: 209 209 sk = __inet6_lookup_established(net, &tcp_hashinfo, 210 210 saddr, sport, daddr, ntohs(dport), 211 - in->ifindex); 211 + in->ifindex, 0); 212 212 break; 213 213 default: 214 214 BUG();