Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net, ip6_tunnel: enhance tunnel locate with link check

With ipip, it is possible to create an extra interface explicitly
attached to a given physical interface:

# ip link show tunl0
4: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ipip 0.0.0.0 brd 0.0.0.0
# ip link add tunl1 type ipip dev eth0
# ip link show tunl1
6: tunl1@eth0: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ipip 0.0.0.0 brd 0.0.0.0

But it is not possible with ip6tnl:

# ip link show ip6tnl0
5: ip6tnl0@NONE: <NOARP> mtu 1452 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/tunnel6 :: brd ::
# ip link add ip6tnl1 type ip6tnl dev eth0
RTNETLINK answers: File exists

This patch aims to make it possible by adding a link comparison in both
the tunnel locate and lookup functions; we also modify the MTU calculation
when attached to an interface with a lower MTU.

This makes it possible to use cross-netns (x-netns) communication by
moving the newly created tunnel into a given netns.

Signed-off-by: William Dauchy <w.dauchy@criteo.com>
Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

William Dauchy and committed by
David S. Miller
5fdcce21 b32cb6fc

+47 -21
+47 -21
net/ipv6/ip6_tunnel.c
··· 121 121 122 122 /** 123 123 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 124 + * @link: ifindex of underlying interface 124 125 * @remote: the address of the tunnel exit-point 125 126 * @local: the address of the tunnel entry-point 126 127 * ··· 135 134 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 136 135 137 136 static struct ip6_tnl * 138 - ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) 137 + ip6_tnl_lookup(struct net *net, int link, 138 + const struct in6_addr *remote, const struct in6_addr *local) 139 139 { 140 140 unsigned int hash = HASH(remote, local); 141 - struct ip6_tnl *t; 141 + struct ip6_tnl *t, *cand = NULL; 142 142 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 143 143 struct in6_addr any; 144 144 145 145 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { 146 - if (ipv6_addr_equal(local, &t->parms.laddr) && 147 - ipv6_addr_equal(remote, &t->parms.raddr) && 148 - (t->dev->flags & IFF_UP)) 146 + if (!ipv6_addr_equal(local, &t->parms.laddr) || 147 + !ipv6_addr_equal(remote, &t->parms.raddr) || 148 + !(t->dev->flags & IFF_UP)) 149 + continue; 150 + 151 + if (link == t->parms.link) 149 152 return t; 153 + else 154 + cand = t; 150 155 } 151 156 152 157 memset(&any, 0, sizeof(any)); 153 158 hash = HASH(&any, local); 154 159 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { 155 - if (ipv6_addr_equal(local, &t->parms.laddr) && 156 - ipv6_addr_any(&t->parms.raddr) && 157 - (t->dev->flags & IFF_UP)) 160 + if (!ipv6_addr_equal(local, &t->parms.laddr) || 161 + !ipv6_addr_any(&t->parms.raddr) || 162 + !(t->dev->flags & IFF_UP)) 163 + continue; 164 + 165 + if (link == t->parms.link) 158 166 return t; 167 + else if (!cand) 168 + cand = t; 159 169 } 160 170 161 171 hash = HASH(remote, &any); 162 172 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { 163 - if (ipv6_addr_equal(remote, &t->parms.raddr) && 164 - ipv6_addr_any(&t->parms.laddr) && 165 - (t->dev->flags & 
IFF_UP)) 173 + if (!ipv6_addr_equal(remote, &t->parms.raddr) || 174 + !ipv6_addr_any(&t->parms.laddr) || 175 + !(t->dev->flags & IFF_UP)) 176 + continue; 177 + 178 + if (link == t->parms.link) 166 179 return t; 180 + else if (!cand) 181 + cand = t; 167 182 } 183 + 184 + if (cand) 185 + return cand; 168 186 169 187 t = rcu_dereference(ip6n->collect_md_tun); 170 188 if (t && t->dev->flags & IFF_UP) ··· 371 351 (t = rtnl_dereference(*tp)) != NULL; 372 352 tp = &t->next) { 373 353 if (ipv6_addr_equal(local, &t->parms.laddr) && 374 - ipv6_addr_equal(remote, &t->parms.raddr)) { 354 + ipv6_addr_equal(remote, &t->parms.raddr) && 355 + p->link == t->parms.link) { 375 356 if (create) 376 357 return ERR_PTR(-EEXIST); 377 358 ··· 506 485 processing of the error. */ 507 486 508 487 rcu_read_lock(); 509 - t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); 488 + t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr); 510 489 if (!t) 511 490 goto out; 512 491 ··· 908 887 int ret = -1; 909 888 910 889 rcu_read_lock(); 911 - t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); 890 + t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr); 912 891 913 892 if (t) { 914 893 u8 tproto = READ_ONCE(t->parms.proto); ··· 1441 1420 static void ip6_tnl_link_config(struct ip6_tnl *t) 1442 1421 { 1443 1422 struct net_device *dev = t->dev; 1423 + struct net_device *tdev = NULL; 1444 1424 struct __ip6_tnl_parm *p = &t->parms; 1445 1425 struct flowi6 *fl6 = &t->fl.u.ip6; 1426 + unsigned int mtu; 1446 1427 int t_hlen; 1447 1428 1448 1429 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); ··· 1480 1457 struct rt6_info *rt = rt6_lookup(t->net, 1481 1458 &p->raddr, &p->laddr, 1482 1459 p->link, NULL, strict); 1460 + if (rt) { 1461 + tdev = rt->dst.dev; 1462 + ip6_rt_put(rt); 1463 + } 1483 1464 1484 - if (!rt) 1485 - return; 1465 + if (!tdev && p->link) 1466 + tdev = __dev_get_by_index(t->net, 
p->link); 1486 1467 1487 - if (rt->dst.dev) { 1488 - dev->hard_header_len = rt->dst.dev->hard_header_len + 1489 - t_hlen; 1468 + if (tdev) { 1469 + dev->hard_header_len = tdev->hard_header_len + t_hlen; 1470 + mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); 1490 1471 1491 - dev->mtu = rt->dst.dev->mtu - t_hlen; 1472 + dev->mtu = mtu - t_hlen; 1492 1473 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1493 1474 dev->mtu -= 8; 1494 1475 1495 1476 if (dev->mtu < IPV6_MIN_MTU) 1496 1477 dev->mtu = IPV6_MIN_MTU; 1497 1478 } 1498 - ip6_rt_put(rt); 1499 1479 } 1500 1480 } 1501 1481