Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ipv6-Move-exceptions-to-fib6_nh-and-make-it-optional-in-a-fib6_info'

David Ahern says:

====================
ipv6: Move exceptions to fib6_nh and make it optional in a fib6_info

Patches 1 and 4 move pcpu and exception caches from fib6_info to fib6_nh.
With respect to the current FIB entries, this is only a movement from one
struct to another contained within the first.

Patch 2 refactors the core logic of fib6_drop_pcpu_from into a helper
that is invoked per fib6_nh.

Patch 3 refactors exception handling in a similar way - creating a bunch
of helpers that can be invoked per fib6_nh with the goal of making patch
4 easier to review as well as creating the code needed for nexthop
objects.

Patch 5 makes the fib6_nh at the end of a fib6_info an array, similar to
IPv4 and its fib_info. Under the current fib entry model, each fib6_info
will have a fib6_nh allocated for it.

Patch 6 refactors ip6_route_del, moving the code for deleting an
exception entry into a new function.

Patch 7 adds tests for redirect route exceptions. The new test was
written against 5.1 (before any of the nexthop refactoring). It and the
pmtu.sh selftest exercise the exception code paths - from creating
exceptions to cleaning them up on device delete. All tests pass without
any rcu locking or memleak warnings.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+820 -244
+16 -15
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
··· 2886 2886 return false; 2887 2887 2888 2888 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { 2889 - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; 2889 + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; 2890 2890 struct in6_addr *gw; 2891 2891 int ifindex, weight; 2892 2892 ··· 2958 2958 struct net_device *dev; 2959 2959 2960 2960 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { 2961 - dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev; 2961 + dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev; 2962 2962 val ^= dev->ifindex; 2963 2963 } 2964 2964 ··· 3960 3960 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; 3961 3961 struct fib6_info *rt = mlxsw_sp_rt6->rt; 3962 3962 3963 - if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev && 3963 + if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && 3964 3964 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, 3965 - &rt->fib6_nh.fib_nh_gw6)) 3965 + &rt->fib6_nh->fib_nh_gw6)) 3966 3966 return nh; 3967 3967 continue; 3968 3968 } ··· 4022 4022 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || 4023 4023 fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { 4024 4024 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, 4025 - list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD; 4025 + list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; 4026 4026 return; 4027 4027 } 4028 4028 4029 4029 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { 4030 4030 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; 4031 - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; 4031 + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; 4032 4032 struct mlxsw_sp_nexthop *nh; 4033 4033 4034 4034 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); ··· 4050 4050 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { 4051 4051 struct fib6_info *rt = mlxsw_sp_rt6->rt; 4052 4052 4053 - rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD; 4053 + rt->fib6_nh->fib_nh_flags &= 
~RTNH_F_OFFLOAD; 4054 4054 } 4055 4055 } 4056 4056 ··· 4928 4928 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) 4929 4929 { 4930 4930 /* RTF_CACHE routes are ignored */ 4931 - return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family; 4931 + return !(rt->fib6_flags & RTF_ADDRCONF) && 4932 + rt->fib6_nh->fib_nh_gw_family; 4932 4933 } 4933 4934 4934 4935 static struct fib6_info * ··· 4988 4987 const struct fib6_info *rt, 4989 4988 enum mlxsw_sp_ipip_type *ret) 4990 4989 { 4991 - return rt->fib6_nh.fib_nh_dev && 4992 - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret); 4990 + return rt->fib6_nh->fib_nh_dev && 4991 + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); 4993 4992 } 4994 4993 4995 4994 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, ··· 4999 4998 { 5000 4999 const struct mlxsw_sp_ipip_ops *ipip_ops; 5001 5000 struct mlxsw_sp_ipip_entry *ipip_entry; 5002 - struct net_device *dev = rt->fib6_nh.fib_nh_dev; 5001 + struct net_device *dev = rt->fib6_nh->fib_nh_dev; 5003 5002 struct mlxsw_sp_rif *rif; 5004 5003 int err; 5005 5004 ··· 5042 5041 struct mlxsw_sp_nexthop *nh, 5043 5042 const struct fib6_info *rt) 5044 5043 { 5045 - struct net_device *dev = rt->fib6_nh.fib_nh_dev; 5044 + struct net_device *dev = rt->fib6_nh->fib_nh_dev; 5046 5045 5047 5046 nh->nh_grp = nh_grp; 5048 - nh->nh_weight = rt->fib6_nh.fib_nh_weight; 5049 - memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr)); 5047 + nh->nh_weight = rt->fib6_nh->fib_nh_weight; 5048 + memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); 5050 5049 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); 5051 5050 5052 5051 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); ··· 5069 5068 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, 5070 5069 const struct fib6_info *rt) 5071 5070 { 5072 - return rt->fib6_nh.fib_nh_gw_family || 5071 + return rt->fib6_nh->fib_nh_gw_family || 
5073 5072 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); 5074 5073 } 5075 5074
+8 -9
include/net/ip6_fib.h
··· 131 131 #ifdef CONFIG_IPV6_ROUTER_PREF 132 132 unsigned long last_probe; 133 133 #endif 134 + 135 + struct rt6_info * __percpu *rt6i_pcpu; 136 + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; 134 137 }; 135 138 136 139 struct fib6_info { ··· 159 156 struct rt6key fib6_src; 160 157 struct rt6key fib6_prefsrc; 161 158 162 - struct rt6_info * __percpu *rt6i_pcpu; 163 - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; 164 - 165 159 u32 fib6_metric; 166 160 u8 fib6_protocol; 167 161 u8 fib6_type; 168 - u8 exception_bucket_flushed:1, 169 - should_flush:1, 162 + u8 should_flush:1, 170 163 dst_nocount:1, 171 164 dst_nopolicy:1, 172 165 dst_host:1, 173 166 fib6_destroying:1, 174 - unused:2; 167 + unused:3; 175 168 176 - struct fib6_nh fib6_nh; 177 169 struct rcu_head rcu; 170 + struct fib6_nh fib6_nh[0]; 178 171 }; 179 172 180 173 struct rt6_info { ··· 280 281 dst_release(&rt->dst); 281 282 } 282 283 283 - struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); 284 + struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); 284 285 void fib6_info_destroy_rcu(struct rcu_head *head); 285 286 286 287 static inline void fib6_info_hold(struct fib6_info *f6i) ··· 443 444 444 445 static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) 445 446 { 446 - return f6i->fib6_nh.fib_nh_dev; 447 + return f6i->fib6_nh->fib_nh_dev; 447 448 } 448 449 449 450 int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
+2 -2
include/net/ip6_route.h
··· 70 70 { 71 71 /* the RTF_ADDRCONF flag filters out RA's */ 72 72 return !(f6i->fib6_flags & RTF_ADDRCONF) && 73 - f6i->fib6_nh.fib_nh_gw_family; 73 + f6i->fib6_nh->fib_nh_gw_family; 74 74 } 75 75 76 76 void ip6_route_input(struct sk_buff *skb); ··· 275 275 276 276 static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) 277 277 { 278 - struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; 278 + struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh; 279 279 280 280 return nha->fib_nh_dev == nhb->fib_nh_dev && 281 281 ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
+5 -5
net/ipv6/addrconf.c
··· 2421 2421 goto out; 2422 2422 2423 2423 for_each_fib6_node_rt_rcu(fn) { 2424 - if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex) 2424 + if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex) 2425 2425 continue; 2426 - if (no_gw && rt->fib6_nh.fib_nh_gw_family) 2426 + if (no_gw && rt->fib6_nh->fib_nh_gw_family) 2427 2427 continue; 2428 2428 if ((rt->fib6_flags & flags) != flags) 2429 2429 continue; ··· 6341 6341 list_for_each_entry(ifa, &idev->addr_list, if_list) { 6342 6342 spin_lock(&ifa->lock); 6343 6343 if (ifa->rt) { 6344 - struct fib6_info *rt = ifa->rt; 6344 + struct fib6_nh *nh = ifa->rt->fib6_nh; 6345 6345 int cpu; 6346 6346 6347 6347 rcu_read_lock(); 6348 6348 ifa->rt->dst_nopolicy = val ? true : false; 6349 - if (rt->rt6i_pcpu) { 6349 + if (nh->rt6i_pcpu) { 6350 6350 for_each_possible_cpu(cpu) { 6351 6351 struct rt6_info **rtp; 6352 6352 6353 - rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu); 6353 + rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu); 6354 6354 addrconf_set_nopolicy(*rtp, val); 6355 6355 } 6356 6356 }
+39 -48
net/ipv6/ip6_fib.c
··· 147 147 addr[fn_bit >> 5]; 148 148 } 149 149 150 - struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) 150 + struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) 151 151 { 152 152 struct fib6_info *f6i; 153 + size_t sz = sizeof(*f6i); 153 154 154 - f6i = kzalloc(sizeof(*f6i), gfp_flags); 155 + if (with_fib6_nh) 156 + sz += sizeof(struct fib6_nh); 157 + 158 + f6i = kzalloc(sz, gfp_flags); 155 159 if (!f6i) 156 160 return NULL; 157 - 158 - f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); 159 - if (!f6i->rt6i_pcpu) { 160 - kfree(f6i); 161 - return NULL; 162 - } 163 161 164 162 INIT_LIST_HEAD(&f6i->fib6_siblings); 165 163 refcount_set(&f6i->fib6_ref, 1); ··· 168 170 void fib6_info_destroy_rcu(struct rcu_head *head) 169 171 { 170 172 struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); 171 - struct rt6_exception_bucket *bucket; 172 173 173 174 WARN_ON(f6i->fib6_node); 174 175 175 - bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); 176 - kfree(bucket); 177 - 178 - if (f6i->rt6i_pcpu) { 179 - int cpu; 180 - 181 - for_each_possible_cpu(cpu) { 182 - struct rt6_info **ppcpu_rt; 183 - struct rt6_info *pcpu_rt; 184 - 185 - ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); 186 - pcpu_rt = *ppcpu_rt; 187 - if (pcpu_rt) { 188 - dst_dev_put(&pcpu_rt->dst); 189 - dst_release(&pcpu_rt->dst); 190 - *ppcpu_rt = NULL; 191 - } 192 - } 193 - 194 - free_percpu(f6i->rt6i_pcpu); 195 - } 196 - 197 - fib6_nh_release(&f6i->fib6_nh); 198 - 176 + fib6_nh_release(f6i->fib6_nh); 199 177 ip_fib_metrics_put(f6i->fib6_metrics); 200 - 201 178 kfree(f6i); 202 179 } 203 180 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu); ··· 872 899 return ln; 873 900 } 874 901 875 - static void fib6_drop_pcpu_from(struct fib6_info *f6i, 876 - const struct fib6_table *table) 902 + static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, 903 + const struct fib6_info *match, 904 + const struct fib6_table *table) 877 905 { 878 906 int cpu; 879 907 880 - /* Make sure 
rt6_make_pcpu_route() wont add other percpu routes 881 - * while we are cleaning them here. 882 - */ 883 - f6i->fib6_destroying = 1; 884 - mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ 908 + if (!fib6_nh->rt6i_pcpu) 909 + return; 885 910 886 911 /* release the reference to this fib entry from 887 912 * all of its cached pcpu routes ··· 888 917 struct rt6_info **ppcpu_rt; 889 918 struct rt6_info *pcpu_rt; 890 919 891 - ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); 920 + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); 892 921 pcpu_rt = *ppcpu_rt; 893 - if (pcpu_rt) { 922 + 923 + /* only dropping the 'from' reference if the cached route 924 + * is using 'match'. The cached pcpu_rt->from only changes 925 + * from a fib6_info to NULL (ip6_dst_destroy); it can never 926 + * change from one fib6_info reference to another 927 + */ 928 + if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) { 894 929 struct fib6_info *from; 895 930 896 931 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); ··· 905 928 } 906 929 } 907 930 931 + static void fib6_drop_pcpu_from(struct fib6_info *f6i, 932 + const struct fib6_table *table) 933 + { 934 + struct fib6_nh *fib6_nh; 935 + 936 + /* Make sure rt6_make_pcpu_route() wont add other percpu routes 937 + * while we are cleaning them here. 
938 + */ 939 + f6i->fib6_destroying = 1; 940 + mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ 941 + 942 + fib6_nh = f6i->fib6_nh; 943 + __fib6_drop_pcpu_from(fib6_nh, f6i, table); 944 + } 945 + 908 946 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, 909 947 struct net *net) 910 948 { 911 949 struct fib6_table *table = rt->fib6_table; 912 950 913 - if (rt->rt6i_pcpu) 914 - fib6_drop_pcpu_from(rt, table); 951 + fib6_drop_pcpu_from(rt, table); 915 952 916 953 if (refcount_read(&rt->fib6_ref) != 1) { 917 954 /* This route is used as dummy address holder in some split ··· 2305 2314 #else 2306 2315 seq_puts(seq, "00000000000000000000000000000000 00 "); 2307 2316 #endif 2308 - if (rt->fib6_nh.fib_nh_gw_family) { 2317 + if (rt->fib6_nh->fib_nh_gw_family) { 2309 2318 flags |= RTF_GATEWAY; 2310 - seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6); 2319 + seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6); 2311 2320 } else { 2312 2321 seq_puts(seq, "00000000000000000000000000000000"); 2313 2322 } 2314 2323 2315 - dev = rt->fib6_nh.fib_nh_dev; 2324 + dev = rt->fib6_nh->fib_nh_dev; 2316 2325 seq_printf(seq, " %08x %08x %08x %08x %8s\n", 2317 2326 rt->fib6_metric, refcount_read(&rt->fib6_ref), 0, 2318 2327 flags, dev ? dev->name : "");
+4 -4
net/ipv6/ndisc.c
··· 1293 1293 rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); 1294 1294 1295 1295 if (rt) { 1296 - neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, 1297 - rt->fib6_nh.fib_nh_dev, NULL, 1296 + neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, 1297 + rt->fib6_nh->fib_nh_dev, NULL, 1298 1298 &ipv6_hdr(skb)->saddr); 1299 1299 if (!neigh) { 1300 1300 ND_PRINTK(0, err, ··· 1323 1323 return; 1324 1324 } 1325 1325 1326 - neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, 1327 - rt->fib6_nh.fib_nh_dev, NULL, 1326 + neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, 1327 + rt->fib6_nh->fib_nh_dev, NULL, 1328 1328 &ipv6_hdr(skb)->saddr); 1329 1329 if (!neigh) { 1330 1330 ND_PRINTK(0, err,
+291 -161
net/ipv6/route.c
··· 441 441 if (!fl6->mp_hash) 442 442 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); 443 443 444 - if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound)) 444 + if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) 445 445 goto out; 446 446 447 447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, 448 448 fib6_siblings) { 449 - const struct fib6_nh *nh = &sibling->fib6_nh; 449 + const struct fib6_nh *nh = sibling->fib6_nh; 450 450 int nh_upper_bound; 451 451 452 452 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); ··· 460 460 461 461 out: 462 462 res->f6i = match; 463 - res->nh = &match->fib6_nh; 463 + res->nh = match->fib6_nh; 464 464 } 465 465 466 466 /* ··· 496 496 struct fib6_nh *nh; 497 497 498 498 if (!oif && ipv6_addr_any(saddr)) { 499 - nh = &f6i->fib6_nh; 499 + nh = f6i->fib6_nh; 500 500 if (!(nh->fib_nh_flags & RTNH_F_DEAD)) 501 501 goto out; 502 502 } 503 503 504 504 for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) { 505 - nh = &spf6i->fib6_nh; 505 + nh = spf6i->fib6_nh; 506 506 if (__rt6_device_match(net, nh, saddr, oif, flags)) { 507 507 res->f6i = spf6i; 508 508 goto out; ··· 511 511 512 512 if (oif && flags & RT6_LOOKUP_F_IFACE) { 513 513 res->f6i = net->ipv6.fib6_null_entry; 514 - nh = &res->f6i->fib6_nh; 514 + nh = res->f6i->fib6_nh; 515 515 goto out; 516 516 } 517 517 518 - nh = &f6i->fib6_nh; 518 + nh = f6i->fib6_nh; 519 519 if (nh->fib_nh_flags & RTNH_F_DEAD) { 520 520 res->f6i = net->ipv6.fib6_null_entry; 521 - nh = &res->f6i->fib6_nh; 521 + nh = res->f6i->fib6_nh; 522 522 } 523 523 out: 524 524 res->nh = nh; ··· 714 714 if (fib6_check_expired(f6i)) 715 715 continue; 716 716 717 - nh = &f6i->fib6_nh; 717 + nh = f6i->fib6_nh; 718 718 if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) { 719 719 res->f6i = f6i; 720 720 res->nh = nh; ··· 796 796 out: 797 797 if (!res->f6i) { 798 798 res->f6i = net->ipv6.fib6_null_entry; 799 - res->nh = 
&res->f6i->fib6_nh; 799 + res->nh = res->f6i->fib6_nh; 800 800 res->fib6_flags = res->f6i->fib6_flags; 801 801 res->fib6_type = res->f6i->fib6_type; 802 802 } ··· 1270 1270 { 1271 1271 struct rt6_info *pcpu_rt, **p; 1272 1272 1273 - p = this_cpu_ptr(res->f6i->rt6i_pcpu); 1273 + p = this_cpu_ptr(res->nh->rt6i_pcpu); 1274 1274 pcpu_rt = *p; 1275 1275 1276 1276 if (pcpu_rt) ··· 1291 1291 } 1292 1292 1293 1293 dst_hold(&pcpu_rt->dst); 1294 - p = this_cpu_ptr(res->f6i->rt6i_pcpu); 1294 + p = this_cpu_ptr(res->nh->rt6i_pcpu); 1295 1295 prev = cmpxchg(p, NULL, pcpu_rt); 1296 1296 BUG_ON(prev); 1297 1297 ··· 1461 1461 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); 1462 1462 } 1463 1463 1464 + #define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL 1465 + 1466 + /* used when the flushed bit is not relevant, only access to the bucket 1467 + * (ie., all bucket users except rt6_insert_exception); 1468 + * 1469 + * called under rcu lock; sometimes called with rt6_exception_lock held 1470 + */ 1471 + static 1472 + struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh, 1473 + spinlock_t *lock) 1474 + { 1475 + struct rt6_exception_bucket *bucket; 1476 + 1477 + if (lock) 1478 + bucket = rcu_dereference_protected(nh->rt6i_exception_bucket, 1479 + lockdep_is_held(lock)); 1480 + else 1481 + bucket = rcu_dereference(nh->rt6i_exception_bucket); 1482 + 1483 + /* remove bucket flushed bit if set */ 1484 + if (bucket) { 1485 + unsigned long p = (unsigned long)bucket; 1486 + 1487 + p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED; 1488 + bucket = (struct rt6_exception_bucket *)p; 1489 + } 1490 + 1491 + return bucket; 1492 + } 1493 + 1494 + static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket) 1495 + { 1496 + unsigned long p = (unsigned long)bucket; 1497 + 1498 + return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED); 1499 + } 1500 + 1501 + /* called with rt6_exception_lock held */ 1502 + static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh, 1503 + 
spinlock_t *lock) 1504 + { 1505 + struct rt6_exception_bucket *bucket; 1506 + unsigned long p; 1507 + 1508 + bucket = rcu_dereference_protected(nh->rt6i_exception_bucket, 1509 + lockdep_is_held(lock)); 1510 + 1511 + p = (unsigned long)bucket; 1512 + p |= FIB6_EXCEPTION_BUCKET_FLUSHED; 1513 + bucket = (struct rt6_exception_bucket *)p; 1514 + rcu_assign_pointer(nh->rt6i_exception_bucket, bucket); 1515 + } 1516 + 1464 1517 static int rt6_insert_exception(struct rt6_info *nrt, 1465 1518 const struct fib6_result *res) 1466 1519 { 1467 1520 struct net *net = dev_net(nrt->dst.dev); 1468 1521 struct rt6_exception_bucket *bucket; 1522 + struct fib6_info *f6i = res->f6i; 1469 1523 struct in6_addr *src_key = NULL; 1470 1524 struct rt6_exception *rt6_ex; 1471 - struct fib6_info *f6i = res->f6i; 1525 + struct fib6_nh *nh = res->nh; 1472 1526 int err = 0; 1473 1527 1474 1528 spin_lock_bh(&rt6_exception_lock); 1475 1529 1476 - if (f6i->exception_bucket_flushed) { 1477 - err = -EINVAL; 1478 - goto out; 1479 - } 1480 - 1481 - bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1482 - lockdep_is_held(&rt6_exception_lock)); 1530 + bucket = rcu_dereference_protected(nh->rt6i_exception_bucket, 1531 + lockdep_is_held(&rt6_exception_lock)); 1483 1532 if (!bucket) { 1484 1533 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), 1485 1534 GFP_ATOMIC); ··· 1536 1487 err = -ENOMEM; 1537 1488 goto out; 1538 1489 } 1539 - rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket); 1490 + rcu_assign_pointer(nh->rt6i_exception_bucket, bucket); 1491 + } else if (fib6_nh_excptn_bucket_flushed(bucket)) { 1492 + err = -EINVAL; 1493 + goto out; 1540 1494 } 1541 1495 1542 1496 #ifdef CONFIG_IPV6_SUBTREES ··· 1594 1542 return err; 1595 1543 } 1596 1544 1597 - void rt6_flush_exceptions(struct fib6_info *rt) 1545 + static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from) 1598 1546 { 1599 1547 struct rt6_exception_bucket *bucket; 1600 1548 struct rt6_exception 
*rt6_ex; ··· 1602 1550 int i; 1603 1551 1604 1552 spin_lock_bh(&rt6_exception_lock); 1605 - /* Prevent rt6_insert_exception() to recreate the bucket list */ 1606 - rt->exception_bucket_flushed = 1; 1607 1553 1608 - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1609 - lockdep_is_held(&rt6_exception_lock)); 1554 + bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); 1610 1555 if (!bucket) 1611 1556 goto out; 1612 1557 1558 + /* Prevent rt6_insert_exception() to recreate the bucket list */ 1559 + if (!from) 1560 + fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock); 1561 + 1613 1562 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1614 - hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) 1615 - rt6_remove_exception(bucket, rt6_ex); 1616 - WARN_ON_ONCE(bucket->depth); 1563 + hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) { 1564 + if (!from || 1565 + rcu_access_pointer(rt6_ex->rt6i->from) == from) 1566 + rt6_remove_exception(bucket, rt6_ex); 1567 + } 1568 + WARN_ON_ONCE(!from && bucket->depth); 1617 1569 bucket++; 1618 1570 } 1619 - 1620 1571 out: 1621 1572 spin_unlock_bh(&rt6_exception_lock); 1573 + } 1574 + 1575 + void rt6_flush_exceptions(struct fib6_info *f6i) 1576 + { 1577 + fib6_nh_flush_exceptions(f6i->fib6_nh, f6i); 1622 1578 } 1623 1579 1624 1580 /* Find cached rt in the hash table inside passed in rt ··· 1657 1597 src_key = saddr; 1658 1598 find_ex: 1659 1599 #endif 1660 - bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); 1600 + bucket = fib6_nh_get_excptn_bucket(res->nh, NULL); 1661 1601 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); 1662 1602 1663 1603 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) ··· 1675 1615 } 1676 1616 1677 1617 /* Remove the passed in cached rt from the hash table that contains it */ 1678 - static int rt6_remove_exception_rt(struct rt6_info *rt) 1618 + static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen, 1619 + const struct rt6_info 
*rt) 1679 1620 { 1621 + const struct in6_addr *src_key = NULL; 1680 1622 struct rt6_exception_bucket *bucket; 1681 - struct in6_addr *src_key = NULL; 1682 1623 struct rt6_exception *rt6_ex; 1683 - struct fib6_info *from; 1684 1624 int err; 1685 1625 1686 - from = rcu_dereference(rt->from); 1687 - if (!from || 1688 - !(rt->rt6i_flags & RTF_CACHE)) 1689 - return -EINVAL; 1690 - 1691 - if (!rcu_access_pointer(from->rt6i_exception_bucket)) 1626 + if (!rcu_access_pointer(nh->rt6i_exception_bucket)) 1692 1627 return -ENOENT; 1693 1628 1694 1629 spin_lock_bh(&rt6_exception_lock); 1695 - bucket = rcu_dereference_protected(from->rt6i_exception_bucket, 1696 - lockdep_is_held(&rt6_exception_lock)); 1630 + bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); 1631 + 1697 1632 #ifdef CONFIG_IPV6_SUBTREES 1698 1633 /* rt6i_src.plen != 0 indicates 'from' is in subtree 1699 1634 * and exception table is indexed by a hash of ··· 1696 1641 * Otherwise, the exception table is indexed by 1697 1642 * a hash of only rt6i_dst. 
1698 1643 */ 1699 - if (from->fib6_src.plen) 1644 + if (plen) 1700 1645 src_key = &rt->rt6i_src.addr; 1701 1646 #endif 1702 1647 rt6_ex = __rt6_find_exception_spinlock(&bucket, ··· 1713 1658 return err; 1714 1659 } 1715 1660 1661 + static int rt6_remove_exception_rt(struct rt6_info *rt) 1662 + { 1663 + struct fib6_info *from; 1664 + 1665 + from = rcu_dereference(rt->from); 1666 + if (!from || !(rt->rt6i_flags & RTF_CACHE)) 1667 + return -EINVAL; 1668 + 1669 + return fib6_nh_remove_exception(from->fib6_nh, 1670 + from->fib6_src.plen, rt); 1671 + } 1672 + 1716 1673 /* Find rt6_ex which contains the passed in rt cache and 1717 1674 * refresh its stamp 1718 1675 */ 1719 - static void rt6_update_exception_stamp_rt(struct rt6_info *rt) 1676 + static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen, 1677 + const struct rt6_info *rt) 1720 1678 { 1679 + const struct in6_addr *src_key = NULL; 1721 1680 struct rt6_exception_bucket *bucket; 1722 - struct in6_addr *src_key = NULL; 1723 1681 struct rt6_exception *rt6_ex; 1724 - struct fib6_info *from; 1725 1682 1726 - rcu_read_lock(); 1727 - from = rcu_dereference(rt->from); 1728 - if (!from || !(rt->rt6i_flags & RTF_CACHE)) 1729 - goto unlock; 1730 - 1731 - bucket = rcu_dereference(from->rt6i_exception_bucket); 1732 - 1683 + bucket = fib6_nh_get_excptn_bucket(nh, NULL); 1733 1684 #ifdef CONFIG_IPV6_SUBTREES 1734 1685 /* rt6i_src.plen != 0 indicates 'from' is in subtree 1735 1686 * and exception table is indexed by a hash of ··· 1743 1682 * Otherwise, the exception table is indexed by 1744 1683 * a hash of only rt6i_dst. 
1745 1684 */ 1746 - if (from->fib6_src.plen) 1685 + if (plen) 1747 1686 src_key = &rt->rt6i_src.addr; 1748 1687 #endif 1749 - rt6_ex = __rt6_find_exception_rcu(&bucket, 1750 - &rt->rt6i_dst.addr, 1751 - src_key); 1688 + rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key); 1752 1689 if (rt6_ex) 1753 1690 rt6_ex->stamp = jiffies; 1691 + } 1754 1692 1693 + static void rt6_update_exception_stamp_rt(struct rt6_info *rt) 1694 + { 1695 + struct fib6_info *from; 1696 + 1697 + rcu_read_lock(); 1698 + 1699 + from = rcu_dereference(rt->from); 1700 + if (!from || !(rt->rt6i_flags & RTF_CACHE)) 1701 + goto unlock; 1702 + 1703 + fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt); 1755 1704 unlock: 1756 1705 rcu_read_unlock(); 1757 1706 } ··· 1789 1718 } 1790 1719 1791 1720 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, 1792 - struct fib6_info *rt, int mtu) 1721 + const struct fib6_nh *nh, int mtu) 1793 1722 { 1794 1723 struct rt6_exception_bucket *bucket; 1795 1724 struct rt6_exception *rt6_ex; 1796 1725 int i; 1797 1726 1798 - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1799 - lockdep_is_held(&rt6_exception_lock)); 1800 - 1727 + bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); 1801 1728 if (!bucket) 1802 1729 return; 1803 1730 ··· 1817 1748 1818 1749 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) 1819 1750 1820 - static void rt6_exceptions_clean_tohost(struct fib6_info *rt, 1821 - struct in6_addr *gateway) 1751 + static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh, 1752 + const struct in6_addr *gateway) 1822 1753 { 1823 1754 struct rt6_exception_bucket *bucket; 1824 1755 struct rt6_exception *rt6_ex; 1825 1756 struct hlist_node *tmp; 1826 1757 int i; 1827 1758 1828 - if (!rcu_access_pointer(rt->rt6i_exception_bucket)) 1759 + if (!rcu_access_pointer(nh->rt6i_exception_bucket)) 1829 1760 return; 1830 1761 1831 1762 spin_lock_bh(&rt6_exception_lock); 1832 - bucket = 
rcu_dereference_protected(rt->rt6i_exception_bucket, 1833 - lockdep_is_held(&rt6_exception_lock)); 1834 - 1763 + bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); 1835 1764 if (bucket) { 1836 1765 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1837 1766 hlist_for_each_entry_safe(rt6_ex, tmp, ··· 1894 1827 gc_args->more++; 1895 1828 } 1896 1829 1897 - void rt6_age_exceptions(struct fib6_info *rt, 1898 - struct fib6_gc_args *gc_args, 1899 - unsigned long now) 1830 + static void fib6_nh_age_exceptions(const struct fib6_nh *nh, 1831 + struct fib6_gc_args *gc_args, 1832 + unsigned long now) 1900 1833 { 1901 1834 struct rt6_exception_bucket *bucket; 1902 1835 struct rt6_exception *rt6_ex; 1903 1836 struct hlist_node *tmp; 1904 1837 int i; 1905 1838 1906 - if (!rcu_access_pointer(rt->rt6i_exception_bucket)) 1839 + if (!rcu_access_pointer(nh->rt6i_exception_bucket)) 1907 1840 return; 1908 1841 1909 1842 rcu_read_lock_bh(); 1910 1843 spin_lock(&rt6_exception_lock); 1911 - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1912 - lockdep_is_held(&rt6_exception_lock)); 1913 - 1844 + bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock); 1914 1845 if (bucket) { 1915 1846 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1916 1847 hlist_for_each_entry_safe(rt6_ex, tmp, ··· 1921 1856 } 1922 1857 spin_unlock(&rt6_exception_lock); 1923 1858 rcu_read_unlock_bh(); 1859 + } 1860 + 1861 + void rt6_age_exceptions(struct fib6_info *f6i, 1862 + struct fib6_gc_args *gc_args, 1863 + unsigned long now) 1864 + { 1865 + fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now); 1924 1866 } 1925 1867 1926 1868 /* must be called with rcu lock held */ ··· 2456 2384 rcu_read_unlock(); 2457 2385 return; 2458 2386 } 2459 - res.nh = &res.f6i->fib6_nh; 2387 + res.nh = res.f6i->fib6_nh; 2460 2388 res.fib6_flags = res.f6i->fib6_flags; 2461 2389 res.fib6_type = res.f6i->fib6_type; 2462 2390 ··· 2599 2527 restart: 2600 2528 for_each_fib6_node_rt_rcu(fn) { 2601 2529 res.f6i = rt; 
2602 - res.nh = &rt->fib6_nh; 2530 + res.nh = rt->fib6_nh; 2603 2531 2604 2532 if (fib6_check_expired(rt)) 2605 2533 continue; ··· 2623 2551 } 2624 2552 2625 2553 res.f6i = rt; 2626 - res.nh = &rt->fib6_nh; 2554 + res.nh = rt->fib6_nh; 2627 2555 out: 2628 2556 if (ret) { 2629 2557 ip6_hold_safe(net, &ret); ··· 3140 3068 !netif_carrier_ok(dev)) 3141 3069 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 3142 3070 3071 + fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); 3072 + if (!fib6_nh->rt6i_pcpu) { 3073 + err = -ENOMEM; 3074 + goto out; 3075 + } 3076 + 3143 3077 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, 3144 3078 cfg->fc_encap_type, cfg, gfp_flags, extack); 3145 3079 if (err) ··· 3170 3092 3171 3093 void fib6_nh_release(struct fib6_nh *fib6_nh) 3172 3094 { 3095 + struct rt6_exception_bucket *bucket; 3096 + 3097 + rcu_read_lock(); 3098 + 3099 + fib6_nh_flush_exceptions(fib6_nh, NULL); 3100 + bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL); 3101 + if (bucket) { 3102 + rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL); 3103 + kfree(bucket); 3104 + } 3105 + 3106 + rcu_read_unlock(); 3107 + 3108 + if (fib6_nh->rt6i_pcpu) { 3109 + int cpu; 3110 + 3111 + for_each_possible_cpu(cpu) { 3112 + struct rt6_info **ppcpu_rt; 3113 + struct rt6_info *pcpu_rt; 3114 + 3115 + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); 3116 + pcpu_rt = *ppcpu_rt; 3117 + if (pcpu_rt) { 3118 + dst_dev_put(&pcpu_rt->dst); 3119 + dst_release(&pcpu_rt->dst); 3120 + *ppcpu_rt = NULL; 3121 + } 3122 + } 3123 + 3124 + free_percpu(fib6_nh->rt6i_pcpu); 3125 + } 3126 + 3173 3127 fib_nh_common_release(&fib6_nh->nh_common); 3174 3128 } 3175 3129 ··· 3264 3154 goto out; 3265 3155 3266 3156 err = -ENOMEM; 3267 - rt = fib6_info_alloc(gfp_flags); 3157 + rt = fib6_info_alloc(gfp_flags, true); 3268 3158 if (!rt) 3269 3159 goto out; 3270 3160 ··· 3304 3194 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); 3305 3195 rt->fib6_src.plen = cfg->fc_src_len; 3306 
3196 #endif 3307 - err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack); 3197 + err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); 3308 3198 if (err) 3309 3199 goto out; 3310 3200 ··· 3312 3202 * they would result in kernel looping; promote them to reject routes 3313 3203 */ 3314 3204 addr_type = ipv6_addr_type(&cfg->fc_dst); 3315 - if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type)) 3205 + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type)) 3316 3206 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; 3317 3207 3318 3208 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { ··· 3430 3320 return err; 3431 3321 } 3432 3322 3433 - static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) 3323 + static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) 3434 3324 { 3435 3325 int rc = -ESRCH; 3436 3326 ··· 3446 3336 return rc; 3447 3337 } 3448 3338 3339 + static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt, 3340 + struct fib6_nh *nh) 3341 + { 3342 + struct fib6_result res = { 3343 + .f6i = rt, 3344 + .nh = nh, 3345 + }; 3346 + struct rt6_info *rt_cache; 3347 + 3348 + rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src); 3349 + if (rt_cache) 3350 + return __ip6_del_cached_rt(rt_cache, cfg); 3351 + 3352 + return 0; 3353 + } 3354 + 3449 3355 static int ip6_route_del(struct fib6_config *cfg, 3450 3356 struct netlink_ext_ack *extack) 3451 3357 { 3452 - struct rt6_info *rt_cache; 3453 3358 struct fib6_table *table; 3454 3359 struct fib6_info *rt; 3455 3360 struct fib6_node *fn; ··· 3487 3362 for_each_fib6_node_rt_rcu(fn) { 3488 3363 struct fib6_nh *nh; 3489 3364 3365 + nh = rt->fib6_nh; 3490 3366 if (cfg->fc_flags & RTF_CACHE) { 3491 - struct fib6_result res = { 3492 - .f6i = rt, 3493 - }; 3494 3367 int rc; 3495 3368 3496 - rt_cache = rt6_find_cached_rt(&res, 3497 - &cfg->fc_dst, 3498 - &cfg->fc_src); 3499 - if (rt_cache) { 3500 - rc = ip6_del_cached_rt(rt_cache, cfg); 
3501 - if (rc != -ESRCH) { 3502 - rcu_read_unlock(); 3503 - return rc; 3504 - } 3369 + rc = ip6_del_cached_rt(cfg, rt, nh); 3370 + if (rc != -ESRCH) { 3371 + rcu_read_unlock(); 3372 + return rc; 3505 3373 } 3506 3374 continue; 3507 3375 } 3508 3376 3509 - nh = &rt->fib6_nh; 3510 3377 if (cfg->fc_ifindex && 3511 3378 (!nh->fib_nh_dev || 3512 3379 nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) ··· 3620 3503 if (!res.f6i) 3621 3504 goto out; 3622 3505 3623 - res.nh = &res.f6i->fib6_nh; 3506 + res.nh = res.f6i->fib6_nh; 3624 3507 res.fib6_flags = res.f6i->fib6_flags; 3625 3508 res.fib6_type = res.f6i->fib6_type; 3626 3509 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); ··· 3672 3555 goto out; 3673 3556 3674 3557 for_each_fib6_node_rt_rcu(fn) { 3675 - if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) 3558 + if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex) 3676 3559 continue; 3677 3560 if (!(rt->fib6_flags & RTF_ROUTEINFO) || 3678 - !rt->fib6_nh.fib_nh_gw_family) 3561 + !rt->fib6_nh->fib_nh_gw_family) 3679 3562 continue; 3680 - if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) 3563 + if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr)) 3681 3564 continue; 3682 3565 if (!fib6_info_hold_safe(rt)) 3683 3566 continue; ··· 3735 3618 3736 3619 rcu_read_lock(); 3737 3620 for_each_fib6_node_rt_rcu(&table->tb6_root) { 3738 - struct fib6_nh *nh = &rt->fib6_nh; 3621 + struct fib6_nh *nh = rt->fib6_nh; 3739 3622 3740 3623 if (dev == nh->fib_nh_dev && 3741 3624 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ··· 3987 3870 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 3988 3871 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 3989 3872 3990 - if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) && 3873 + if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) && 3991 3874 rt != net->ipv6.fib6_null_entry && 3992 3875 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { 3993 3876 spin_lock_bh(&rt6_exception_lock); ··· 4015 3898 
static int fib6_clean_tohost(struct fib6_info *rt, void *arg) 4016 3899 { 4017 3900 struct in6_addr *gateway = (struct in6_addr *)arg; 3901 + struct fib6_nh *nh = rt->fib6_nh; 4018 3902 4019 3903 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && 4020 - rt->fib6_nh.fib_nh_gw_family && 4021 - ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { 3904 + nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6)) 4022 3905 return -1; 4023 - } 4024 3906 4025 3907 /* Further clean up cached routes in exception table. 4026 3908 * This is needed because cached route may have a different 4027 3909 * gateway than its 'parent' in the case of an ip redirect. 4028 3910 */ 4029 - rt6_exceptions_clean_tohost(rt, gateway); 3911 + fib6_nh_exceptions_clean_tohost(nh, gateway); 4030 3912 4031 3913 return 0; 4032 3914 } ··· 4065 3949 4066 3950 static bool rt6_is_dead(const struct fib6_info *rt) 4067 3951 { 4068 - if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD || 4069 - (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && 4070 - ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev))) 3952 + if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD || 3953 + (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN && 3954 + ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev))) 4071 3955 return true; 4072 3956 4073 3957 return false; ··· 4079 3963 int total = 0; 4080 3964 4081 3965 if (!rt6_is_dead(rt)) 4082 - total += rt->fib6_nh.fib_nh_weight; 3966 + total += rt->fib6_nh->fib_nh_weight; 4083 3967 4084 3968 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { 4085 3969 if (!rt6_is_dead(iter)) 4086 - total += iter->fib6_nh.fib_nh_weight; 3970 + total += iter->fib6_nh->fib_nh_weight; 4087 3971 } 4088 3972 4089 3973 return total; ··· 4094 3978 int upper_bound = -1; 4095 3979 4096 3980 if (!rt6_is_dead(rt)) { 4097 - *weight += rt->fib6_nh.fib_nh_weight; 3981 + *weight += rt->fib6_nh->fib_nh_weight; 4098 3982 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, 4099 3983 total) - 1; 4100 3984 } 4101 - 
atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound); 3985 + atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound); 4102 3986 } 4103 3987 4104 3988 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) ··· 4142 4026 struct net *net = dev_net(arg->dev); 4143 4027 4144 4028 if (rt != net->ipv6.fib6_null_entry && 4145 - rt->fib6_nh.fib_nh_dev == arg->dev) { 4146 - rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags; 4029 + rt->fib6_nh->fib_nh_dev == arg->dev) { 4030 + rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags; 4147 4031 fib6_update_sernum_upto_root(net, rt); 4148 4032 rt6_multipath_rebalance(rt); 4149 4033 } ··· 4171 4055 { 4172 4056 struct fib6_info *iter; 4173 4057 4174 - if (rt->fib6_nh.fib_nh_dev == dev) 4058 + if (rt->fib6_nh->fib_nh_dev == dev) 4175 4059 return true; 4176 4060 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4177 - if (iter->fib6_nh.fib_nh_dev == dev) 4061 + if (iter->fib6_nh->fib_nh_dev == dev) 4178 4062 return true; 4179 4063 4180 4064 return false; ··· 4195 4079 struct fib6_info *iter; 4196 4080 unsigned int dead = 0; 4197 4081 4198 - if (rt->fib6_nh.fib_nh_dev == down_dev || 4199 - rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 4082 + if (rt->fib6_nh->fib_nh_dev == down_dev || 4083 + rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD) 4200 4084 dead++; 4201 4085 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4202 - if (iter->fib6_nh.fib_nh_dev == down_dev || 4203 - iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 4086 + if (iter->fib6_nh->fib_nh_dev == down_dev || 4087 + iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD) 4204 4088 dead++; 4205 4089 4206 4090 return dead; ··· 4212 4096 { 4213 4097 struct fib6_info *iter; 4214 4098 4215 - if (rt->fib6_nh.fib_nh_dev == dev) 4216 - rt->fib6_nh.fib_nh_flags |= nh_flags; 4099 + if (rt->fib6_nh->fib_nh_dev == dev) 4100 + rt->fib6_nh->fib_nh_flags |= nh_flags; 4217 4101 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4218 - if (iter->fib6_nh.fib_nh_dev == dev) 4219 - 
iter->fib6_nh.fib_nh_flags |= nh_flags; 4102 + if (iter->fib6_nh->fib_nh_dev == dev) 4103 + iter->fib6_nh->fib_nh_flags |= nh_flags; 4220 4104 } 4221 4105 4222 4106 /* called with write lock held for table with rt */ ··· 4231 4115 4232 4116 switch (arg->event) { 4233 4117 case NETDEV_UNREGISTER: 4234 - return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; 4118 + return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0; 4235 4119 case NETDEV_DOWN: 4236 4120 if (rt->should_flush) 4237 4121 return -1; 4238 4122 if (!rt->fib6_nsiblings) 4239 - return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; 4123 + return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0; 4240 4124 if (rt6_multipath_uses_dev(rt, dev)) { 4241 4125 unsigned int count; 4242 4126 ··· 4252 4136 } 4253 4137 return -2; 4254 4138 case NETDEV_CHANGE: 4255 - if (rt->fib6_nh.fib_nh_dev != dev || 4139 + if (rt->fib6_nh->fib_nh_dev != dev || 4256 4140 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) 4257 4141 break; 4258 - rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN; 4142 + rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 4259 4143 rt6_multipath_rebalance(rt); 4260 4144 break; 4261 4145 } ··· 4289 4173 struct rt6_mtu_change_arg { 4290 4174 struct net_device *dev; 4291 4175 unsigned int mtu; 4176 + struct fib6_info *f6i; 4292 4177 }; 4293 4178 4294 - static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) 4179 + static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg) 4180 + { 4181 + struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg; 4182 + struct fib6_info *f6i = arg->f6i; 4183 + 4184 + /* For administrative MTU increase, there is no way to discover 4185 + * IPv6 PMTU increase, so PMTU increase should be updated here. 4186 + * Since RFC 1981 doesn't include administrative MTU increase 4187 + * update PMTU increase is a MUST. (i.e. 
jumbo frame) 4188 + */ 4189 + if (nh->fib_nh_dev == arg->dev) { 4190 + struct inet6_dev *idev = __in6_dev_get(arg->dev); 4191 + u32 mtu = f6i->fib6_pmtu; 4192 + 4193 + if (mtu >= arg->mtu || 4194 + (mtu < arg->mtu && mtu == idev->cnf.mtu6)) 4195 + fib6_metric_set(f6i, RTAX_MTU, arg->mtu); 4196 + 4197 + spin_lock_bh(&rt6_exception_lock); 4198 + rt6_exceptions_update_pmtu(idev, nh, arg->mtu); 4199 + spin_unlock_bh(&rt6_exception_lock); 4200 + } 4201 + 4202 + return 0; 4203 + } 4204 + 4205 + static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg) 4295 4206 { 4296 4207 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 4297 4208 struct inet6_dev *idev; ··· 4333 4190 if (!idev) 4334 4191 return 0; 4335 4192 4336 - /* For administrative MTU increase, there is no way to discover 4337 - IPv6 PMTU increase, so PMTU increase should be updated here. 4338 - Since RFC 1981 doesn't include administrative MTU increase 4339 - update PMTU increase is a MUST. (i.e. jumbo frame) 4340 - */ 4341 - if (rt->fib6_nh.fib_nh_dev == arg->dev && 4342 - !fib6_metric_locked(rt, RTAX_MTU)) { 4343 - u32 mtu = rt->fib6_pmtu; 4193 + if (fib6_metric_locked(f6i, RTAX_MTU)) 4194 + return 0; 4344 4195 4345 - if (mtu >= arg->mtu || 4346 - (mtu < arg->mtu && mtu == idev->cnf.mtu6)) 4347 - fib6_metric_set(rt, RTAX_MTU, arg->mtu); 4348 - 4349 - spin_lock_bh(&rt6_exception_lock); 4350 - rt6_exceptions_update_pmtu(idev, rt, arg->mtu); 4351 - spin_unlock_bh(&rt6_exception_lock); 4352 - } 4353 - return 0; 4196 + arg->f6i = f6i; 4197 + return fib6_nh_mtu_change(f6i->fib6_nh, arg); 4354 4198 } 4355 4199 4356 4200 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) ··· 4617 4487 goto cleanup; 4618 4488 } 4619 4489 4620 - rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1; 4490 + rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1; 4621 4491 4622 4492 err = ip6_route_info_append(info->nl_net, &rt6_nh_list, 4623 4493 rt, &r_cfg); ··· 4784 4654 nexthop_len = nla_total_size(0) /* 
RTA_MULTIPATH */ 4785 4655 + NLA_ALIGN(sizeof(struct rtnexthop)) 4786 4656 + nla_total_size(16) /* RTA_GATEWAY */ 4787 - + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws); 4657 + + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws); 4788 4658 4789 4659 nexthop_len *= rt->fib6_nsiblings; 4790 4660 } ··· 4802 4672 + nla_total_size(sizeof(struct rta_cacheinfo)) 4803 4673 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 4804 4674 + nla_total_size(1) /* RTA_PREF */ 4805 - + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws) 4675 + + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws) 4806 4676 + nexthop_len; 4807 4677 } 4808 4678 ··· 4922 4792 if (!mp) 4923 4793 goto nla_put_failure; 4924 4794 4925 - if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common, 4926 - rt->fib6_nh.fib_nh_weight) < 0) 4795 + if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, 4796 + rt->fib6_nh->fib_nh_weight) < 0) 4927 4797 goto nla_put_failure; 4928 4798 4929 4799 list_for_each_entry_safe(sibling, next_sibling, 4930 4800 &rt->fib6_siblings, fib6_siblings) { 4931 - if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common, 4932 - sibling->fib6_nh.fib_nh_weight) < 0) 4801 + if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, 4802 + sibling->fib6_nh->fib_nh_weight) < 0) 4933 4803 goto nla_put_failure; 4934 4804 } 4935 4805 ··· 4937 4807 } else { 4938 4808 unsigned char nh_flags = 0; 4939 4809 4940 - if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common, 4810 + if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, 4941 4811 &nh_flags, false) < 0) 4942 4812 goto nla_put_failure; 4943 4813 ··· 4967 4837 static bool fib6_info_uses_dev(const struct fib6_info *f6i, 4968 4838 const struct net_device *dev) 4969 4839 { 4970 - if (f6i->fib6_nh.fib_nh_dev == dev) 4840 + if (f6i->fib6_nh->fib_nh_dev == dev) 4971 4841 return true; 4972 4842 4973 4843 if (f6i->fib6_nsiblings) { ··· 4975 4845 4976 4846 list_for_each_entry_safe(sibling, next_sibling, 4977 4847 &f6i->fib6_siblings, fib6_siblings) { 4978 - if 
(sibling->fib6_nh.fib_nh_dev == dev) 4848 + if (sibling->fib6_nh->fib_nh_dev == dev) 4979 4849 return true; 4980 4850 } 4981 4851 } ··· 5296 5166 return NOTIFY_OK; 5297 5167 5298 5168 if (event == NETDEV_REGISTER) { 5299 - net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev; 5169 + net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev; 5300 5170 net->ipv6.ip6_null_entry->dst.dev = dev; 5301 5171 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 5302 5172 #ifdef CONFIG_IPV6_MULTIPLE_TABLES ··· 5490 5360 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 5491 5361 goto out_ip6_dst_ops; 5492 5362 5493 - net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, 5494 - sizeof(*net->ipv6.fib6_null_entry), 5495 - GFP_KERNEL); 5363 + net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true); 5496 5364 if (!net->ipv6.fib6_null_entry) 5497 5365 goto out_ip6_dst_entries; 5366 + memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template, 5367 + sizeof(*net->ipv6.fib6_null_entry)); 5498 5368 5499 5369 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 5500 5370 sizeof(*net->ipv6.ip6_null_entry), ··· 5631 5501 /* Registering of the loopback is done before this portion of code, 5632 5502 * the loopback reference in rt6_info will not be taken, do it 5633 5503 * manually for init_net */ 5634 - init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev; 5504 + init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev; 5635 5505 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 5636 5506 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5637 5507 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+455
tools/testing/selftests/net/icmp_redirect.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + # 4 + # redirect test 5 + # 6 + # .253 +----+ 7 + # +----| r1 | 8 + # | +----+ 9 + # +----+ | |.1 10 + # | h1 |--------------+ | 10.1.1.0/30 2001:db8:1::0/126 11 + # +----+ .1 | |.2 12 + # 172.16.1/24 | +----+ +----+ 13 + # 2001:db8:16:1/64 +----| r2 |-------------------| h2 | 14 + # .254 +----+ .254 .2 +----+ 15 + # 172.16.2/24 16 + # 2001:db8:16:2/64 17 + # 18 + # Route from h1 to h2 goes through r1, eth1 - connection between r1 and r2. 19 + # Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent 20 + # from r1 to h1 telling h1 to use r2 when talking to h2. 21 + 22 + VERBOSE=0 23 + PAUSE_ON_FAIL=no 24 + 25 + H1_N1_IP=172.16.1.1 26 + R1_N1_IP=172.16.1.253 27 + R2_N1_IP=172.16.1.254 28 + 29 + H1_N1_IP6=2001:db8:16:1::1 30 + R1_N1_IP6=2001:db8:16:1::253 31 + R2_N1_IP6=2001:db8:16:1::254 32 + 33 + R1_R2_N1_IP=10.1.1.1 34 + R2_R1_N1_IP=10.1.1.2 35 + 36 + R1_R2_N1_IP6=2001:db8:1::1 37 + R2_R1_N1_IP6=2001:db8:1::2 38 + 39 + H2_N2=172.16.2.0/24 40 + H2_N2_6=2001:db8:16:2::/64 41 + H2_N2_IP=172.16.2.2 42 + R2_N2_IP=172.16.2.254 43 + H2_N2_IP6=2001:db8:16:2::2 44 + R2_N2_IP6=2001:db8:16:2::254 45 + 46 + VRF=red 47 + VRF_TABLE=1111 48 + 49 + ################################################################################ 50 + # helpers 51 + 52 + log_section() 53 + { 54 + echo 55 + echo "###########################################################################" 56 + echo "$*" 57 + echo "###########################################################################" 58 + echo 59 + } 60 + 61 + log_test() 62 + { 63 + local rc=$1 64 + local expected=$2 65 + local msg="$3" 66 + 67 + if [ ${rc} -eq ${expected} ]; then 68 + printf "TEST: %-60s [ OK ]\n" "${msg}" 69 + nsuccess=$((nsuccess+1)) 70 + else 71 + ret=1 72 + nfail=$((nfail+1)) 73 + printf "TEST: %-60s [FAIL]\n" "${msg}" 74 + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then 75 + echo 76 + echo "hit enter to continue, 'q' to quit" 77 + read a 78 + [ 
"$a" = "q" ] && exit 1 79 + fi 80 + fi 81 + } 82 + 83 + run_cmd() 84 + { 85 + local cmd="$*" 86 + local out 87 + local rc 88 + 89 + if [ "$VERBOSE" = "1" ]; then 90 + echo "COMMAND: $cmd" 91 + fi 92 + 93 + out=$(eval $cmd 2>&1) 94 + rc=$? 95 + if [ "$VERBOSE" = "1" -a -n "$out" ]; then 96 + echo "$out" 97 + fi 98 + 99 + [ "$VERBOSE" = "1" ] && echo 100 + 101 + return $rc 102 + } 103 + 104 + get_linklocal() 105 + { 106 + local ns=$1 107 + local dev=$2 108 + local addr 109 + 110 + addr=$(ip -netns $ns -6 -br addr show dev ${dev} | \ 111 + awk '{ 112 + for (i = 3; i <= NF; ++i) { 113 + if ($i ~ /^fe80/) 114 + print $i 115 + } 116 + }' 117 + ) 118 + addr=${addr/\/*} 119 + 120 + [ -z "$addr" ] && return 1 121 + 122 + echo $addr 123 + 124 + return 0 125 + } 126 + 127 + ################################################################################ 128 + # setup and teardown 129 + 130 + cleanup() 131 + { 132 + local ns 133 + 134 + for ns in h1 h2 r1 r2; do 135 + ip netns del $ns 2>/dev/null 136 + done 137 + } 138 + 139 + create_vrf() 140 + { 141 + local ns=$1 142 + 143 + ip -netns ${ns} link add ${VRF} type vrf table ${VRF_TABLE} 144 + ip -netns ${ns} link set ${VRF} up 145 + ip -netns ${ns} route add vrf ${VRF} unreachable default metric 8192 146 + ip -netns ${ns} -6 route add vrf ${VRF} unreachable default metric 8192 147 + 148 + ip -netns ${ns} addr add 127.0.0.1/8 dev ${VRF} 149 + ip -netns ${ns} -6 addr add ::1 dev ${VRF} nodad 150 + 151 + ip -netns ${ns} ru del pref 0 152 + ip -netns ${ns} ru add pref 32765 from all lookup local 153 + ip -netns ${ns} -6 ru del pref 0 154 + ip -netns ${ns} -6 ru add pref 32765 from all lookup local 155 + } 156 + 157 + setup() 158 + { 159 + local ns 160 + 161 + # 162 + # create nodes as namespaces 163 + # 164 + for ns in h1 h2 r1 r2; do 165 + ip netns add $ns 166 + ip -netns $ns li set lo up 167 + 168 + case "${ns}" in 169 + h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 170 + ip netns exec $ns sysctl -q 
-w net.ipv6.conf.all.accept_redirects=1 171 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 172 + ;; 173 + r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 174 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 175 + 176 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 177 + ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 178 + esac 179 + done 180 + 181 + # 182 + # create interconnects 183 + # 184 + ip -netns h1 li add eth0 type veth peer name r1h1 185 + ip -netns h1 li set r1h1 netns r1 name eth0 up 186 + 187 + ip -netns h1 li add eth1 type veth peer name r2h1 188 + ip -netns h1 li set r2h1 netns r2 name eth0 up 189 + 190 + ip -netns h2 li add eth0 type veth peer name r2h2 191 + ip -netns h2 li set eth0 up 192 + ip -netns h2 li set r2h2 netns r2 name eth2 up 193 + 194 + ip -netns r1 li add eth1 type veth peer name r2r1 195 + ip -netns r1 li set eth1 up 196 + ip -netns r1 li set r2r1 netns r2 name eth1 up 197 + 198 + # 199 + # h1 200 + # 201 + if [ "${WITH_VRF}" = "yes" ]; then 202 + create_vrf "h1" 203 + H1_VRF_ARG="vrf ${VRF}" 204 + H1_PING_ARG="-I ${VRF}" 205 + else 206 + H1_VRF_ARG= 207 + H1_PING_ARG= 208 + fi 209 + ip -netns h1 li add br0 type bridge 210 + if [ "${WITH_VRF}" = "yes" ]; then 211 + ip -netns h1 li set br0 vrf ${VRF} up 212 + else 213 + ip -netns h1 li set br0 up 214 + fi 215 + ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 216 + ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad 217 + ip -netns h1 li set eth0 master br0 up 218 + ip -netns h1 li set eth1 master br0 up 219 + 220 + # 221 + # h2 222 + # 223 + ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 224 + ip -netns h2 ro add default via ${R2_N2_IP} dev eth0 225 + ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad 226 + ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0 227 + 228 + # 229 + # r1 230 + # 231 + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 232 + ip -netns r1 -6 addr add dev eth0 
${R1_N1_IP6}/64 nodad 233 + ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30 234 + ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad 235 + 236 + # 237 + # r2 238 + # 239 + ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 240 + ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad 241 + ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30 242 + ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad 243 + ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24 244 + ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad 245 + 246 + sleep 2 247 + 248 + R1_LLADDR=$(get_linklocal r1 eth0) 249 + if [ $? -ne 0 ]; then 250 + echo "Error: Failed to get link-local address of r1's eth0" 251 + exit 1 252 + fi 253 + 254 + R2_LLADDR=$(get_linklocal r2 eth0) 255 + if [ $? -ne 0 ]; then 256 + echo "Error: Failed to get link-local address of r2's eth0" 257 + exit 1 258 + fi 259 + } 260 + 261 + change_h2_mtu() 262 + { 263 + local mtu=$1 264 + 265 + run_cmd ip -netns h2 li set eth0 mtu ${mtu} 266 + run_cmd ip -netns r2 li set eth2 mtu ${mtu} 267 + } 268 + 269 + check_exception() 270 + { 271 + local mtu="$1" 272 + local with_redirect="$2" 273 + local desc="$3" 274 + 275 + # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102) 276 + if [ "$VERBOSE" = "1" ]; then 277 + echo "Commands to check for exception:" 278 + run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} 279 + run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} 280 + fi 281 + 282 + if [ -n "${mtu}" ]; then 283 + mtu=" mtu ${mtu}" 284 + fi 285 + if [ "$with_redirect" = "yes" ]; then 286 + ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ 287 + grep -q "cache <redirected> expires [0-9]*sec${mtu}" 288 + elif [ -n "${mtu}" ]; then 289 + ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ 290 + grep -q "cache expires [0-9]*sec${mtu}" 291 + else 292 + ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ 293 + grep -q "cache" 294 + fi 295 + log_test $? 
0 "IPv4: ${desc}" 296 + 297 + if [ "$with_redirect" = "yes" ]; then 298 + ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}" 299 + else 300 + ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | grep -q "${mtu}" 301 + fi 302 + log_test $? 0 "IPv6: ${desc}" 303 + } 304 + 305 + run_ping() 306 + { 307 + local sz=$1 308 + 309 + run_cmd ip netns exec h1 ping -q -M want -i 0.2 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} 310 + run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.2 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} 311 + } 312 + 313 + replace_route_legacy() 314 + { 315 + # r1 to h2 via r2 and eth0 316 + run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 317 + run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 318 + } 319 + 320 + initial_route_legacy() 321 + { 322 + # r1 to h2 via r2 and eth1 323 + run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 324 + run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 325 + 326 + # h1 to h2 via r1 327 + # - IPv6 redirect only works if gateway is the LLA 328 + run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 329 + run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 330 + } 331 + 332 + check_connectivity() 333 + { 334 + local rc 335 + 336 + run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} 337 + rc=$? 338 + run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} 339 + [ $? -ne 0 ] && rc=$? 340 + 341 + return $rc 342 + } 343 + 344 + do_test() 345 + { 346 + local ttype="$1" 347 + 348 + eval initial_route_${ttype} 349 + 350 + # verify connectivity 351 + check_connectivity 352 + if [ $? 
-ne 0 ]; then 353 + echo "Error: Basic connectivity is broken" 354 + ret=1 355 + return 356 + fi 357 + 358 + # redirect exception followed by mtu 359 + eval replace_route_${ttype} 360 + run_ping 64 361 + check_exception "" "yes" "redirect exception" 362 + 363 + check_connectivity 364 + if [ $? -ne 0 ]; then 365 + echo "Error: Basic connectivity is broken after redirect" 366 + ret=1 367 + return 368 + fi 369 + 370 + change_h2_mtu 1300 371 + run_ping 1350 372 + check_exception "1300" "yes" "redirect exception plus mtu" 373 + 374 + # remove exceptions and restore routing 375 + change_h2_mtu 1500 376 + ip -netns h1 li set br0 down 377 + ip -netns h1 li set br0 up 378 + eval initial_route_${ttype} 379 + 380 + check_connectivity 381 + if [ $? -ne 0 ]; then 382 + echo "Error: Basic connectivity is broken after reset" 383 + ret=1 384 + return 385 + fi 386 + check_exception "" "no" "routing reset" 387 + 388 + # MTU exception followed by redirect 389 + change_h2_mtu 1300 390 + run_ping 1350 391 + check_exception "1300" "no" "mtu exception" 392 + 393 + eval replace_route_${ttype} 394 + run_ping 64 395 + check_exception "1300" "yes" "mtu exception plus redirect" 396 + 397 + check_connectivity 398 + if [ $? 
-ne 0 ]; then 399 + echo "Error: Basic connectivity is broken after redirect" 400 + ret=1 401 + return 402 + fi 403 + } 404 + 405 + ################################################################################ 406 + # usage 407 + 408 + usage() 409 + { 410 + cat <<EOF 411 + usage: ${0##*/} OPTS 412 + 413 + -p Pause on fail 414 + -v verbose mode (show commands and output) 415 + EOF 416 + } 417 + 418 + ################################################################################ 419 + # main 420 + 421 + # Some systems don't have a ping6 binary anymore 422 + which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) 423 + 424 + ret=0 425 + nsuccess=0 426 + nfail=0 427 + 428 + while getopts :pv o 429 + do 430 + case $o in 431 + p) PAUSE_ON_FAIL=yes;; 432 + v) VERBOSE=$(($VERBOSE + 1));; 433 + *) usage; exit 1;; 434 + esac 435 + done 436 + 437 + trap cleanup EXIT 438 + 439 + cleanup 440 + WITH_VRF=no 441 + setup 442 + 443 + log_section "Legacy routing" 444 + do_test "legacy" 445 + 446 + cleanup 447 + log_section "Legacy routing with VRF" 448 + WITH_VRF=yes 449 + setup 450 + do_test "legacy" 451 + 452 + printf "\nTests passed: %3d\n" ${nsuccess} 453 + printf "Tests failed: %3d\n" ${nfail} 454 + 455 + exit $ret