// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux INET6 implementation
 * FIB front-end.
 *
 * Authors:
 * Pedro Roque <roque@di.fc.ul.pt>
 */

/* Changes:
 *
 * YOSHIFUJI Hideaki @USAGI
 * reworked default router selection.
 * - respect outgoing interface
 * - select from (probably) reachable routers (i.e.
 * routers in REACHABLE, STALE, DELAY or PROBE states).
 * - always select the same router if it is (probably)
 * reachable. otherwise, round-robin the list.
 * Ville Nuorvala
 * Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/siphash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
#include <linux/btf_ids.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

static int ip6_rt_type_to_error(u8 fib6_type);

#define CREATE_TRACE_POINTS
#include <trace/events/fib6.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
#undef CREATE_TRACE_POINTS

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

INDIRECT_CALLABLE_SCOPE
struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
static void ip6_negative_advice(struct sock *sk,
				struct dst_entry *dst);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev);
static void ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu,
			       bool confirm_neigh);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
			   int strict);
static size_t rt6_nlmsg_size(struct fib6_info *f6i);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net,
					    const struct in6_addr *prefix, int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev,
					    unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					    const struct in6_addr *prefix, int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev);
#endif

struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.rt_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->dst.rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

void rt6_uncached_list_del(struct rt6_info *rt)
{
	struct uncached_list *ul = rt->dst.rt_uncached_list;

	if (ul) {
		spin_lock_bh(&ul->lock);
		list_del_init(&rt->dst.rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net_device *dev)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt, *safe;

		if (list_empty(&ul->head))
			continue;

		spin_lock_bh(&ul->lock);
		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;
			bool handled = false;

			if (rt_idev && rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
				in6_dev_put(rt_idev);
				handled = true;
			}

			if (rt_dev == dev) {
				rt->dst.dev = blackhole_netdev;
				netdev_ref_replace(rt_dev, blackhole_netdev,
						   &rt->dst.dev_tracker,
						   GFP_ATOMIC);
				handled = true;
			}
			if (handled)
				list_del_init(&rt->dst.rt_uncached);
		}
		spin_unlock_bh(&ul->lock);
	}
}

static inline const void *choose_neigh_daddr(const struct in6_addr *p,
					     struct sk_buff *skb,
					     const void *daddr)
{
	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
				   struct net_device *dev,
				   struct sk_buff *skb,
				   const void *daddr)
{
	struct neighbour *n;

	daddr = choose_neigh_daddr(gw, skb, daddr);
	n = __ipv6_neigh_lookup(dev, daddr);
	if (n)
		return n;

	n = neigh_create(&nd_tbl, daddr, dev);
	return IS_ERR(n) ? NULL : n;
}

static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
					      struct sk_buff *skb,
					      const void *daddr)
{
	const struct rt6_info *rt = dst_rt6_info(dst);

	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
				dst_dev(dst), skb, daddr);
}

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rt6_info *rt = dst_rt6_info(dst);
	struct net_device *dev = dst_dev(dst);

	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}

static struct dst_ops ip6_dst_ops_template = {
	.family = AF_INET6,
	.gc = ip6_dst_gc,
	.gc_thresh = 1024,
	.check = ip6_dst_check,
	.default_advmss = ip6_default_advmss,
	.mtu = ip6_mtu,
	.cow_metrics = dst_cow_metrics_generic,
	.destroy = ip6_dst_destroy,
	.ifdown = ip6_dst_ifdown,
	.negative_advice = ip6_negative_advice,
	.link_failure = ip6_link_failure,
	.update_pmtu = ip6_rt_update_pmtu,
	.redirect = rt6_do_redirect,
	.local_out = __ip6_local_out,
	.neigh_lookup = ip6_dst_neigh_lookup,
	.confirm_neigh = ip6_confirm_neigh,
};

static struct dst_ops ip6_dst_blackhole_ops = {
	.family = AF_INET6,
	.default_advmss = ip6_default_advmss,
	.neigh_lookup = ip6_dst_neigh_lookup,
	.check = ip6_dst_check,
	.destroy = ip6_dst_destroy,
	.cow_metrics = dst_cow_metrics_generic,
	.update_pmtu = dst_blackhole_update_pmtu,
	.redirect = dst_blackhole_redirect,
	.mtu = dst_blackhole_mtu,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol = RTPROT_KERNEL,
	.fib6_metric = ~(u32)0,
	.fib6_ref = REFCOUNT_INIT(1),
	.fib6_type = RTN_UNREACHABLE,
	.fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__rcuref = RCUREF_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -ENETUNREACH,
		.input = ip6_pkt_discard,
		.output = ip6_pkt_discard_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__rcuref = RCUREF_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__rcuref = RCUREF_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	memset_after(rt, 0, dst);
}

/* allocate dst with ip6_dst_ops */
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = dst_rt6_info(dst);
	struct fib6_info *from;
	struct inet6_dev *idev;

	ip_dst_metrics_put(dst);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	from = unrcu_pointer(xchg(&rt->from, NULL));
	fib6_info_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	struct rt6_info *rt = dst_rt6_info(dst);
	struct inet6_dev *idev = rt->rt6i_idev;
	struct fib6_info *from;

	if (idev && idev->dev != blackhole_netdev) {
		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);

		if (blackhole_idev) {
			rt->rt6i_idev = blackhole_idev;
			in6_dev_put(idev);
		}
	}
	from = unrcu_pointer(xchg(&rt->from, NULL));
	fib6_info_release(from);
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, READ_ONCE(rt->dst.expires));
	return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	struct fib6_info *from;

	from = rcu_dereference(rt->from);

	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, READ_ONCE(rt->dst.expires)))
			return true;
	} else if (from) {
		return READ_ONCE(rt->dst.obsolete) != DST_OBSOLETE_FORCE_CHK ||
			fib6_check_expired(from);
	}
	return false;
}

static struct fib6_info *
rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
{
	struct fib6_info *iter;
	struct fib6_node *fn;

	fn = rcu_dereference(rt->fib6_node);
	if (!fn)
		goto out;
	iter = rcu_dereference(fn->leaf);
	if (!iter)
		goto out;

	while (iter) {
		if (iter->fib6_metric == rt->fib6_metric &&
		    rt6_qualify_for_ecmp(iter))
			return iter;
		iter = rcu_dereference(iter->fib6_next);
	}

out:
	return NULL;
}

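/* Multipath selection below uses the hash-threshold method: fl6->mp_hash
 * is compared against each sibling's fib_nh_upper_bound, and the first
 * sibling whose bound covers the hash and whose nexthop scores as usable
 * is chosen.
 */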
void fib6_select_path(const struct net *net, struct fib6_result *res,
		      struct flowi6 *fl6, int oif, bool have_oif_match,
		      const struct sk_buff *skb, int strict)
{
	struct fib6_info *first, *match = res->f6i;
	struct fib6_info *sibling;
	int hash;

	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
		goto out;

	if (match->nh && have_oif_match && res->nh)
		return;

	if (skb)
		IP6CB(skb)->flags |= IP6SKB_MULTIPATH;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash &&
	    (!match->nh || nexthop_is_multipath(match->nh)))
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (unlikely(match->nh)) {
		nexthop_path_fib6_result(res, fl6->mp_hash);
		return;
	}

	first = rt6_multipath_first_sibling_rcu(match);
	if (!first)
		goto out;

	hash = fl6->mp_hash;
	if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
		if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
				    strict) >= 0)
			match = first;
		goto out;
	}

	list_for_each_entry_rcu(sibling, &first->fib6_siblings,
				fib6_siblings) {
		const struct fib6_nh *nh = sibling->fib6_nh;
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
		if (hash > nh_upper_bound)
			continue;
		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

out:
	res->f6i = match;
	res->nh = match->fib6_nh;
}

/*
 * Route lookup. rcu_read_lock() should be held.
 */

static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
			       const struct in6_addr *saddr, int oif, int flags)
{
	const struct net_device *dev;

	if (nh->fib_nh_flags & RTNH_F_DEAD)
		return false;

	dev = nh->fib_nh_dev;
	if (oif) {
		if (dev->ifindex == oif)
			return true;
	} else {
		if (ipv6_chk_addr(net, saddr, dev,
				  flags & RT6_LOOKUP_F_IFACE))
			return true;
	}

	return false;
}

struct fib6_nh_dm_arg {
	struct net *net;
	const struct in6_addr *saddr;
	int oif;
	int flags;
	struct fib6_nh *nh;
};

static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_dm_arg *arg = _arg;

	arg->nh = nh;
	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
				  arg->flags);
}

/* returns fib6_nh from nexthop or NULL */
static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
					struct fib6_result *res,
					const struct in6_addr *saddr,
					int oif, int flags)
{
	struct fib6_nh_dm_arg arg = {
		.net = net,
		.saddr = saddr,
		.oif = oif,
		.flags = flags,
	};

	if (nexthop_is_blackhole(nh))
		return NULL;

	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
		return arg.nh;

	return NULL;
}

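/* Narrow res->f6i down to the first entry in the leaf list whose nexthop
 * matches the requested output device (oif) or source address; fall back
 * to the null entry on a strict interface mismatch or a dead nexthop.
 */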
static void rt6_device_match(struct net *net, struct fib6_result *res,
			     const struct in6_addr *saddr, int oif, int flags)
{
	struct fib6_info *f6i = res->f6i;
	struct fib6_info *spf6i;
	struct fib6_nh *nh;

	if (!oif && ipv6_addr_any(saddr)) {
		if (unlikely(f6i->nh)) {
			nh = nexthop_fib6_nh(f6i->nh);
			if (nexthop_is_blackhole(f6i->nh))
				goto out_blackhole;
		} else {
			nh = f6i->fib6_nh;
		}
		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
			goto out;
	}

	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
		bool matched = false;

		if (unlikely(spf6i->nh)) {
			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
					      oif, flags);
			if (nh)
				matched = true;
		} else {
			nh = spf6i->fib6_nh;
			if (__rt6_device_match(net, nh, saddr, oif, flags))
				matched = true;
		}
		if (matched) {
			res->f6i = spf6i;
			goto out;
		}
	}

	if (oif && flags & RT6_LOOKUP_F_IFACE) {
		res->f6i = net->ipv6.fib6_null_entry;
		nh = res->f6i->fib6_nh;
		goto out;
	}

	if (unlikely(f6i->nh)) {
		nh = nexthop_fib6_nh(f6i->nh);
		if (nexthop_is_blackhole(f6i->nh))
			goto out_blackhole;
	} else {
		nh = f6i->fib6_nh;
	}

	if (nh->fib_nh_flags & RTNH_F_DEAD) {
		res->f6i = net->ipv6.fib6_null_entry;
		nh = res->f6i->fib6_nh;
	}
out:
	res->nh = nh;
	res->fib6_type = res->f6i->fib6_type;
	res->fib6_flags = res->f6i->fib6_flags;
	return;

out_blackhole:
	res->fib6_flags |= RTF_REJECT;
	res->fib6_type = RTN_BLACKHOLE;
	res->nh = nh;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
	netdevice_tracker dev_tracker;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	netdev_put(work->dev, &work->dev_tracker);
	kfree(work);
}

static void rt6_probe(struct fib6_nh *fib6_nh)
{
	struct __rt6_probe_work *work = NULL;
	const struct in6_addr *nh_gw;
	unsigned long last_probe;
	struct neighbour *neigh;
	struct net_device *dev;
	struct inet6_dev *idev;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!fib6_nh->fib_nh_gw_family)
		return;

	nh_gw = &fib6_nh->fib_nh_gw6;
	dev = fib6_nh->fib_nh_dev;
	rcu_read_lock();
	last_probe = READ_ONCE(fib6_nh->last_probe);
	idev = __in6_dev_get(dev);
	if (!idev)
		goto out;
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
	if (neigh) {
		if (READ_ONCE(neigh->nud_state) & NUD_VALID)
			goto out;

		write_lock_bh(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       READ_ONCE(idev->cnf.rtr_probe_interval))) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock_bh(&neigh->lock);
	} else if (time_after(jiffies, last_probe +
			      READ_ONCE(idev->cnf.rtr_probe_interval))) {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (!work || cmpxchg(&fib6_nh->last_probe,
			     last_probe, jiffies) != last_probe) {
		kfree(work);
	} else {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
		work->dev = dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock();
}
#else
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
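/* rt6_check_neigh() maps the gateway's NUD state to a score: with
 * CONFIG_IPV6_ROUTER_PREF only NUD_FAILED disqualifies a neighbour, while
 * without it a missing neighbour entry triggers round-robin selection.
 */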
static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
					  &fib6_nh->fib_nh_gw6);
	if (neigh) {
		u8 nud_state = READ_ONCE(neigh->nud_state);

		if (nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock();

	return ret;
}

static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
			   int strict)
{
	int m = 0;

	if (!oif || nh->fib_nh_dev->ifindex == oif)
		m = 2;

	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
		int n = rt6_check_neigh(nh);

		if (n < 0)
			return n;
	}
	return m;
}

static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
		       int oif, int strict, int *mpri, bool *do_rr)
{
	bool match_do_rr = false;
	bool rc = false;
	int m;

	if (nh->fib_nh_flags & RTNH_F_DEAD)
		goto out;

	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	m = rt6_score_route(nh, fib6_flags, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(nh);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		rc = true;
	}
out:
	return rc;
}

struct fib6_nh_frl_arg {
	u32 flags;
	int oif;
	int strict;
	int *mpri;
	bool *do_rr;
	struct fib6_nh *nh;
};

static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_frl_arg *arg = _arg;

	arg->nh = nh;
	return find_match(nh, arg->flags, arg->oif, arg->strict,
			  arg->mpri, arg->do_rr);
}

static void __find_rr_leaf(struct fib6_info *f6i_start,
			   struct fib6_info *nomatch, u32 metric,
			   struct fib6_result *res, struct fib6_info **cont,
			   int oif, int strict, bool *do_rr, int *mpri)
{
	struct fib6_info *f6i;

	for (f6i = f6i_start;
	     f6i && f6i != nomatch;
	     f6i = rcu_dereference(f6i->fib6_next)) {
		bool matched = false;
		struct fib6_nh *nh;

		if (cont && f6i->fib6_metric != metric) {
			*cont = f6i;
			return;
		}

		if (fib6_check_expired(f6i))
			continue;

		if (unlikely(f6i->nh)) {
			struct fib6_nh_frl_arg arg = {
				.flags = f6i->fib6_flags,
				.oif = oif,
				.strict = strict,
				.mpri = mpri,
				.do_rr = do_rr
			};

			if (nexthop_is_blackhole(f6i->nh)) {
				res->fib6_flags = RTF_REJECT;
				res->fib6_type = RTN_BLACKHOLE;
				res->f6i = f6i;
				res->nh = nexthop_fib6_nh(f6i->nh);
				return;
			}
			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
						     &arg)) {
				matched = true;
				nh = arg.nh;
			}
		} else {
			nh = f6i->fib6_nh;
			if (find_match(nh, f6i->fib6_flags, oif, strict,
				       mpri, do_rr))
				matched = true;
		}
		if (matched) {
			res->f6i = f6i;
			res->nh = nh;
			res->fib6_flags = f6i->fib6_flags;
			res->fib6_type = f6i->fib6_type;
		}
	}
}

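/* Scan the leaf list for the best nexthop at rr_head's metric: first from
 * rr_head to the end of the list, then from the head (leaf) back up to
 * rr_head. If nothing matched and entries with a different metric exist
 * (*cont), scan those as well.
 */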
static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
			 struct fib6_info *rr_head, int oif, int strict,
			 bool *do_rr, struct fib6_result *res)
{
	u32 metric = rr_head->fib6_metric;
	struct fib6_info *cont = NULL;
	int mpri = -1;

	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
		       oif, strict, do_rr, &mpri);

	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
		       oif, strict, do_rr, &mpri);

	if (res->f6i || !cont)
		return;

	__find_rr_leaf(cont, NULL, metric, res, NULL,
		       oif, strict, do_rr, &mpri);
}

static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
		       struct fib6_result *res, int strict)
{
	struct fib6_info *leaf = rcu_dereference(fn->leaf);
	struct fib6_info *rt0;
	bool do_rr = false;
	int key_plen;

	/* make sure this function or its helpers sets f6i */
	res->f6i = NULL;

	if (!leaf || leaf == net->ipv6.fib6_null_entry)
		goto out;

	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not point to its child's leaf
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->fib6_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->fib6_src.plen)
		key_plen = rt0->fib6_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		goto out;

	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
	if (do_rr) {
		struct fib6_info *next = rcu_dereference(rt0->fib6_next);

		/* no entries matched; do round-robin */
		if (!next || next->fib6_metric != rt0->fib6_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->fib6_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->fib6_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
		}
	}

out:
	if (!res->f6i) {
		res->f6i = net->ipv6.fib6_null_entry;
		res->nh = res->f6i->fib6_nh;
		res->fib6_flags = res->f6i->fib6_flags;
		res->fib6_type = res->f6i->fib6_type;
	}
}

static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
{
	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
	       res->nh->fib_nh_gw_family;
}

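/* rt6_route_rcv() consumes a Route Information option received in a
 * Router Advertisement (RFC 4191): it adds, refreshes or deletes the
 * corresponding RTF_ROUTEINFO route depending on the advertised lifetime.
 */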
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	struct fib6_table *table;
	unsigned int pref;
	unsigned long lifetime;
	struct fib6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(net, rt, false);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		table = rt->fib6_table;
		spin_lock_bh(&table->tb6_lock);

		if (!addrconf_finite_timeout(lifetime)) {
			fib6_clean_expires(rt);
			fib6_remove_gc_list(rt);
		} else {
			fib6_set_expires(rt, jiffies + HZ * lifetime);
			fib6_add_gc_list(rt);
		}

		spin_unlock_bh(&table->tb6_lock);

		fib6_info_release(rt);
	}
	return 0;
}
#endif

/*
 * Misc support functions
 */

/* called with rcu_lock held */
static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
{
	struct net_device *dev = res->nh->fib_nh_dev;

	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}

static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]      = 0,
	[RTN_UNICAST]     = 0,
	[RTN_LOCAL]       = 0,
	[RTN_BROADCAST]   = 0,
	[RTN_ANYCAST]     = 0,
	[RTN_MULTICAST]   = 0,
	[RTN_BLACKHOLE]   = -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]    = -EACCES,
	[RTN_THROW]       = -EAGAIN,
	[RTN_NAT]         = -EINVAL,
	[RTN_XRESOLVE]    = -EINVAL,
};

static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}

static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
{
	unsigned short flags = 0;

	if (rt->dst_nocount)
		flags |= DST_NOCOUNT;
	if (rt->dst_nopolicy)
		flags |= DST_NOPOLICY;

	return flags;
}

static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
	rt->dst.error = ip6_rt_type_to_error(fib6_type);

	switch (fib6_type) {
	case RTN_BLACKHOLE:
		rt->dst.output = dst_discard_out;
		rt->dst.input = dst_discard;
		break;
	case RTN_PROHIBIT:
		rt->dst.output = ip6_pkt_prohibit_out;
		rt->dst.input = ip6_pkt_prohibit;
		break;
	case RTN_THROW:
	case RTN_UNREACHABLE:
	default:
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		break;
	}
}

static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
{
	struct fib6_info *f6i = res->f6i;

	if (res->fib6_flags & RTF_REJECT) {
		ip6_rt_init_dst_reject(rt, res->fib6_type);
		return;
	}

	rt->dst.error = 0;
	rt->dst.output = ip6_output;

	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
		rt->dst.input = ip6_input;
	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
		rt->dst.input = ip6_mc_input;
		rt->dst.output = ip6_mr_output;
	} else {
		rt->dst.input = ip6_forward;
	}

	if (res->nh->fib_nh_lws) {
		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
		lwtunnel_set_redirect(&rt->dst);
	}

	rt->dst.lastuse = jiffies;
}

/* Caller must already hold reference to @from */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	rt->rt6i_flags &= ~RTF_EXPIRES;
	rcu_assign_pointer(rt->from, from);
	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}

/* Caller must already hold reference to f6i in result */
static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
{
	const struct fib6_nh *nh = res->nh;
	const struct net_device *dev = nh->fib_nh_dev;
	struct fib6_info *f6i = res->f6i;

	ip6_rt_init_dst(rt, res);

	rt->rt6i_dst = f6i->fib6_dst;
	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
	rt->rt6i_flags = res->fib6_flags;
	if (nh->fib_nh_gw_family) {
		rt->rt6i_gateway = nh->fib_nh_gw6;
		rt->rt6i_flags |= RTF_GATEWAY;
	}
	rt6_set_from(rt, f6i);
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = f6i->fib6_src;
#endif
}

static struct fib6_node *fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn, *sn;

	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = rcu_dereference(fn->parent);
		sn = FIB6_SUBTREE(pn);
		if (sn && sn != fn)
			fn = fib6_node_lookup(sn, NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
{
	struct rt6_info *rt = *prt;

	if (dst_hold_safe(&rt->dst))
		return true;
	if (net) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = NULL;
	}
	*prt = rt;
	return false;
}

/* called with rcu_lock held */
static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
{
	struct net_device *dev = res->nh->fib_nh_dev;
	struct fib6_info *f6i = res->f6i;
	unsigned short flags;
	struct rt6_info *nrt;

	if (!fib6_info_hold_safe(f6i))
		goto fallback;

	flags = fib6_info_dst_flags(f6i);
	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
	if (!nrt) {
		fib6_info_release(f6i);
		goto fallback;
	}

	ip6_rt_copy_init(nrt, res);
	return nrt;

fallback:
	nrt = dev_net(dev)->ipv6.ip6_null_entry;
	dst_hold(&nrt->dst);
	return nrt;
}

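/* Flow-based lookup that bypasses the per-cpu dst cache: match on device
 * and source address, pick a multipath sibling, and return a cached
 * exception route if one exists, otherwise a freshly cloned rt6_info.
 */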
INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	struct fib6_result res = {};
	struct fib6_node *fn;
	struct rt6_info *rt;

	rcu_read_lock();
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	res.f6i = rcu_dereference(fn->leaf);
	if (!res.f6i)
		res.f6i = net->ipv6.fib6_null_entry;
	else
		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
				 flags);

	if (res.f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;

		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
		goto out;
	} else if (res.fib6_flags & RTF_REJECT) {
		goto do_create;
	}

	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
			 fl6->flowi6_oif != 0, skb, flags);

	/* Search through exception table */
	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt))
			dst_use_noref(&rt->dst, jiffies);
	} else {
do_create:
		rt = ip6_create_rt_rcu(&res);
	}

out:
	trace_fib6_table_lookup(net, &res, table, fl6);

	rcu_read_unlock();

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb, int flags)
{
	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif,
			    const struct sk_buff *skb, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return dst_rt6_info(dst);

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */

static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->fib6_table;
	spin_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, extack);
	spin_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct net *net, struct fib6_info *rt)
{
	struct nl_info info = { .nl_net = net, };

	return __ip6_ins_rt(rt, &info, NULL);
}

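/* Clone a fib entry into a host-width (/128) RTF_CACHE rt6_info. Clones
 * like this are used for redirects, PMTU exceptions and the
 * FLOWI_FLAG_KNOWN_NH case in ip6_pol_route().
 */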
1374 */ 1375 1376 if (!fib6_info_hold_safe(f6i)) 1377 return NULL; 1378 1379 dev = ip6_rt_get_dev_rcu(res); 1380 rt = ip6_dst_alloc(dev_net(dev), dev, 0); 1381 if (!rt) { 1382 fib6_info_release(f6i); 1383 return NULL; 1384 } 1385 1386 ip6_rt_copy_init(rt, res); 1387 rt->rt6i_flags |= RTF_CACHE; 1388 rt->rt6i_dst.addr = *daddr; 1389 rt->rt6i_dst.plen = 128; 1390 1391 if (!rt6_is_gw_or_nonexthop(res)) { 1392 if (f6i->fib6_dst.plen != 128 && 1393 ipv6_addr_equal(&f6i->fib6_dst.addr, daddr)) 1394 rt->rt6i_flags |= RTF_ANYCAST; 1395#ifdef CONFIG_IPV6_SUBTREES 1396 if (rt->rt6i_src.plen && saddr) { 1397 rt->rt6i_src.addr = *saddr; 1398 rt->rt6i_src.plen = 128; 1399 } 1400#endif 1401 } 1402 1403 return rt; 1404} 1405 1406static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) 1407{ 1408 struct fib6_info *f6i = res->f6i; 1409 unsigned short flags = fib6_info_dst_flags(f6i); 1410 struct net_device *dev; 1411 struct rt6_info *pcpu_rt; 1412 1413 if (!fib6_info_hold_safe(f6i)) 1414 return NULL; 1415 1416 rcu_read_lock(); 1417 dev = ip6_rt_get_dev_rcu(res); 1418 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT); 1419 rcu_read_unlock(); 1420 if (!pcpu_rt) { 1421 fib6_info_release(f6i); 1422 return NULL; 1423 } 1424 ip6_rt_copy_init(pcpu_rt, res); 1425 pcpu_rt->rt6i_flags |= RTF_PCPU; 1426 1427 if (f6i->nh) 1428 pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev)); 1429 1430 return pcpu_rt; 1431} 1432 1433static bool rt6_is_valid(const struct rt6_info *rt6) 1434{ 1435 return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev)); 1436} 1437 1438/* It should be called with rcu_read_lock() acquired */ 1439static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) 1440{ 1441 struct rt6_info *pcpu_rt; 1442 1443 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); 1444 1445 if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) { 1446 struct rt6_info *prev, **p; 1447 1448 p = this_cpu_ptr(res->nh->rt6i_pcpu); 1449 /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */ 1450 prev = xchg(p, NULL); 1451 if (prev) { 1452 dst_dev_put(&prev->dst); 1453 dst_release(&prev->dst); 1454 } 1455 1456 pcpu_rt = NULL; 1457 } 1458 1459 return pcpu_rt; 1460} 1461 1462static struct rt6_info *rt6_make_pcpu_route(struct net *net, 1463 const struct fib6_result *res) 1464{ 1465 struct rt6_info *pcpu_rt, *prev, **p; 1466 1467 pcpu_rt = ip6_rt_pcpu_alloc(res); 1468 if (!pcpu_rt) 1469 return NULL; 1470 1471 p = this_cpu_ptr(res->nh->rt6i_pcpu); 1472 prev = cmpxchg(p, NULL, pcpu_rt); 1473 if (unlikely(prev)) { 1474 /* 1475 * Another task on this CPU already installed a pcpu_rt. 1476 * This can happen on PREEMPT_RT where preemption is possible. 1477 * Free our allocation and return the existing one. 
static struct rt6_info *rt6_make_pcpu_route(struct net *net,
					    const struct fib6_result *res)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(res);
	if (!pcpu_rt)
		return NULL;

	p = this_cpu_ptr(res->nh->rt6i_pcpu);
	prev = cmpxchg(p, NULL, pcpu_rt);
	if (unlikely(prev)) {
		/*
		 * Another task on this CPU already installed a pcpu_rt.
		 * This can happen on PREEMPT_RT where preemption is possible.
		 * Free our allocation and return the existing one.
		 */
		WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT));

		dst_dev_put(&pcpu_rt->dst);
		dst_release(&pcpu_rt->dst);
		return prev;
	}

	if (res->f6i->fib6_destroying) {
		struct fib6_info *from;

		from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
		fib6_info_release(from);
	}

	return pcpu_rt;
}

/* exception hash table implementation
 */
static DEFINE_SPINLOCK(rt6_exception_lock);

/* Remove rt6_ex from hash table and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				 struct rt6_exception *rt6_ex)
{
	struct net *net;

	if (!bucket || !rt6_ex)
		return;

	net = dev_net(rt6_ex->rt6i->dst.dev);
	net->ipv6.rt6_stats->fib_rt_cache--;

	/* completely purge the exception to allow releasing the held
	 * resources: some [sk] cache may keep the dst around for an
	 * unlimited time
	 */
	dst_dev_put(&rt6_ex->rt6i->dst);

	hlist_del_rcu(&rt6_ex->hlist);
	dst_release(&rt6_ex->rt6i->dst);
	kfree_rcu(rt6_ex, rcu);
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
}

/* Remove oldest rt6_ex in bucket and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
{
	struct rt6_exception *rt6_ex, *oldest = NULL;

	if (!bucket)
		return;

	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
			oldest = rt6_ex;
	}
	rt6_remove_exception(bucket, oldest);
}

static u32 rt6_exception_hash(const struct in6_addr *dst,
			      const struct in6_addr *src)
{
	static siphash_aligned_key_t rt6_exception_key;
	struct {
		struct in6_addr dst;
		struct in6_addr src;
	} __aligned(SIPHASH_ALIGNMENT) combined = {
		.dst = *dst,
	};
	u64 val;

	net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));

#ifdef CONFIG_IPV6_SUBTREES
	if (src)
		combined.src = *src;
#endif
	val = siphash(&combined, sizeof(combined), &rt6_exception_key);

	return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rt6_exception_lock
 */
static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
			      const struct in6_addr *daddr,
			      const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rcu_read_lock()
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

static unsigned int fib6_mtu(const struct fib6_result *res)
{
	const struct fib6_nh *nh = res->nh;
	unsigned int mtu;

	if (res->f6i->fib6_pmtu) {
		mtu = res->f6i->fib6_pmtu;
	} else {
		struct net_device *dev = nh->fib_nh_dev;
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		mtu = READ_ONCE(idev->cnf.mtu6);
		rcu_read_unlock();
	}

	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}

#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL

/* used when the flushed bit is not relevant, only access to the bucket
 * (ie., all bucket users except rt6_insert_exception);
 *
 * called under rcu lock; sometimes called with rt6_exception_lock held
 */
static
struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
						       spinlock_t *lock)
{
	struct rt6_exception_bucket *bucket;

	if (lock)
		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
						   lockdep_is_held(lock));
	else
		bucket = rcu_dereference(nh->rt6i_exception_bucket);

	/* remove bucket flushed bit if set */
	if (bucket) {
		unsigned long p = (unsigned long)bucket;

		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
		bucket = (struct rt6_exception_bucket *)p;
	}

	return bucket;
}

static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
{
	unsigned long p = (unsigned long)bucket;

	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
}

/* called with rt6_exception_lock held */
static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
					      spinlock_t *lock)
{
	struct rt6_exception_bucket *bucket;
	unsigned long p;

	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
					   lockdep_is_held(lock));

	p = (unsigned long)bucket;
	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
	bucket = (struct rt6_exception_bucket *)p;
	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
}

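/* Insert an RTF_CACHE route into the nexthop's exception bucket, evicting
 * the oldest entries beyond a randomized depth limit, and bump the table
 * sernum so that stale cached dsts get revalidated.
 */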
static int rt6_insert_exception(struct rt6_info *nrt,
				const struct fib6_result *res)
{
	struct net *net = dev_net(nrt->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct fib6_info *f6i = res->f6i;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct fib6_nh *nh = res->nh;
	int max_depth;
	int err = 0;

	spin_lock_bh(&rt6_exception_lock);

	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
					   lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
		err = -EINVAL;
		goto out;
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* fib6_src.plen != 0 indicates f6i is in subtree
	 * and exception table is indexed by a hash of
	 * both fib6_dst and fib6_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only fib6_dst.
	 */
	if (f6i->fib6_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif
	/* rt6_mtu_change() might lower mtu on f6i.
	 * Only insert this exception route if its mtu
	 * is less than f6i's mtu value.
	 */
	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
		err = -EINVAL;
		goto out;
	}

	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	/* Randomize max depth to avoid some side-channel attacks. */
	max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH);
	while (bucket->depth > max_depth)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		spin_lock_bh(&f6i->fib6_table->tb6_lock);
		fib6_update_sernum(net, f6i);
		fib6_add_gc_list(f6i);
		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
		fib6_force_start_gc(net);
	}

	return err;
}

static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);

	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
	if (!bucket)
		goto out;

	/* Prevent rt6_insert_exception() from recreating the bucket list */
	if (!from)
		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
			if (!from ||
			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
				rt6_remove_exception(bucket, rt6_ex);
		}
		WARN_ON_ONCE(!from && bucket->depth);
		bucket++;
	}
out:
	spin_unlock_bh(&rt6_exception_lock);
}

static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
{
	struct fib6_info *f6i = arg;

	fib6_nh_flush_exceptions(nh, f6i);

	return 0;
}

void rt6_flush_exceptions(struct fib6_info *f6i)
{
	if (f6i->nh) {
		rcu_read_lock();
		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i);
		rcu_read_unlock();
	} else {
		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
	}
}

/* Find cached rt in the hash table inside passed in rt
 * Caller has to hold rcu_read_lock()
 */
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	const struct in6_addr *src_key = NULL;
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct rt6_info *ret = NULL;

#ifdef CONFIG_IPV6_SUBTREES
	/* fib6_src.plen != 0 indicates f6i is in subtree
	 * and exception table is indexed by a hash of
	 * both fib6_dst and fib6_src.
	 * However, the src addr used to create the hash
	 * might not be exactly the passed in saddr which
	 * is a /128 addr from the flow.
	 * So we need to use f6i->fib6_src to redo lookup
	 * if the passed in saddr does not find anything.
	 * (See the logic in ip6_rt_cache_alloc() on how
	 * rt->rt6i_src is updated.)
	 */
	if (res->f6i->fib6_src.plen)
		src_key = saddr;
find_ex:
#endif
	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);

	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
		ret = rt6_ex->rt6i;

#ifdef CONFIG_IPV6_SUBTREES
	/* Use fib6_src as src_key and redo lookup */
	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
		src_key = &res->f6i->fib6_src.addr;
		goto find_ex;
	}
#endif

	return ret;
}

/* Remove the passed in cached rt from the hash table that contains it */
static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
				    const struct rt6_info *rt)
{
	const struct in6_addr *src_key = NULL;
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int err;

	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
		return -ENOENT;

	spin_lock_bh(&rt6_exception_lock);
	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_spinlock(&bucket,
					       &rt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex) {
		rt6_remove_exception(bucket, rt6_ex);
		err = 0;
	} else {
		err = -ENOENT;
	}

	spin_unlock_bh(&rt6_exception_lock);
	return err;
}

struct fib6_nh_excptn_arg {
	struct rt6_info *rt;
	int plen;
};

static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_excptn_arg *arg = _arg;
	int err;

	err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
	if (err == 0)
		return 1;

	return 0;
}

static int rt6_remove_exception_rt(struct rt6_info *rt)
{
	struct fib6_info *from;

	from = rcu_dereference(rt->from);
	if (!from || !(rt->rt6i_flags & RTF_CACHE))
		return -EINVAL;

	if (from->nh) {
		struct fib6_nh_excptn_arg arg = {
			.rt = rt,
			.plen = from->fib6_src.plen
		};
		int rc;

		/* rc = 1 means an entry was found */
		rc = nexthop_for_each_fib6_nh(from->nh,
					      rt6_nh_remove_exception_rt,
					      &arg);
		return rc ? 0 : -ENOENT;
	}

	return fib6_nh_remove_exception(from->fib6_nh,
					from->fib6_src.plen, rt);
}

/* Find rt6_ex which contains the passed in rt cache and
 * refresh its stamp
 */
static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
				     const struct rt6_info *rt)
{
	const struct in6_addr *src_key = NULL;
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;

	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
	if (rt6_ex)
		rt6_ex->stamp = jiffies;
}

struct fib6_nh_match_arg {
	const struct net_device *dev;
	const struct in6_addr *gw;
	struct fib6_nh *match;
};

/* determine if fib6_nh has given device and gateway */
static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
{
	struct fib6_nh_match_arg *arg = _arg;

	if (arg->dev != nh->fib_nh_dev ||
	    (arg->gw && !nh->fib_nh_gw_family) ||
	    (!arg->gw && nh->fib_nh_gw_family) ||
	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
		return 0;

	arg->match = nh;

	/* found a match, break the loop */
	return 1;
}

static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
	struct fib6_info *from;
	struct fib6_nh *fib6_nh;

	rcu_read_lock();

	from = rcu_dereference(rt->from);
	if (!from || !(rt->rt6i_flags & RTF_CACHE))
		goto unlock;

	if (from->nh) {
		struct fib6_nh_match_arg arg = {
			.dev = rt->dst.dev,
			.gw = &rt->rt6i_gateway,
		};

		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);

		if (!arg.match)
			goto unlock;
		fib6_nh = arg.match;
	} else {
		fib6_nh = from->fib6_nh;
	}
	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
unlock:
	rcu_read_unlock();
}

static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
					 struct rt6_info *rt, int mtu)
{
	u32 dmtu = dst6_mtu(&rt->dst);

	/* If the new MTU is lower than the route PMTU, this new MTU will be the
	 * lowest MTU in the path: always allow updating the route PMTU to
	 * reflect PMTU decreases.
	 *
	 * If the new MTU is higher, and the route PMTU is equal to the local
	 * MTU, this means the old MTU is the lowest in the path, so allow
	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
	 * handle this.
	 */

	if (dmtu >= mtu)
		return true;

	if (dmtu == idev->cnf.mtu6)
		return true;

	return false;
}

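/* Apply a new link MTU to every cached exception route on the nexthop,
 * subject to the rules in rt6_mtu_change_route_allowed() above.
 */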
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
				       const struct fib6_nh *nh, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
	if (!bucket)
		return;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
			struct rt6_info *entry = rt6_ex->rt6i;

			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
			 * route), the metrics of its rt->from have already
			 * been updated.
			 */
			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
			    rt6_mtu_change_route_allowed(idev, entry, mtu))
				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
		}
		bucket++;
	}
}

#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
					    const struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}

static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* We prune and obsolete aged-out and non-gateway exceptions even if
	 * others still hold references to them, so that on the next
	 * dst_check() such references can be dropped.
	 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
	 * expired, independently of their aging, as per RFC 8201 section 4.
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
		if (time_after_eq(now, READ_ONCE(rt->dst.lastuse) +
					gc_args->timeout)) {
			pr_debug("aging clone %p\n", rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	} else if (time_after(jiffies, READ_ONCE(rt->dst.expires))) {
		pr_debug("purging expired route %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;

		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);

		if (!(neigh && (neigh->flags & NTF_ROUTER))) {
			pr_debug("purging route %p via non-router but gateway\n",
				 rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	}

	gc_args->more++;
}

static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
				   struct fib6_gc_args *gc_args,
				   unsigned long now)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
		return;

	rcu_read_lock_bh();
	spin_lock(&rt6_exception_lock);
	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				rt6_age_examine_exception(bucket, rt6_ex,
							  gc_args, now);
			}
			bucket++;
		}
	}
	spin_unlock(&rt6_exception_lock);
	rcu_read_unlock_bh();
}

struct fib6_nh_age_excptn_arg {
	struct fib6_gc_args *gc_args;
	unsigned long now;
};

rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg) 2212{ 2213 struct fib6_nh_age_excptn_arg *arg = _arg; 2214 2215 fib6_nh_age_exceptions(nh, arg->gc_args, arg->now); 2216 return 0; 2217} 2218 2219void rt6_age_exceptions(struct fib6_info *f6i, 2220 struct fib6_gc_args *gc_args, 2221 unsigned long now) 2222{ 2223 if (f6i->nh) { 2224 struct fib6_nh_age_excptn_arg arg = { 2225 .gc_args = gc_args, 2226 .now = now 2227 }; 2228 2229 nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions, 2230 &arg); 2231 } else { 2232 fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now); 2233 } 2234} 2235 2236/* must be called with rcu lock held */ 2237int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, 2238 struct flowi6 *fl6, struct fib6_result *res, int strict) 2239{ 2240 struct fib6_node *fn, *saved_fn; 2241 2242 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2243 saved_fn = fn; 2244 2245redo_rt6_select: 2246 rt6_select(net, fn, oif, res, strict); 2247 if (res->f6i == net->ipv6.fib6_null_entry) { 2248 fn = fib6_backtrack(fn, &fl6->saddr); 2249 if (fn) 2250 goto redo_rt6_select; 2251 else if (strict & RT6_LOOKUP_F_REACHABLE) { 2252 /* also consider unreachable route */ 2253 strict &= ~RT6_LOOKUP_F_REACHABLE; 2254 fn = saved_fn; 2255 goto redo_rt6_select; 2256 } 2257 } 2258 2259 trace_fib6_table_lookup(net, res, table, fl6); 2260 2261 return 0; 2262} 2263 2264struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, 2265 int oif, struct flowi6 *fl6, 2266 const struct sk_buff *skb, int flags) 2267{ 2268 struct fib6_result res = {}; 2269 struct rt6_info *rt = NULL; 2270 int strict = 0; 2271 2272 WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) && 2273 !rcu_read_lock_held()); 2274 2275 strict |= flags & RT6_LOOKUP_F_IFACE; 2276 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; 2277 if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) 2278 strict |= RT6_LOOKUP_F_REACHABLE; 2279 2280 rcu_read_lock(); 2281 2282 fib6_table_lookup(net, table, oif, fl6, &res, strict); 2283 if (res.f6i == net->ipv6.fib6_null_entry) 2284 goto out; 2285 2286 fib6_select_path(net, &res, fl6, oif, false, skb, strict); 2287 2288 /*Search through exception table */ 2289 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); 2290 if (rt) { 2291 goto out; 2292 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 2293 !res.nh->fib_nh_gw_family)) { 2294 /* Create a RTF_CACHE clone which will not be 2295 * owned by the fib6 tree. It is for the special case where 2296 * the daddr in the skb during the neighbor look-up is different 2297 * from the fl6->daddr used to look-up route here. 2298 */ 2299 rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL); 2300 2301 if (rt) { 2302 /* 1 refcnt is taken during ip6_rt_cache_alloc(). 2303 * As rt6_uncached_list_add() does not consume refcnt, 2304 * this refcnt is always returned to the caller even 2305 * if caller sets RT6_LOOKUP_F_DST_NOREF flag. 
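		 *
		 * Put differently: even an RT6_LOOKUP_F_DST_NOREF caller
		 * owns a reference to this uncached clone and must drop it
		 * with dst_release() eventually; only the pcpu and
		 * tree-owned paths below honour the noref request.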
2306 */ 2307 rt6_uncached_list_add(rt); 2308 rcu_read_unlock(); 2309 2310 return rt; 2311 } 2312 } else { 2313 /* Get a percpu copy */ 2314 local_bh_disable(); 2315 rt = rt6_get_pcpu_route(&res); 2316 2317 if (!rt) 2318 rt = rt6_make_pcpu_route(net, &res); 2319 2320 local_bh_enable(); 2321 } 2322out: 2323 if (!rt) 2324 rt = net->ipv6.ip6_null_entry; 2325 if (!(flags & RT6_LOOKUP_F_DST_NOREF)) 2326 ip6_hold_safe(net, &rt); 2327 rcu_read_unlock(); 2328 2329 return rt; 2330} 2331EXPORT_SYMBOL_GPL(ip6_pol_route); 2332 2333INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net, 2334 struct fib6_table *table, 2335 struct flowi6 *fl6, 2336 const struct sk_buff *skb, 2337 int flags) 2338{ 2339 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags); 2340} 2341 2342struct dst_entry *ip6_route_input_lookup(struct net *net, 2343 struct net_device *dev, 2344 struct flowi6 *fl6, 2345 const struct sk_buff *skb, 2346 int flags) 2347{ 2348 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) 2349 flags |= RT6_LOOKUP_F_IFACE; 2350 2351 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input); 2352} 2353EXPORT_SYMBOL_GPL(ip6_route_input_lookup); 2354 2355static void ip6_multipath_l3_keys(const struct sk_buff *skb, 2356 struct flow_keys *keys, 2357 struct flow_keys *flkeys) 2358{ 2359 const struct ipv6hdr *outer_iph = ipv6_hdr(skb); 2360 const struct ipv6hdr *key_iph = outer_iph; 2361 struct flow_keys *_flkeys = flkeys; 2362 const struct ipv6hdr *inner_iph; 2363 const struct icmp6hdr *icmph; 2364 struct ipv6hdr _inner_iph; 2365 struct icmp6hdr _icmph; 2366 2367 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) 2368 goto out; 2369 2370 icmph = skb_header_pointer(skb, skb_transport_offset(skb), 2371 sizeof(_icmph), &_icmph); 2372 if (!icmph) 2373 goto out; 2374 2375 if (!icmpv6_is_err(icmph->icmp6_type)) 2376 goto out; 2377 2378 inner_iph = skb_header_pointer(skb, 2379 skb_transport_offset(skb) + sizeof(*icmph), 2380 sizeof(_inner_iph), &_inner_iph); 2381 if (!inner_iph) 2382 goto out; 2383 2384 key_iph = inner_iph; 2385 _flkeys = NULL; 2386out: 2387 if (_flkeys) { 2388 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; 2389 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; 2390 keys->tags.flow_label = _flkeys->tags.flow_label; 2391 keys->basic.ip_proto = _flkeys->basic.ip_proto; 2392 } else { 2393 keys->addrs.v6addrs.src = key_iph->saddr; 2394 keys->addrs.v6addrs.dst = key_iph->daddr; 2395 keys->tags.flow_label = ip6_flowlabel(key_iph); 2396 keys->basic.ip_proto = key_iph->nexthdr; 2397 } 2398} 2399 2400static u32 rt6_multipath_custom_hash_outer(const struct net *net, 2401 const struct sk_buff *skb, 2402 bool *p_has_inner) 2403{ 2404 u32 hash_fields = ip6_multipath_hash_fields(net); 2405 struct flow_keys keys, hash_keys; 2406 2407 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) 2408 return 0; 2409 2410 memset(&hash_keys, 0, sizeof(hash_keys)); 2411 skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); 2412 2413 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2414 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) 2415 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; 2416 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) 2417 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; 2418 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) 2419 hash_keys.basic.ip_proto = keys.basic.ip_proto; 2420 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) 2421 hash_keys.tags.flow_label = keys.tags.flow_label; 2422 
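	/* The FIB_MULTIPATH_HASH_FIELD_* bits tested here mirror the
	 * fib_multipath_hash_fields sysctl used with hash policy 3
	 * ("custom"). As a sketch - assuming the documented bit layout,
	 * bits 0-5 being src IP, dst IP, IP proto, flow label, src port
	 * and dst port - a classic five-tuple selection would be:
	 *
	 *	# sysctl -w net.ipv6.fib_multipath_hash_fields=0x0037
	 */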
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) 2423 hash_keys.ports.src = keys.ports.src; 2424 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) 2425 hash_keys.ports.dst = keys.ports.dst; 2426 2427 *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); 2428 return fib_multipath_hash_from_keys(net, &hash_keys); 2429} 2430 2431static u32 rt6_multipath_custom_hash_inner(const struct net *net, 2432 const struct sk_buff *skb, 2433 bool has_inner) 2434{ 2435 u32 hash_fields = ip6_multipath_hash_fields(net); 2436 struct flow_keys keys, hash_keys; 2437 2438 /* We assume the packet carries an encapsulation, but if none was 2439 * encountered during dissection of the outer flow, then there is no 2440 * point in calling the flow dissector again. 2441 */ 2442 if (!has_inner) 2443 return 0; 2444 2445 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) 2446 return 0; 2447 2448 memset(&hash_keys, 0, sizeof(hash_keys)); 2449 skb_flow_dissect_flow_keys(skb, &keys, 0); 2450 2451 if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) 2452 return 0; 2453 2454 if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2455 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 2456 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) 2457 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; 2458 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) 2459 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; 2460 } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2461 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2462 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) 2463 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; 2464 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) 2465 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; 2466 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) 2467 hash_keys.tags.flow_label = keys.tags.flow_label; 2468 } 2469 2470 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) 2471 hash_keys.basic.ip_proto = keys.basic.ip_proto; 2472 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) 2473 hash_keys.ports.src = keys.ports.src; 2474 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) 2475 hash_keys.ports.dst = keys.ports.dst; 2476 2477 return fib_multipath_hash_from_keys(net, &hash_keys); 2478} 2479 2480static u32 rt6_multipath_custom_hash_skb(const struct net *net, 2481 const struct sk_buff *skb) 2482{ 2483 u32 mhash, mhash_inner; 2484 bool has_inner = true; 2485 2486 mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); 2487 mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); 2488 2489 return jhash_2words(mhash, mhash_inner, 0); 2490} 2491 2492static u32 rt6_multipath_custom_hash_fl6(const struct net *net, 2493 const struct flowi6 *fl6) 2494{ 2495 u32 hash_fields = ip6_multipath_hash_fields(net); 2496 struct flow_keys hash_keys; 2497 2498 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) 2499 return 0; 2500 2501 memset(&hash_keys, 0, sizeof(hash_keys)); 2502 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2503 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) 2504 hash_keys.addrs.v6addrs.src = fl6->saddr; 2505 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) 2506 hash_keys.addrs.v6addrs.dst = fl6->daddr; 2507 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) 2508 hash_keys.basic.ip_proto = fl6->flowi6_proto; 2509 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) 2510 hash_keys.tags.flow_label = (__force 
u32)flowi6_get_flowlabel(fl6); 2511 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { 2512 if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT) 2513 hash_keys.ports.src = (__force __be16)get_random_u16(); 2514 else 2515 hash_keys.ports.src = fl6->fl6_sport; 2516 } 2517 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) 2518 hash_keys.ports.dst = fl6->fl6_dport; 2519 2520 return fib_multipath_hash_from_keys(net, &hash_keys); 2521} 2522 2523/* if skb is set it will be used and fl6 can be NULL */ 2524u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, 2525 const struct sk_buff *skb, struct flow_keys *flkeys) 2526{ 2527 struct flow_keys hash_keys; 2528 u32 mhash = 0; 2529 2530 switch (ip6_multipath_hash_policy(net)) { 2531 case 0: 2532 memset(&hash_keys, 0, sizeof(hash_keys)); 2533 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2534 if (skb) { 2535 ip6_multipath_l3_keys(skb, &hash_keys, flkeys); 2536 } else { 2537 hash_keys.addrs.v6addrs.src = fl6->saddr; 2538 hash_keys.addrs.v6addrs.dst = fl6->daddr; 2539 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); 2540 hash_keys.basic.ip_proto = fl6->flowi6_proto; 2541 } 2542 mhash = fib_multipath_hash_from_keys(net, &hash_keys); 2543 break; 2544 case 1: 2545 if (skb) { 2546 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; 2547 struct flow_keys keys; 2548 2549 /* short-circuit if we already have L4 hash present */ 2550 if (skb->l4_hash) 2551 return skb_get_hash_raw(skb) >> 1; 2552 2553 memset(&hash_keys, 0, sizeof(hash_keys)); 2554 2555 if (!flkeys) { 2556 skb_flow_dissect_flow_keys(skb, &keys, flag); 2557 flkeys = &keys; 2558 } 2559 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2560 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; 2561 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; 2562 hash_keys.ports.src = flkeys->ports.src; 2563 hash_keys.ports.dst = flkeys->ports.dst; 2564 hash_keys.basic.ip_proto = flkeys->basic.ip_proto; 2565 } else { 2566 memset(&hash_keys, 0, sizeof(hash_keys)); 2567 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2568 hash_keys.addrs.v6addrs.src = fl6->saddr; 2569 hash_keys.addrs.v6addrs.dst = fl6->daddr; 2570 if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT) 2571 hash_keys.ports.src = (__force __be16)get_random_u16(); 2572 else 2573 hash_keys.ports.src = fl6->fl6_sport; 2574 hash_keys.ports.dst = fl6->fl6_dport; 2575 hash_keys.basic.ip_proto = fl6->flowi6_proto; 2576 } 2577 mhash = fib_multipath_hash_from_keys(net, &hash_keys); 2578 break; 2579 case 2: 2580 memset(&hash_keys, 0, sizeof(hash_keys)); 2581 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2582 if (skb) { 2583 struct flow_keys keys; 2584 2585 if (!flkeys) { 2586 skb_flow_dissect_flow_keys(skb, &keys, 0); 2587 flkeys = &keys; 2588 } 2589 2590 /* Inner can be v4 or v6 */ 2591 if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2592 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 2593 hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; 2594 hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; 2595 } else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2596 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2597 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; 2598 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; 2599 hash_keys.tags.flow_label = flkeys->tags.flow_label; 2600 hash_keys.basic.ip_proto = flkeys->basic.ip_proto; 2601 } else { 2602 /* Same as case 0 */ 2603 
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2604 ip6_multipath_l3_keys(skb, &hash_keys, flkeys); 2605 } 2606 } else { 2607 /* Same as case 0 */ 2608 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2609 hash_keys.addrs.v6addrs.src = fl6->saddr; 2610 hash_keys.addrs.v6addrs.dst = fl6->daddr; 2611 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); 2612 hash_keys.basic.ip_proto = fl6->flowi6_proto; 2613 } 2614 mhash = fib_multipath_hash_from_keys(net, &hash_keys); 2615 break; 2616 case 3: 2617 if (skb) 2618 mhash = rt6_multipath_custom_hash_skb(net, skb); 2619 else 2620 mhash = rt6_multipath_custom_hash_fl6(net, fl6); 2621 break; 2622 } 2623 2624 return mhash >> 1; 2625} 2626 2627/* Called with rcu held */ 2628void ip6_route_input(struct sk_buff *skb) 2629{ 2630 const struct ipv6hdr *iph = ipv6_hdr(skb); 2631 struct net *net = dev_net(skb->dev); 2632 int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF; 2633 struct ip_tunnel_info *tun_info; 2634 struct flowi6 fl6 = { 2635 .flowi6_iif = skb->dev->ifindex, 2636 .daddr = iph->daddr, 2637 .saddr = iph->saddr, 2638 .flowlabel = ip6_flowinfo(iph), 2639 .flowi6_mark = skb->mark, 2640 .flowi6_proto = iph->nexthdr, 2641 }; 2642 struct flow_keys *flkeys = NULL, _flkeys; 2643 2644 tun_info = skb_tunnel_info(skb); 2645 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) 2646 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; 2647 2648 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys)) 2649 flkeys = &_flkeys; 2650 2651 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) 2652 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); 2653 skb_dst_drop(skb); 2654 skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev, 2655 &fl6, skb, flags)); 2656} 2657 2658INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net, 2659 struct fib6_table *table, 2660 struct flowi6 *fl6, 2661 const struct sk_buff *skb, 2662 int flags) 2663{ 2664 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); 2665} 2666 2667static struct dst_entry *ip6_route_output_flags_noref(struct net *net, 2668 const struct sock *sk, 2669 struct flowi6 *fl6, 2670 int flags) 2671{ 2672 bool any_src; 2673 2674 if (ipv6_addr_type(&fl6->daddr) & 2675 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { 2676 struct dst_entry *dst; 2677 2678 /* This function does not take refcnt on the dst */ 2679 dst = l3mdev_link_scope_lookup(net, fl6); 2680 if (dst) 2681 return dst; 2682 } 2683 2684 fl6->flowi6_iif = LOOPBACK_IFINDEX; 2685 2686 flags |= RT6_LOOKUP_F_DST_NOREF; 2687 any_src = ipv6_addr_any(&fl6->saddr); 2688 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 2689 (fl6->flowi6_oif && any_src)) 2690 flags |= RT6_LOOKUP_F_IFACE; 2691 2692 if (!any_src) 2693 flags |= RT6_LOOKUP_F_HAS_SADDR; 2694 else if (sk) 2695 flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); 2696 2697 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); 2698} 2699 2700struct dst_entry *ip6_route_output_flags(struct net *net, 2701 const struct sock *sk, 2702 struct flowi6 *fl6, 2703 int flags) 2704{ 2705 struct dst_entry *dst; 2706 struct rt6_info *rt6; 2707 2708 rcu_read_lock(); 2709 dst = ip6_route_output_flags_noref(net, sk, fl6, flags); 2710 rt6 = dst_rt6_info(dst); 2711 /* For dst cached in uncached_list, refcnt is already taken. 
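	 * For everything else, try to take one here; if the dst is already
	 * being destroyed (dst_hold_safe() fails), fall back to the
	 * refcounted null entry.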
*/ 2712 if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { 2713 dst = &net->ipv6.ip6_null_entry->dst; 2714 dst_hold(dst); 2715 } 2716 rcu_read_unlock(); 2717 2718 return dst; 2719} 2720EXPORT_SYMBOL_GPL(ip6_route_output_flags); 2721 2722struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2723{ 2724 struct rt6_info *rt, *ort = dst_rt6_info(dst_orig); 2725 struct net_device *loopback_dev = net->loopback_dev; 2726 struct dst_entry *new = NULL; 2727 2728 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 2729 DST_OBSOLETE_DEAD, 0); 2730 if (rt) { 2731 rt6_info_init(rt); 2732 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); 2733 2734 new = &rt->dst; 2735 new->__use = 1; 2736 new->input = dst_discard; 2737 new->output = dst_discard_out; 2738 2739 dst_copy_metrics(new, &ort->dst); 2740 2741 rt->rt6i_idev = in6_dev_get(loopback_dev); 2742 rt->rt6i_gateway = ort->rt6i_gateway; 2743 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; 2744 2745 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 2746#ifdef CONFIG_IPV6_SUBTREES 2747 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 2748#endif 2749 } 2750 2751 dst_release(dst_orig); 2752 return new ? new : ERR_PTR(-ENOMEM); 2753} 2754 2755/* 2756 * Destination cache support functions 2757 */ 2758 2759static bool fib6_check(struct fib6_info *f6i, u32 cookie) 2760{ 2761 u32 rt_cookie = 0; 2762 2763 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) 2764 return false; 2765 2766 if (fib6_check_expired(f6i)) 2767 return false; 2768 2769 return true; 2770} 2771 2772static struct dst_entry *rt6_check(struct rt6_info *rt, 2773 struct fib6_info *from, 2774 u32 cookie) 2775{ 2776 u32 rt_cookie = 0; 2777 2778 if (!from || !fib6_get_cookie_safe(from, &rt_cookie) || 2779 rt_cookie != cookie) 2780 return NULL; 2781 2782 if (rt6_check_expired(rt)) 2783 return NULL; 2784 2785 return &rt->dst; 2786} 2787 2788static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, 2789 struct fib6_info *from, 2790 u32 cookie) 2791{ 2792 if (!__rt6_check_expired(rt) && 2793 READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK && 2794 fib6_check(from, cookie)) 2795 return &rt->dst; 2796 return NULL; 2797} 2798 2799INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, 2800 u32 cookie) 2801{ 2802 struct dst_entry *dst_ret; 2803 struct fib6_info *from; 2804 struct rt6_info *rt; 2805 2806 rt = dst_rt6_info(dst); 2807 2808 if (rt->sernum) 2809 return rt6_is_valid(rt) ? dst : NULL; 2810 2811 rcu_read_lock(); 2812 2813 /* All IPV6 dsts are created with ->obsolete set to the value 2814 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 2815 * into this function always. 
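	 *
	 * The cookie is compared against the sernum of the backing fib6
	 * node (see fib6_get_cookie_safe()); any tree change bumps the
	 * sernum, so stale cached dsts fail here and force a fresh lookup.
	 * Illustration only - a socket typically revalidates its cached
	 * route along the lines of:
	 *
	 *	dst = __sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	 *	if (!dst)
	 *		(redo the route lookup, e.g. ip6_dst_lookup_flow())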
2816 */ 2817 2818 from = rcu_dereference(rt->from); 2819 2820 if (from && (rt->rt6i_flags & RTF_PCPU || 2821 unlikely(!list_empty(&rt->dst.rt_uncached)))) 2822 dst_ret = rt6_dst_from_check(rt, from, cookie); 2823 else 2824 dst_ret = rt6_check(rt, from, cookie); 2825 2826 rcu_read_unlock(); 2827 2828 return dst_ret; 2829} 2830EXPORT_INDIRECT_CALLABLE(ip6_dst_check); 2831 2832static void ip6_negative_advice(struct sock *sk, 2833 struct dst_entry *dst) 2834{ 2835 struct rt6_info *rt = dst_rt6_info(dst); 2836 2837 if (rt->rt6i_flags & RTF_CACHE) { 2838 rcu_read_lock(); 2839 if (rt6_check_expired(rt)) { 2840 /* rt/dst can not be destroyed yet, 2841 * because of rcu_read_lock() 2842 */ 2843 sk_dst_reset(sk); 2844 rt6_remove_exception_rt(rt); 2845 } 2846 rcu_read_unlock(); 2847 return; 2848 } 2849 sk_dst_reset(sk); 2850} 2851 2852static void ip6_link_failure(struct sk_buff *skb) 2853{ 2854 struct rt6_info *rt; 2855 2856 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 2857 2858 rt = dst_rt6_info(skb_dst(skb)); 2859 if (rt) { 2860 rcu_read_lock(); 2861 if (rt->rt6i_flags & RTF_CACHE) { 2862 rt6_remove_exception_rt(rt); 2863 } else { 2864 struct fib6_info *from; 2865 struct fib6_node *fn; 2866 2867 from = rcu_dereference(rt->from); 2868 if (from) { 2869 fn = rcu_dereference(from->fib6_node); 2870 if (fn && (rt->rt6i_flags & RTF_DEFAULT)) 2871 WRITE_ONCE(fn->fn_sernum, -1); 2872 } 2873 } 2874 rcu_read_unlock(); 2875 } 2876} 2877 2878static void rt6_update_expires(struct rt6_info *rt0, int timeout) 2879{ 2880 if (!(rt0->rt6i_flags & RTF_EXPIRES)) { 2881 struct fib6_info *from; 2882 2883 rcu_read_lock(); 2884 from = rcu_dereference(rt0->from); 2885 if (from) 2886 WRITE_ONCE(rt0->dst.expires, from->expires); 2887 rcu_read_unlock(); 2888 } 2889 2890 dst_set_expires(&rt0->dst, timeout); 2891 rt0->rt6i_flags |= RTF_EXPIRES; 2892} 2893 2894static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) 2895{ 2896 struct net *net = dev_net(rt->dst.dev); 2897 2898 dst_metric_set(&rt->dst, RTAX_MTU, mtu); 2899 rt->rt6i_flags |= RTF_MODIFIED; 2900 rt6_update_expires(rt, READ_ONCE(net->ipv6.sysctl.ip6_rt_mtu_expires)); 2901} 2902 2903static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) 2904{ 2905 return !(rt->rt6i_flags & RTF_CACHE) && 2906 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); 2907} 2908 2909static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 2910 const struct ipv6hdr *iph, u32 mtu, 2911 bool confirm_neigh) 2912{ 2913 const struct in6_addr *daddr, *saddr; 2914 struct rt6_info *rt6 = dst_rt6_info(dst); 2915 2916 /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) 2917 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. 
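	 * The only hard floor enforced below is IPV6_MIN_MTU (1280 bytes,
	 * per RFC 8200): reported MTUs smaller than that are ignored.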
2918 * [see also comment in rt6_mtu_change_route()] 2919 */ 2920 2921 if (iph) { 2922 daddr = &iph->daddr; 2923 saddr = &iph->saddr; 2924 } else if (sk) { 2925 daddr = &sk->sk_v6_daddr; 2926 saddr = &inet6_sk(sk)->saddr; 2927 } else { 2928 daddr = NULL; 2929 saddr = NULL; 2930 } 2931 2932 if (confirm_neigh) 2933 dst_confirm_neigh(dst, daddr); 2934 2935 if (mtu < IPV6_MIN_MTU) 2936 return; 2937 if (mtu >= dst6_mtu(dst)) 2938 return; 2939 2940 if (!rt6_cache_allowed_for_pmtu(rt6)) { 2941 rt6_do_update_pmtu(rt6, mtu); 2942 /* update rt6_ex->stamp for cache */ 2943 if (rt6->rt6i_flags & RTF_CACHE) 2944 rt6_update_exception_stamp_rt(rt6); 2945 } else if (daddr) { 2946 struct fib6_result res = {}; 2947 struct rt6_info *nrt6; 2948 2949 rcu_read_lock(); 2950 res.f6i = rcu_dereference(rt6->from); 2951 if (!res.f6i) 2952 goto out_unlock; 2953 2954 res.fib6_flags = res.f6i->fib6_flags; 2955 res.fib6_type = res.f6i->fib6_type; 2956 2957 if (res.f6i->nh) { 2958 struct fib6_nh_match_arg arg = { 2959 .dev = dst_dev_rcu(dst), 2960 .gw = &rt6->rt6i_gateway, 2961 }; 2962 2963 nexthop_for_each_fib6_nh(res.f6i->nh, 2964 fib6_nh_find_match, &arg); 2965 2966 /* fib6_info uses a nexthop that does not have fib6_nh 2967 * using the dst->dev + gw. Should be impossible. 2968 */ 2969 if (!arg.match) 2970 goto out_unlock; 2971 2972 res.nh = arg.match; 2973 } else { 2974 res.nh = res.f6i->fib6_nh; 2975 } 2976 2977 nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr); 2978 if (nrt6) { 2979 rt6_do_update_pmtu(nrt6, mtu); 2980 if (rt6_insert_exception(nrt6, &res)) 2981 dst_release_immediate(&nrt6->dst); 2982 } 2983out_unlock: 2984 rcu_read_unlock(); 2985 } 2986} 2987 2988static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 2989 struct sk_buff *skb, u32 mtu, 2990 bool confirm_neigh) 2991{ 2992 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, 2993 confirm_neigh); 2994} 2995 2996void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 2997 int oif, u32 mark, kuid_t uid) 2998{ 2999 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 3000 struct dst_entry *dst; 3001 struct flowi6 fl6 = { 3002 .flowi6_oif = oif, 3003 .flowi6_mark = mark ? 
mark : IP6_REPLY_MARK(net, skb->mark), 3004 .daddr = iph->daddr, 3005 .saddr = iph->saddr, 3006 .flowlabel = ip6_flowinfo(iph), 3007 .flowi6_uid = uid, 3008 }; 3009 3010 dst = ip6_route_output(net, NULL, &fl6); 3011 if (!dst->error) 3012 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); 3013 dst_release(dst); 3014} 3015EXPORT_SYMBOL_GPL(ip6_update_pmtu); 3016 3017void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 3018{ 3019 int oif = sk->sk_bound_dev_if; 3020 struct dst_entry *dst; 3021 3022 if (!oif && skb->dev) 3023 oif = l3mdev_master_ifindex(skb->dev); 3024 3025 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark), 3026 sk_uid(sk)); 3027 3028 dst = __sk_dst_get(sk); 3029 if (!dst || !READ_ONCE(dst->obsolete) || 3030 dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) 3031 return; 3032 3033 bh_lock_sock(sk); 3034 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) 3035 ip6_datagram_dst_update(sk, false); 3036 bh_unlock_sock(sk); 3037} 3038EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 3039 3040void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, 3041 const struct flowi6 *fl6) 3042{ 3043#ifdef CONFIG_IPV6_SUBTREES 3044 struct ipv6_pinfo *np = inet6_sk(sk); 3045#endif 3046 3047 ip6_dst_store(sk, dst, 3048 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr), 3049#ifdef CONFIG_IPV6_SUBTREES 3050 ipv6_addr_equal(&fl6->saddr, &np->saddr) ? 3051 true : 3052#endif 3053 false); 3054} 3055 3056static bool ip6_redirect_nh_match(const struct fib6_result *res, 3057 struct flowi6 *fl6, 3058 const struct in6_addr *gw, 3059 struct rt6_info **ret) 3060{ 3061 const struct fib6_nh *nh = res->nh; 3062 3063 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family || 3064 fl6->flowi6_oif != nh->fib_nh_dev->ifindex) 3065 return false; 3066 3067 /* rt_cache's gateway might be different from its 'parent' 3068 * in the case of an ip redirect. 3069 * So we keep searching in the exception table if the gateway 3070 * is different. 3071 */ 3072 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) { 3073 struct rt6_info *rt_cache; 3074 3075 rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr); 3076 if (rt_cache && 3077 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) { 3078 *ret = rt_cache; 3079 return true; 3080 } 3081 return false; 3082 } 3083 return true; 3084} 3085 3086struct fib6_nh_rd_arg { 3087 struct fib6_result *res; 3088 struct flowi6 *fl6; 3089 const struct in6_addr *gw; 3090 struct rt6_info **ret; 3091}; 3092 3093static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg) 3094{ 3095 struct fib6_nh_rd_arg *arg = _arg; 3096 3097 arg->res->nh = nh; 3098 return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret); 3099} 3100 3101/* Handle redirects */ 3102struct ip6rd_flowi { 3103 struct flowi6 fl6; 3104 struct in6_addr gateway; 3105}; 3106 3107INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, 3108 struct fib6_table *table, 3109 struct flowi6 *fl6, 3110 const struct sk_buff *skb, 3111 int flags) 3112{ 3113 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 3114 struct rt6_info *ret = NULL; 3115 struct fib6_result res = {}; 3116 struct fib6_nh_rd_arg arg = { 3117 .res = &res, 3118 .fl6 = fl6, 3119 .gw = &rdfl->gateway, 3120 .ret = &ret 3121 }; 3122 struct fib6_info *rt; 3123 struct fib6_node *fn; 3124 3125 /* Get the "current" route for this destination and 3126 * check if the redirect has come from appropriate router. 
3127 * 3128 * RFC 4861 specifies that redirects should only be 3129 * accepted if they come from the nexthop to the target. 3130 * Due to the way the routes are chosen, this notion 3131 * is a bit fuzzy and one might need to check all possible 3132 * routes. 3133 */ 3134 3135 rcu_read_lock(); 3136 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 3137restart: 3138 for_each_fib6_node_rt_rcu(fn) { 3139 res.f6i = rt; 3140 if (fib6_check_expired(rt)) 3141 continue; 3142 if (rt->fib6_flags & RTF_REJECT) 3143 break; 3144 if (unlikely(rt->nh)) { 3145 if (nexthop_is_blackhole(rt->nh)) 3146 continue; 3147 /* on match, res->nh is filled in and potentially ret */ 3148 if (nexthop_for_each_fib6_nh(rt->nh, 3149 fib6_nh_redirect_match, 3150 &arg)) 3151 goto out; 3152 } else { 3153 res.nh = rt->fib6_nh; 3154 if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, 3155 &ret)) 3156 goto out; 3157 } 3158 } 3159 3160 if (!rt) 3161 rt = net->ipv6.fib6_null_entry; 3162 else if (rt->fib6_flags & RTF_REJECT) { 3163 ret = net->ipv6.ip6_null_entry; 3164 goto out; 3165 } 3166 3167 if (rt == net->ipv6.fib6_null_entry) { 3168 fn = fib6_backtrack(fn, &fl6->saddr); 3169 if (fn) 3170 goto restart; 3171 } 3172 3173 res.f6i = rt; 3174 res.nh = rt->fib6_nh; 3175out: 3176 if (ret) { 3177 ip6_hold_safe(net, &ret); 3178 } else { 3179 res.fib6_flags = res.f6i->fib6_flags; 3180 res.fib6_type = res.f6i->fib6_type; 3181 ret = ip6_create_rt_rcu(&res); 3182 } 3183 3184 rcu_read_unlock(); 3185 3186 trace_fib6_table_lookup(net, &res, table, fl6); 3187 return ret; 3188}; 3189 3190static struct dst_entry *ip6_route_redirect(struct net *net, 3191 const struct flowi6 *fl6, 3192 const struct sk_buff *skb, 3193 const struct in6_addr *gateway) 3194{ 3195 int flags = RT6_LOOKUP_F_HAS_SADDR; 3196 struct ip6rd_flowi rdfl; 3197 3198 rdfl.fl6 = *fl6; 3199 rdfl.gateway = *gateway; 3200 3201 return fib6_rule_lookup(net, &rdfl.fl6, skb, 3202 flags, __ip6_route_redirect); 3203} 3204 3205void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, 3206 kuid_t uid) 3207{ 3208 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 3209 struct dst_entry *dst; 3210 struct flowi6 fl6 = { 3211 .flowi6_iif = LOOPBACK_IFINDEX, 3212 .flowi6_oif = oif, 3213 .flowi6_mark = mark, 3214 .daddr = iph->daddr, 3215 .saddr = iph->saddr, 3216 .flowlabel = ip6_flowinfo(iph), 3217 .flowi6_uid = uid, 3218 }; 3219 3220 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); 3221 rt6_do_redirect(dst, NULL, skb); 3222 dst_release(dst); 3223} 3224EXPORT_SYMBOL_GPL(ip6_redirect); 3225 3226void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) 3227{ 3228 const struct ipv6hdr *iph = ipv6_hdr(skb); 3229 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); 3230 struct dst_entry *dst; 3231 struct flowi6 fl6 = { 3232 .flowi6_iif = LOOPBACK_IFINDEX, 3233 .flowi6_oif = oif, 3234 .daddr = msg->dest, 3235 .saddr = iph->daddr, 3236 .flowi6_uid = sock_net_uid(net, NULL), 3237 }; 3238 3239 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); 3240 rt6_do_redirect(dst, NULL, skb); 3241 dst_release(dst); 3242} 3243 3244void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 3245{ 3246 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, 3247 READ_ONCE(sk->sk_mark), sk_uid(sk)); 3248} 3249EXPORT_SYMBOL_GPL(ip6_sk_redirect); 3250 3251static unsigned int ip6_default_advmss(const struct dst_entry *dst) 3252{ 3253 unsigned int mtu = dst6_mtu(dst); 3254 struct net *net; 3255 3256 mtu -= sizeof(struct ipv6hdr) + sizeof(struct 
tcphdr); 3257 3258 rcu_read_lock(); 3259 3260 net = dst_dev_net_rcu(dst); 3261 mtu = max_t(unsigned int, mtu, 3262 READ_ONCE(net->ipv6.sysctl.ip6_rt_min_advmss)); 3263 3264 rcu_read_unlock(); 3265 3266 /* 3267 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 3268 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 3269 * IPV6_MAXPLEN is also valid and means: "any MSS, 3270 * rely only on pmtu discovery" 3271 */ 3272 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 3273 mtu = IPV6_MAXPLEN; 3274 return mtu; 3275} 3276 3277INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) 3278{ 3279 return ip6_dst_mtu_maybe_forward(dst, false); 3280} 3281EXPORT_INDIRECT_CALLABLE(ip6_mtu); 3282 3283/* MTU selection: 3284 * 1. mtu on route is locked - use it 3285 * 2. mtu from nexthop exception 3286 * 3. mtu from egress device 3287 * 3288 * based on ip6_dst_mtu_forward and exception logic of 3289 * rt6_find_cached_rt; called with rcu_read_lock 3290 */ 3291u32 ip6_mtu_from_fib6(const struct fib6_result *res, 3292 const struct in6_addr *daddr, 3293 const struct in6_addr *saddr) 3294{ 3295 const struct fib6_nh *nh = res->nh; 3296 struct fib6_info *f6i = res->f6i; 3297 struct inet6_dev *idev; 3298 struct rt6_info *rt; 3299 u32 mtu = 0; 3300 3301 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { 3302 mtu = f6i->fib6_pmtu; 3303 if (mtu) 3304 goto out; 3305 } 3306 3307 rt = rt6_find_cached_rt(res, daddr, saddr); 3308 if (unlikely(rt)) { 3309 mtu = dst_metric_raw(&rt->dst, RTAX_MTU); 3310 } else { 3311 struct net_device *dev = nh->fib_nh_dev; 3312 3313 mtu = IPV6_MIN_MTU; 3314 idev = __in6_dev_get(dev); 3315 if (idev) 3316 mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6)); 3317 } 3318 3319 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); 3320out: 3321 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); 3322} 3323 3324struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 3325 struct flowi6 *fl6) 3326{ 3327 struct dst_entry *dst; 3328 struct rt6_info *rt; 3329 struct inet6_dev *idev = in6_dev_get(dev); 3330 struct net *net = dev_net(dev); 3331 3332 if (unlikely(!idev)) 3333 return ERR_PTR(-ENODEV); 3334 3335 rt = ip6_dst_alloc(net, dev, 0); 3336 if (unlikely(!rt)) { 3337 in6_dev_put(idev); 3338 dst = ERR_PTR(-ENOMEM); 3339 goto out; 3340 } 3341 3342 rt->dst.input = ip6_input; 3343 rt->dst.output = ip6_output; 3344 rt->rt6i_gateway = fl6->daddr; 3345 rt->rt6i_dst.addr = fl6->daddr; 3346 rt->rt6i_dst.plen = 128; 3347 rt->rt6i_idev = idev; 3348 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 3349 3350 /* Add this dst into uncached_list so that rt6_disable_ip() can 3351 * do proper release of the net_device 3352 */ 3353 rt6_uncached_list_add(rt); 3354 3355 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 3356 3357out: 3358 return dst; 3359} 3360 3361static void ip6_dst_gc(struct dst_ops *ops) 3362{ 3363 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 3364 int rt_min_interval = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_min_interval); 3365 int rt_elasticity = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_elasticity); 3366 int rt_gc_timeout = READ_ONCE(net->ipv6.sysctl.ip6_rt_gc_timeout); 3367 unsigned long rt_last_gc = READ_ONCE(net->ipv6.ip6_rt_last_gc); 3368 unsigned int val; 3369 int entries; 3370 3371 if (time_after(rt_last_gc + rt_min_interval, jiffies)) 3372 goto out; 3373 3374 fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true); 3375 entries = dst_entries_get_slow(ops); 3376 if (entries < ops->gc_thresh) 3377 atomic_set(&net->ipv6.ip6_rt_gc_expire, 
rt_gc_timeout >> 1); 3378out: 3379 val = atomic_read(&net->ipv6.ip6_rt_gc_expire); 3380 atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity)); 3381} 3382 3383static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, 3384 const struct in6_addr *gw_addr, u32 tbid, 3385 int flags, struct fib6_result *res) 3386{ 3387 struct flowi6 fl6 = { 3388 .flowi6_oif = cfg->fc_ifindex, 3389 .daddr = *gw_addr, 3390 .saddr = cfg->fc_prefsrc, 3391 }; 3392 struct fib6_table *table; 3393 int err; 3394 3395 table = fib6_get_table(net, tbid); 3396 if (!table) 3397 return -EINVAL; 3398 3399 if (!ipv6_addr_any(&cfg->fc_prefsrc)) 3400 flags |= RT6_LOOKUP_F_HAS_SADDR; 3401 3402 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; 3403 3404 err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags); 3405 if (!err && res->f6i != net->ipv6.fib6_null_entry) 3406 fib6_select_path(net, res, &fl6, cfg->fc_ifindex, 3407 cfg->fc_ifindex != 0, NULL, flags); 3408 3409 return err; 3410} 3411 3412static int ip6_route_check_nh_onlink(struct net *net, 3413 struct fib6_config *cfg, 3414 const struct net_device *dev, 3415 struct netlink_ext_ack *extack) 3416{ 3417 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 3418 const struct in6_addr *gw_addr = &cfg->fc_gateway; 3419 struct fib6_result res = {}; 3420 int err; 3421 3422 err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res); 3423 if (!err && !(res.fib6_flags & RTF_REJECT) && 3424 res.fib6_type != RTN_UNICAST) { 3425 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); 3426 err = -EINVAL; 3427 } 3428 3429 return err; 3430} 3431 3432static int ip6_route_check_nh(struct net *net, 3433 struct fib6_config *cfg, 3434 struct net_device **_dev, 3435 netdevice_tracker *dev_tracker, 3436 struct inet6_dev **idev) 3437{ 3438 const struct in6_addr *gw_addr = &cfg->fc_gateway; 3439 struct net_device *dev = _dev ? *_dev : NULL; 3440 int flags = RT6_LOOKUP_F_IFACE; 3441 struct fib6_result res = {}; 3442 int err = -EHOSTUNREACH; 3443 3444 if (cfg->fc_table) { 3445 err = ip6_nh_lookup_table(net, cfg, gw_addr, 3446 cfg->fc_table, flags, &res); 3447 /* gw_addr can not require a gateway or resolve to a reject 3448 * route. If a device is given, it must match the result. 3449 */ 3450 if (err || res.fib6_flags & RTF_REJECT || 3451 res.nh->fib_nh_gw_family || 3452 (dev && dev != res.nh->fib_nh_dev)) 3453 err = -EHOSTUNREACH; 3454 } 3455 3456 if (err < 0) { 3457 struct flowi6 fl6 = { 3458 .flowi6_oif = cfg->fc_ifindex, 3459 .daddr = *gw_addr, 3460 }; 3461 3462 err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags); 3463 if (err || res.fib6_flags & RTF_REJECT || 3464 res.nh->fib_nh_gw_family) 3465 err = -EHOSTUNREACH; 3466 3467 if (err) 3468 return err; 3469 3470 fib6_select_path(net, &res, &fl6, cfg->fc_ifindex, 3471 cfg->fc_ifindex != 0, NULL, flags); 3472 } 3473 3474 err = 0; 3475 if (dev) { 3476 if (dev != res.nh->fib_nh_dev) 3477 err = -EHOSTUNREACH; 3478 } else { 3479 *_dev = dev = res.nh->fib_nh_dev; 3480 netdev_hold(dev, dev_tracker, GFP_ATOMIC); 3481 *idev = in6_dev_get(dev); 3482 } 3483 3484 return err; 3485} 3486 3487static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, 3488 struct net_device **_dev, 3489 netdevice_tracker *dev_tracker, 3490 struct inet6_dev **idev, 3491 struct netlink_ext_ack *extack) 3492{ 3493 const struct in6_addr *gw_addr = &cfg->fc_gateway; 3494 int gwa_type = ipv6_addr_type(gw_addr); 3495 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? 
false : true;
	const struct net_device *dev = *_dev;
	bool need_addr_check = !dev;
	int err = -EINVAL;

	/* If gw_addr is local we will fail to detect this if the
	 * address is still TENTATIVE (DAD in progress): rt6_lookup()
	 * will return the already-added prefix route via the interface
	 * that the prefix route was assigned to, which might be
	 * non-loopback.
	 */
	if (dev &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		goto out;
	}

	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
		/* IPv6 strictly inhibits using non-link-local
		 * addresses as nexthop addresses.
		 * Otherwise, a router will not be able to send redirects.
		 * That is a good thing, but in some (rare!) circumstances
		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
		 * some exceptions. --ANK
		 * We allow IPv4-mapped nexthops to support RFC4798-type
		 * addressing.
		 */
		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			goto out;
		}

		rcu_read_lock();

		if (cfg->fc_flags & RTNH_F_ONLINK)
			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
		else
			err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
						 idev);

		rcu_read_unlock();

		if (err)
			goto out;
	}

	/* reload in case device was changed */
	dev = *_dev;

	err = -EINVAL;
	if (!dev) {
		NL_SET_ERR_MSG(extack, "Egress device not specified");
		goto out;
	} else if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack,
			       "Egress device can not be loopback device for this route");
		goto out;
	}

	/* if we did not check gw_addr above, do so now that the
	 * egress device has been resolved.
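	 * (need_addr_check is only true when the caller did not supply a
	 * device, i.e. when ip6_route_check_nh() may have picked one.)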
3555 */ 3556 if (need_addr_check && 3557 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { 3558 NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); 3559 goto out; 3560 } 3561 3562 err = 0; 3563out: 3564 return err; 3565} 3566 3567static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) 3568{ 3569 if ((flags & RTF_REJECT) || 3570 (dev && (dev->flags & IFF_LOOPBACK) && 3571 !(addr_type & IPV6_ADDR_LOOPBACK) && 3572 !(flags & (RTF_ANYCAST | RTF_LOCAL)))) 3573 return true; 3574 3575 return false; 3576} 3577 3578int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, 3579 struct fib6_config *cfg, gfp_t gfp_flags, 3580 struct netlink_ext_ack *extack) 3581{ 3582 netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker; 3583 struct net_device *dev = NULL; 3584 struct inet6_dev *idev = NULL; 3585 int addr_type; 3586 int err; 3587 3588 fib6_nh->fib_nh_family = AF_INET6; 3589#ifdef CONFIG_IPV6_ROUTER_PREF 3590 fib6_nh->last_probe = jiffies; 3591#endif 3592 if (cfg->fc_is_fdb) { 3593 fib6_nh->fib_nh_gw6 = cfg->fc_gateway; 3594 fib6_nh->fib_nh_gw_family = AF_INET6; 3595 return 0; 3596 } 3597 3598 err = -ENODEV; 3599 if (cfg->fc_ifindex) { 3600 dev = netdev_get_by_index(net, cfg->fc_ifindex, 3601 dev_tracker, gfp_flags); 3602 if (!dev) 3603 goto out; 3604 idev = in6_dev_get(dev); 3605 if (!idev) 3606 goto out; 3607 } 3608 3609 if (cfg->fc_flags & RTNH_F_ONLINK) { 3610 if (!dev) { 3611 NL_SET_ERR_MSG(extack, 3612 "Nexthop device required for onlink"); 3613 goto out; 3614 } 3615 3616 if (!(dev->flags & IFF_UP)) { 3617 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 3618 err = -ENETDOWN; 3619 goto out; 3620 } 3621 3622 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; 3623 } 3624 3625 fib6_nh->fib_nh_weight = 1; 3626 3627 /* We cannot add true routes via loopback here, 3628 * they would result in kernel looping; promote them to reject routes 3629 */ 3630 addr_type = ipv6_addr_type(&cfg->fc_dst); 3631 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { 3632 /* hold loopback dev/idev if we haven't done so. 
*/ 3633 if (dev != net->loopback_dev) { 3634 if (dev) { 3635 netdev_put(dev, dev_tracker); 3636 in6_dev_put(idev); 3637 } 3638 dev = net->loopback_dev; 3639 netdev_hold(dev, dev_tracker, gfp_flags); 3640 idev = in6_dev_get(dev); 3641 if (!idev) { 3642 err = -ENODEV; 3643 goto out; 3644 } 3645 } 3646 goto pcpu_alloc; 3647 } 3648 3649 if (cfg->fc_flags & RTF_GATEWAY) { 3650 err = ip6_validate_gw(net, cfg, &dev, dev_tracker, 3651 &idev, extack); 3652 if (err) 3653 goto out; 3654 3655 fib6_nh->fib_nh_gw6 = cfg->fc_gateway; 3656 fib6_nh->fib_nh_gw_family = AF_INET6; 3657 } 3658 3659 err = -ENODEV; 3660 if (!dev) 3661 goto out; 3662 3663 if (!idev || idev->cnf.disable_ipv6) { 3664 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); 3665 err = -EACCES; 3666 goto out; 3667 } 3668 3669 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { 3670 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 3671 err = -ENETDOWN; 3672 goto out; 3673 } 3674 3675 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && 3676 !netif_carrier_ok(dev)) 3677 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 3678 3679 err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap, 3680 cfg->fc_encap_type, cfg, gfp_flags, extack); 3681 if (err) 3682 goto out; 3683 3684pcpu_alloc: 3685 fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); 3686 if (!fib6_nh->rt6i_pcpu) { 3687 err = -ENOMEM; 3688 goto out; 3689 } 3690 3691 fib6_nh->fib_nh_dev = dev; 3692 fib6_nh->fib_nh_oif = dev->ifindex; 3693 err = 0; 3694out: 3695 if (idev) 3696 in6_dev_put(idev); 3697 3698 if (err) { 3699 fib_nh_common_release(&fib6_nh->nh_common); 3700 fib6_nh->nh_common.nhc_pcpu_rth_output = NULL; 3701 fib6_nh->fib_nh_lws = NULL; 3702 netdev_put(dev, dev_tracker); 3703 } 3704 3705 return err; 3706} 3707 3708void fib6_nh_release(struct fib6_nh *fib6_nh) 3709{ 3710 struct rt6_exception_bucket *bucket; 3711 3712 rcu_read_lock(); 3713 3714 fib6_nh_flush_exceptions(fib6_nh, NULL); 3715 bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL); 3716 if (bucket) { 3717 rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL); 3718 kfree(bucket); 3719 } 3720 3721 rcu_read_unlock(); 3722 3723 fib6_nh_release_dsts(fib6_nh); 3724 free_percpu(fib6_nh->rt6i_pcpu); 3725 3726 fib_nh_common_release(&fib6_nh->nh_common); 3727} 3728 3729void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) 3730{ 3731 int cpu; 3732 3733 if (!fib6_nh->rt6i_pcpu) 3734 return; 3735 3736 for_each_possible_cpu(cpu) { 3737 struct rt6_info *pcpu_rt, **ppcpu_rt; 3738 3739 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); 3740 pcpu_rt = xchg(ppcpu_rt, NULL); 3741 if (pcpu_rt) { 3742 dst_dev_put(&pcpu_rt->dst); 3743 dst_release(&pcpu_rt->dst); 3744 } 3745 } 3746} 3747 3748static int fib6_config_validate(struct fib6_config *cfg, 3749 struct netlink_ext_ack *extack) 3750{ 3751 /* RTF_PCPU is an internal flag; can not be set by userspace */ 3752 if (cfg->fc_flags & RTF_PCPU) { 3753 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); 3754 goto errout; 3755 } 3756 3757 /* RTF_CACHE is an internal flag; can not be set by userspace */ 3758 if (cfg->fc_flags & RTF_CACHE) { 3759 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); 3760 goto errout; 3761 } 3762 3763 if (cfg->fc_type > RTN_MAX) { 3764 NL_SET_ERR_MSG(extack, "Invalid route type"); 3765 goto errout; 3766 } 3767 3768 if (cfg->fc_dst_len > 128) { 3769 NL_SET_ERR_MSG(extack, "Invalid prefix length"); 3770 goto errout; 3771 } 3772 3773#ifdef CONFIG_IPV6_SUBTREES 3774 if (cfg->fc_src_len > 128) { 3775 NL_SET_ERR_MSG(extack, 
"Invalid source address length"); 3776 goto errout; 3777 } 3778 3779 if (cfg->fc_nh_id && cfg->fc_src_len) { 3780 NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); 3781 goto errout; 3782 } 3783#else 3784 if (cfg->fc_src_len) { 3785 NL_SET_ERR_MSG(extack, 3786 "Specifying source address requires IPV6_SUBTREES to be enabled"); 3787 goto errout; 3788 } 3789#endif 3790 return 0; 3791errout: 3792 return -EINVAL; 3793} 3794 3795static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, 3796 gfp_t gfp_flags, 3797 struct netlink_ext_ack *extack) 3798{ 3799 struct net *net = cfg->fc_nlinfo.nl_net; 3800 struct fib6_table *table; 3801 struct fib6_info *rt; 3802 int err; 3803 3804 if (cfg->fc_nlinfo.nlh && 3805 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 3806 table = fib6_get_table(net, cfg->fc_table); 3807 if (!table) { 3808 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 3809 table = fib6_new_table(net, cfg->fc_table); 3810 } 3811 } else { 3812 table = fib6_new_table(net, cfg->fc_table); 3813 } 3814 if (!table) { 3815 err = -ENOBUFS; 3816 goto err; 3817 } 3818 3819 rt = fib6_info_alloc(gfp_flags, !cfg->fc_nh_id); 3820 if (!rt) { 3821 err = -ENOMEM; 3822 goto err; 3823 } 3824 3825 rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, 3826 extack); 3827 if (IS_ERR(rt->fib6_metrics)) { 3828 err = PTR_ERR(rt->fib6_metrics); 3829 goto free; 3830 } 3831 3832 if (cfg->fc_flags & RTF_ADDRCONF) 3833 rt->dst_nocount = true; 3834 3835 if (cfg->fc_flags & RTF_EXPIRES) 3836 fib6_set_expires(rt, jiffies + 3837 clock_t_to_jiffies(cfg->fc_expires)); 3838 3839 if (cfg->fc_protocol == RTPROT_UNSPEC) 3840 cfg->fc_protocol = RTPROT_BOOT; 3841 3842 rt->fib6_protocol = cfg->fc_protocol; 3843 rt->fib6_table = table; 3844 rt->fib6_metric = cfg->fc_metric; 3845 rt->fib6_type = cfg->fc_type ? 
: RTN_UNICAST; 3846 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY; 3847 3848 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 3849 rt->fib6_dst.plen = cfg->fc_dst_len; 3850 3851#ifdef CONFIG_IPV6_SUBTREES 3852 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); 3853 rt->fib6_src.plen = cfg->fc_src_len; 3854#endif 3855 return rt; 3856free: 3857 kfree(rt); 3858err: 3859 return ERR_PTR(err); 3860} 3861 3862static int ip6_route_info_create_nh(struct fib6_info *rt, 3863 struct fib6_config *cfg, 3864 gfp_t gfp_flags, 3865 struct netlink_ext_ack *extack) 3866{ 3867 struct net *net = cfg->fc_nlinfo.nl_net; 3868 struct fib6_nh *fib6_nh; 3869 int err; 3870 3871 if (cfg->fc_nh_id) { 3872 struct nexthop *nh; 3873 3874 rcu_read_lock(); 3875 3876 nh = nexthop_find_by_id(net, cfg->fc_nh_id); 3877 if (!nh) { 3878 err = -EINVAL; 3879 NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); 3880 goto out_free; 3881 } 3882 3883 err = fib6_check_nexthop(nh, cfg, extack); 3884 if (err) 3885 goto out_free; 3886 3887 if (!nexthop_get(nh)) { 3888 NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); 3889 err = -ENOENT; 3890 goto out_free; 3891 } 3892 3893 rt->nh = nh; 3894 fib6_nh = nexthop_fib6_nh(rt->nh); 3895 3896 rcu_read_unlock(); 3897 } else { 3898 int addr_type; 3899 3900 err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); 3901 if (err) 3902 goto out_release; 3903 3904 fib6_nh = rt->fib6_nh; 3905 3906 /* We cannot add true routes via loopback here, they would 3907 * result in kernel looping; promote them to reject routes 3908 */ 3909 addr_type = ipv6_addr_type(&cfg->fc_dst); 3910 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, 3911 addr_type)) 3912 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; 3913 } 3914 3915 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 3916 struct net_device *dev = fib6_nh->fib_nh_dev; 3917 3918 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 3919 NL_SET_ERR_MSG(extack, "Invalid source address"); 3920 err = -EINVAL; 3921 goto out_release; 3922 } 3923 rt->fib6_prefsrc.addr = cfg->fc_prefsrc; 3924 rt->fib6_prefsrc.plen = 128; 3925 } 3926 3927 return 0; 3928out_release: 3929 fib6_info_release(rt); 3930 return err; 3931out_free: 3932 rcu_read_unlock(); 3933 ip_fib_metrics_put(rt->fib6_metrics); 3934 kfree(rt); 3935 return err; 3936} 3937 3938int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, 3939 struct netlink_ext_ack *extack) 3940{ 3941 struct fib6_info *rt; 3942 int err; 3943 3944 err = fib6_config_validate(cfg, extack); 3945 if (err) 3946 return err; 3947 3948 rt = ip6_route_info_create(cfg, gfp_flags, extack); 3949 if (IS_ERR(rt)) 3950 return PTR_ERR(rt); 3951 3952 err = ip6_route_info_create_nh(rt, cfg, gfp_flags, extack); 3953 if (err) 3954 return err; 3955 3956 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); 3957 fib6_info_release(rt); 3958 3959 return err; 3960} 3961 3962static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) 3963{ 3964 struct net *net = info->nl_net; 3965 struct fib6_table *table; 3966 int err; 3967 3968 if (rt == net->ipv6.fib6_null_entry) { 3969 err = -ENOENT; 3970 goto out; 3971 } 3972 3973 table = rt->fib6_table; 3974 spin_lock_bh(&table->tb6_lock); 3975 err = fib6_del(rt, info); 3976 spin_unlock_bh(&table->tb6_lock); 3977 3978out: 3979 fib6_info_release(rt); 3980 return err; 3981} 3982 3983int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify) 3984{ 3985 struct nl_info info = { 3986 .nl_net = net, 3987 .skip_notify = skip_notify 3988 }; 3989 3990 return __ip6_del_rt(rt, 
&info); 3991} 3992 3993static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) 3994{ 3995 struct nl_info *info = &cfg->fc_nlinfo; 3996 struct net *net = info->nl_net; 3997 struct sk_buff *skb = NULL; 3998 struct fib6_table *table; 3999 int err = -ENOENT; 4000 4001 if (rt == net->ipv6.fib6_null_entry) 4002 goto out_put; 4003 table = rt->fib6_table; 4004 spin_lock_bh(&table->tb6_lock); 4005 4006 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { 4007 struct fib6_info *sibling, *next_sibling; 4008 struct fib6_node *fn; 4009 4010 /* prefer to send a single notification with all hops */ 4011 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 4012 if (skb) { 4013 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 4014 4015 if (rt6_fill_node(net, skb, rt, NULL, 4016 NULL, NULL, 0, RTM_DELROUTE, 4017 info->portid, seq, 0) < 0) { 4018 kfree_skb(skb); 4019 skb = NULL; 4020 } else 4021 info->skip_notify = 1; 4022 } 4023 4024 /* 'rt' points to the first sibling route. If it is not the 4025 * leaf, then we do not need to send a notification. Otherwise, 4026 * we need to check if the last sibling has a next route or not 4027 * and emit a replace or delete notification, respectively. 4028 */ 4029 info->skip_notify_kernel = 1; 4030 fn = rcu_dereference_protected(rt->fib6_node, 4031 lockdep_is_held(&table->tb6_lock)); 4032 if (rcu_access_pointer(fn->leaf) == rt) { 4033 struct fib6_info *last_sibling, *replace_rt; 4034 4035 last_sibling = list_last_entry(&rt->fib6_siblings, 4036 struct fib6_info, 4037 fib6_siblings); 4038 replace_rt = rcu_dereference_protected( 4039 last_sibling->fib6_next, 4040 lockdep_is_held(&table->tb6_lock)); 4041 if (replace_rt) 4042 call_fib6_entry_notifiers_replace(net, 4043 replace_rt); 4044 else 4045 call_fib6_multipath_entry_notifiers(net, 4046 FIB_EVENT_ENTRY_DEL, 4047 rt, rt->fib6_nsiblings, 4048 NULL); 4049 } 4050 list_for_each_entry_safe(sibling, next_sibling, 4051 &rt->fib6_siblings, 4052 fib6_siblings) { 4053 err = fib6_del(sibling, info); 4054 if (err) 4055 goto out_unlock; 4056 } 4057 } 4058 4059 err = fib6_del(rt, info); 4060out_unlock: 4061 spin_unlock_bh(&table->tb6_lock); 4062out_put: 4063 fib6_info_release(rt); 4064 4065 if (skb) { 4066 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 4067 info->nlh, gfp_any()); 4068 } 4069 return err; 4070} 4071 4072static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) 4073{ 4074 int rc = -ESRCH; 4075 4076 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) 4077 goto out; 4078 4079 if (cfg->fc_flags & RTF_GATEWAY && 4080 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 4081 goto out; 4082 4083 rc = rt6_remove_exception_rt(rt); 4084out: 4085 return rc; 4086} 4087 4088static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt, 4089 struct fib6_nh *nh) 4090{ 4091 struct fib6_result res = { 4092 .f6i = rt, 4093 .nh = nh, 4094 }; 4095 struct rt6_info *rt_cache; 4096 4097 rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src); 4098 if (rt_cache) 4099 return __ip6_del_cached_rt(rt_cache, cfg); 4100 4101 return 0; 4102} 4103 4104struct fib6_nh_del_cached_rt_arg { 4105 struct fib6_config *cfg; 4106 struct fib6_info *f6i; 4107}; 4108 4109static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg) 4110{ 4111 struct fib6_nh_del_cached_rt_arg *arg = _arg; 4112 int rc; 4113 4114 rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh); 4115 return rc != -ESRCH ? 
rc : 0; 4116} 4117 4118static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i) 4119{ 4120 struct fib6_nh_del_cached_rt_arg arg = { 4121 .cfg = cfg, 4122 .f6i = f6i 4123 }; 4124 4125 return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg); 4126} 4127 4128static int ip6_route_del(struct fib6_config *cfg, 4129 struct netlink_ext_ack *extack) 4130{ 4131 struct fib6_table *table; 4132 struct fib6_info *rt; 4133 struct fib6_node *fn; 4134 int err = -ESRCH; 4135 4136 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 4137 if (!table) { 4138 NL_SET_ERR_MSG(extack, "FIB table does not exist"); 4139 return err; 4140 } 4141 4142 rcu_read_lock(); 4143 4144 fn = fib6_locate(&table->tb6_root, 4145 &cfg->fc_dst, cfg->fc_dst_len, 4146 &cfg->fc_src, cfg->fc_src_len, 4147 !(cfg->fc_flags & RTF_CACHE)); 4148 4149 if (fn) { 4150 for_each_fib6_node_rt_rcu(fn) { 4151 struct fib6_nh *nh; 4152 4153 if (rt->nh && cfg->fc_nh_id && 4154 rt->nh->id != cfg->fc_nh_id) 4155 continue; 4156 4157 if (cfg->fc_flags & RTF_CACHE) { 4158 int rc = 0; 4159 4160 if (rt->nh) { 4161 rc = ip6_del_cached_rt_nh(cfg, rt); 4162 } else if (cfg->fc_nh_id) { 4163 continue; 4164 } else { 4165 nh = rt->fib6_nh; 4166 rc = ip6_del_cached_rt(cfg, rt, nh); 4167 } 4168 if (rc != -ESRCH) { 4169 rcu_read_unlock(); 4170 return rc; 4171 } 4172 continue; 4173 } 4174 4175 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) 4176 continue; 4177 if (cfg->fc_protocol && 4178 cfg->fc_protocol != rt->fib6_protocol) 4179 continue; 4180 4181 if (rt->nh) { 4182 if (!fib6_info_hold_safe(rt)) 4183 continue; 4184 4185 err = __ip6_del_rt(rt, &cfg->fc_nlinfo); 4186 break; 4187 } 4188 if (cfg->fc_nh_id) 4189 continue; 4190 4191 nh = rt->fib6_nh; 4192 if (cfg->fc_ifindex && 4193 (!nh->fib_nh_dev || 4194 nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) 4195 continue; 4196 if (cfg->fc_flags & RTF_GATEWAY && 4197 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) 4198 continue; 4199 if (!fib6_info_hold_safe(rt)) 4200 continue; 4201 4202 /* if gateway was specified only delete the one hop */ 4203 if (cfg->fc_flags & RTF_GATEWAY) 4204 err = __ip6_del_rt(rt, &cfg->fc_nlinfo); 4205 else 4206 err = __ip6_del_rt_siblings(rt, cfg); 4207 break; 4208 } 4209 } 4210 rcu_read_unlock(); 4211 4212 return err; 4213} 4214 4215static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 4216{ 4217 struct netevent_redirect netevent; 4218 struct rt6_info *rt, *nrt = NULL; 4219 struct fib6_result res = {}; 4220 struct ndisc_options ndopts; 4221 struct inet6_dev *in6_dev; 4222 struct neighbour *neigh; 4223 struct rd_msg *msg; 4224 int optlen, on_link; 4225 u8 *lladdr; 4226 4227 optlen = skb_tail_pointer(skb) - skb_transport_header(skb); 4228 optlen -= sizeof(*msg); 4229 4230 if (optlen < 0) { 4231 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 4232 return; 4233 } 4234 4235 msg = (struct rd_msg *)icmp6_hdr(skb); 4236 4237 if (ipv6_addr_is_multicast(&msg->dest)) { 4238 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 4239 return; 4240 } 4241 4242 on_link = 0; 4243 if (ipv6_addr_equal(&msg->dest, &msg->target)) { 4244 on_link = 1; 4245 } else if (ipv6_addr_type(&msg->target) != 4246 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 4247 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 4248 return; 4249 } 4250 4251 in6_dev = __in6_dev_get(skb->dev); 4252 if (!in6_dev) 4253 return; 4254 if (READ_ONCE(in6_dev->cnf.forwarding) || 4255 
!READ_ONCE(in6_dev->cnf.accept_redirects)) 4256 return; 4257 4258 /* RFC2461 8.1: 4259 * The IP source address of the Redirect MUST be the same as the current 4260 * first-hop router for the specified ICMP Destination Address. 4261 */ 4262 4263 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { 4264 net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 4265 return; 4266 } 4267 4268 lladdr = NULL; 4269 if (ndopts.nd_opts_tgt_lladdr) { 4270 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 4271 skb->dev); 4272 if (!lladdr) { 4273 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 4274 return; 4275 } 4276 } 4277 4278 rt = dst_rt6_info(dst); 4279 if (rt->rt6i_flags & RTF_REJECT) { 4280 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 4281 return; 4282 } 4283 4284 /* Redirect received -> path was valid. 4285 * Look, redirects are sent only in response to data packets, 4286 * so that this nexthop apparently is reachable. --ANK 4287 */ 4288 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); 4289 4290 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); 4291 if (!neigh) 4292 return; 4293 4294 /* 4295 * We have finally decided to accept it. 4296 */ 4297 4298 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, 4299 NEIGH_UPDATE_F_WEAK_OVERRIDE| 4300 NEIGH_UPDATE_F_OVERRIDE| 4301 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 4302 NEIGH_UPDATE_F_ISROUTER)), 4303 NDISC_REDIRECT, &ndopts); 4304 4305 rcu_read_lock(); 4306 res.f6i = rcu_dereference(rt->from); 4307 if (!res.f6i) 4308 goto out; 4309 4310 if (res.f6i->nh) { 4311 struct fib6_nh_match_arg arg = { 4312 .dev = dst_dev_rcu(dst), 4313 .gw = &rt->rt6i_gateway, 4314 }; 4315 4316 nexthop_for_each_fib6_nh(res.f6i->nh, 4317 fib6_nh_find_match, &arg); 4318 4319 /* fib6_info uses a nexthop that does not have fib6_nh 4320 * using the dst->dev. Should be impossible 4321 */ 4322 if (!arg.match) 4323 goto out; 4324 res.nh = arg.match; 4325 } else { 4326 res.nh = res.f6i->fib6_nh; 4327 } 4328 4329 res.fib6_flags = res.f6i->fib6_flags; 4330 res.fib6_type = res.f6i->fib6_type; 4331 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); 4332 if (!nrt) 4333 goto out; 4334 4335 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 4336 if (on_link) 4337 nrt->rt6i_flags &= ~RTF_GATEWAY; 4338 4339 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 4340 4341 /* rt6_insert_exception() will take care of duplicated exceptions */ 4342 if (rt6_insert_exception(nrt, &res)) { 4343 dst_release_immediate(&nrt->dst); 4344 goto out; 4345 } 4346 4347 netevent.old = &rt->dst; 4348 netevent.new = &nrt->dst; 4349 netevent.daddr = &msg->dest; 4350 netevent.neigh = neigh; 4351 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 4352 4353out: 4354 rcu_read_unlock(); 4355 neigh_release(neigh); 4356} 4357 4358#ifdef CONFIG_IPV6_ROUTE_INFO 4359static struct fib6_info *rt6_get_route_info(struct net *net, 4360 const struct in6_addr *prefix, int prefixlen, 4361 const struct in6_addr *gwaddr, 4362 struct net_device *dev) 4363{ 4364 u32 tb_id = l3mdev_fib_table(dev) ? 
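/* GNU "?:" shorthand: if the device is enslaved to an L3 master
 * device (VRF), reuse that device's table id; otherwise fall back
 * to RT6_TABLE_INFO below.
 */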
: RT6_TABLE_INFO; 4365 int ifindex = dev->ifindex; 4366 struct fib6_node *fn; 4367 struct fib6_info *rt = NULL; 4368 struct fib6_table *table; 4369 4370 table = fib6_get_table(net, tb_id); 4371 if (!table) 4372 return NULL; 4373 4374 rcu_read_lock(); 4375 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); 4376 if (!fn) 4377 goto out; 4378 4379 for_each_fib6_node_rt_rcu(fn) { 4380 /* these routes do not use nexthops */ 4381 if (rt->nh) 4382 continue; 4383 if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex) 4384 continue; 4385 if (!(rt->fib6_flags & RTF_ROUTEINFO) || 4386 !rt->fib6_nh->fib_nh_gw_family) 4387 continue; 4388 if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr)) 4389 continue; 4390 if (!fib6_info_hold_safe(rt)) 4391 continue; 4392 break; 4393 } 4394out: 4395 rcu_read_unlock(); 4396 return rt; 4397} 4398 4399static struct fib6_info *rt6_add_route_info(struct net *net, 4400 const struct in6_addr *prefix, int prefixlen, 4401 const struct in6_addr *gwaddr, 4402 struct net_device *dev, 4403 unsigned int pref) 4404{ 4405 struct fib6_config cfg = { 4406 .fc_metric = IP6_RT_PRIO_USER, 4407 .fc_ifindex = dev->ifindex, 4408 .fc_dst_len = prefixlen, 4409 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 4410 RTF_UP | RTF_PREF(pref), 4411 .fc_protocol = RTPROT_RA, 4412 .fc_type = RTN_UNICAST, 4413 .fc_nlinfo.portid = 0, 4414 .fc_nlinfo.nlh = NULL, 4415 .fc_nlinfo.nl_net = net, 4416 }; 4417 4418 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; 4419 cfg.fc_dst = *prefix; 4420 cfg.fc_gateway = *gwaddr; 4421 4422 /* We should treat it as a default route if prefix length is 0. */ 4423 if (!prefixlen) 4424 cfg.fc_flags |= RTF_DEFAULT; 4425 4426 ip6_route_add(&cfg, GFP_ATOMIC, NULL); 4427 4428 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); 4429} 4430#endif 4431 4432struct fib6_info *rt6_get_dflt_router(struct net *net, 4433 const struct in6_addr *addr, 4434 struct net_device *dev) 4435{ 4436 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; 4437 struct fib6_info *rt; 4438 struct fib6_table *table; 4439 4440 table = fib6_get_table(net, tb_id); 4441 if (!table) 4442 return NULL; 4443 4444 rcu_read_lock(); 4445 for_each_fib6_node_rt_rcu(&table->tb6_root) { 4446 struct fib6_nh *nh; 4447 4448 /* RA routes do not use nexthops */ 4449 if (rt->nh) 4450 continue; 4451 4452 nh = rt->fib6_nh; 4453 if (dev == nh->fib_nh_dev && 4454 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 4455 ipv6_addr_equal(&nh->fib_nh_gw6, addr)) 4456 break; 4457 } 4458 if (rt && !fib6_info_hold_safe(rt)) 4459 rt = NULL; 4460 rcu_read_unlock(); 4461 return rt; 4462} 4463 4464struct fib6_info *rt6_add_dflt_router(struct net *net, 4465 const struct in6_addr *gwaddr, 4466 struct net_device *dev, 4467 unsigned int pref, 4468 u32 defrtr_usr_metric, 4469 int lifetime) 4470{ 4471 struct fib6_config cfg = { 4472 .fc_table = l3mdev_fib_table(dev) ? 
: RT6_TABLE_DFLT, 4473 .fc_metric = defrtr_usr_metric, 4474 .fc_ifindex = dev->ifindex, 4475 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 4476 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 4477 .fc_protocol = RTPROT_RA, 4478 .fc_type = RTN_UNICAST, 4479 .fc_nlinfo.portid = 0, 4480 .fc_nlinfo.nlh = NULL, 4481 .fc_nlinfo.nl_net = net, 4482 .fc_expires = jiffies_to_clock_t(lifetime * HZ), 4483 }; 4484 4485 cfg.fc_gateway = *gwaddr; 4486 4487 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { 4488 struct fib6_table *table; 4489 4490 table = fib6_get_table(dev_net(dev), cfg.fc_table); 4491 if (table) 4492 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; 4493 } 4494 4495 return rt6_get_dflt_router(net, gwaddr, dev); 4496} 4497 4498static void __rt6_purge_dflt_routers(struct net *net, 4499 struct fib6_table *table) 4500{ 4501 struct fib6_info *rt; 4502 4503restart: 4504 rcu_read_lock(); 4505 for_each_fib6_node_rt_rcu(&table->tb6_root) { 4506 struct net_device *dev = fib6_info_nh_dev(rt); 4507 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL; 4508 4509 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 4510 (!idev || idev->cnf.accept_ra != 2) && 4511 fib6_info_hold_safe(rt)) { 4512 rcu_read_unlock(); 4513 ip6_del_rt(net, rt, false); 4514 goto restart; 4515 } 4516 } 4517 rcu_read_unlock(); 4518 4519 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; 4520} 4521 4522void rt6_purge_dflt_routers(struct net *net) 4523{ 4524 struct fib6_table *table; 4525 struct hlist_head *head; 4526 unsigned int h; 4527 4528 rcu_read_lock(); 4529 4530 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 4531 head = &net->ipv6.fib_table_hash[h]; 4532 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 4533 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) 4534 __rt6_purge_dflt_routers(net, table); 4535 } 4536 } 4537 4538 rcu_read_unlock(); 4539} 4540 4541static void rtmsg_to_fib6_config(struct net *net, 4542 struct in6_rtmsg *rtmsg, 4543 struct fib6_config *cfg) 4544{ 4545 *cfg = (struct fib6_config){ 4546 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? 
4547 : RT6_TABLE_MAIN, 4548 .fc_ifindex = rtmsg->rtmsg_ifindex, 4549 .fc_metric = rtmsg->rtmsg_metric, 4550 .fc_expires = rtmsg->rtmsg_info, 4551 .fc_dst_len = rtmsg->rtmsg_dst_len, 4552 .fc_src_len = rtmsg->rtmsg_src_len, 4553 .fc_flags = rtmsg->rtmsg_flags, 4554 .fc_type = rtmsg->rtmsg_type, 4555 4556 .fc_nlinfo.nl_net = net, 4557 4558 .fc_dst = rtmsg->rtmsg_dst, 4559 .fc_src = rtmsg->rtmsg_src, 4560 .fc_gateway = rtmsg->rtmsg_gateway, 4561 }; 4562} 4563 4564int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) 4565{ 4566 struct fib6_config cfg; 4567 int err; 4568 4569 if (cmd != SIOCADDRT && cmd != SIOCDELRT) 4570 return -EINVAL; 4571 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 4572 return -EPERM; 4573 4574 rtmsg_to_fib6_config(net, rtmsg, &cfg); 4575 4576 switch (cmd) { 4577 case SIOCADDRT: 4578 /* Only do the default setting of fc_metric in route adding */ 4579 if (cfg.fc_metric == 0) 4580 cfg.fc_metric = IP6_RT_PRIO_USER; 4581 err = ip6_route_add(&cfg, GFP_KERNEL, NULL); 4582 break; 4583 case SIOCDELRT: 4584 err = ip6_route_del(&cfg, NULL); 4585 break; 4586 } 4587 4588 return err; 4589} 4590 4591/* 4592 * Drop the packet on the floor 4593 */ 4594 4595static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 4596{ 4597 struct dst_entry *dst = skb_dst(skb); 4598 struct net_device *dev = dst_dev(dst); 4599 struct net *net = dev_net(dev); 4600 struct inet6_dev *idev; 4601 SKB_DR(reason); 4602 int type; 4603 4604 if (netif_is_l3_master(skb->dev) || 4605 dev == net->loopback_dev) 4606 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); 4607 else 4608 idev = ip6_dst_idev(dst); 4609 4610 switch (ipstats_mib_noroutes) { 4611 case IPSTATS_MIB_INNOROUTES: 4612 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 4613 if (type == IPV6_ADDR_ANY) { 4614 SKB_DR_SET(reason, IP_INADDRERRORS); 4615 IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 4616 break; 4617 } 4618 SKB_DR_SET(reason, IP_INNOROUTES); 4619 fallthrough; 4620 case IPSTATS_MIB_OUTNOROUTES: 4621 SKB_DR_OR(reason, IP_OUTNOROUTES); 4622 IP6_INC_STATS(net, idev, ipstats_mib_noroutes); 4623 break; 4624 } 4625 4626 /* Start over by dropping the dst for l3mdev case */ 4627 if (netif_is_l3_master(skb->dev)) 4628 skb_dst_drop(skb); 4629 4630 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 4631 kfree_skb_reason(skb, reason); 4632 return 0; 4633} 4634 4635static int ip6_pkt_discard(struct sk_buff *skb) 4636{ 4637 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 4638} 4639 4640static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) 4641{ 4642 skb->dev = skb_dst_dev(skb); 4643 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 4644} 4645 4646static int ip6_pkt_prohibit(struct sk_buff *skb) 4647{ 4648 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 4649} 4650 4651static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) 4652{ 4653 skb->dev = skb_dst_dev(skb); 4654 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 4655} 4656 4657/* 4658 * Allocate a dst for local (unicast / anycast) address. 4659 */ 4660 4661struct fib6_info *addrconf_f6i_alloc(struct net *net, 4662 struct inet6_dev *idev, 4663 const struct in6_addr *addr, 4664 bool anycast, gfp_t gfp_flags, 4665 struct netlink_ext_ack *extack) 4666{ 4667 struct fib6_config cfg = { 4668 .fc_table = l3mdev_fib_table(idev->dev) ? 
: RT6_TABLE_LOCAL,
4669 .fc_ifindex = idev->dev->ifindex,
4670 .fc_flags = RTF_UP | RTF_NONEXTHOP,
4671 .fc_dst = *addr,
4672 .fc_dst_len = 128,
4673 .fc_protocol = RTPROT_KERNEL,
4674 .fc_nlinfo.nl_net = net,
4675 .fc_ignore_dev_down = true,
4676 };
4677 struct fib6_info *f6i;
4678 int err;
4679
4680 if (anycast) {
4681 cfg.fc_type = RTN_ANYCAST;
4682 cfg.fc_flags |= RTF_ANYCAST;
4683 } else {
4684 cfg.fc_type = RTN_LOCAL;
4685 cfg.fc_flags |= RTF_LOCAL;
4686 }
4687
4688 f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
4689 if (IS_ERR(f6i))
4690 return f6i;
4691
4692 err = ip6_route_info_create_nh(f6i, &cfg, gfp_flags, extack);
4693 if (err)
4694 return ERR_PTR(err);
4695
4696 f6i->dst_nocount = true;
4697
4698 if (!anycast &&
4699 (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
4700 READ_ONCE(idev->cnf.disable_policy)))
4701 f6i->dst_nopolicy = true;
4702
4703 return f6i;
4704}
4705
4706/* remove a deleted IP address from prefsrc entries */
4707struct arg_dev_net_ip {
4708 struct net *net;
4709 struct in6_addr *addr;
4710};
4711
4712static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4713{
4714 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4715 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4716
4717 if (!rt->nh &&
4718 rt != net->ipv6.fib6_null_entry &&
4719 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
4720 !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
4721 spin_lock_bh(&rt6_exception_lock);
4722 /* remove prefsrc entry */
4723 rt->fib6_prefsrc.plen = 0;
4724 spin_unlock_bh(&rt6_exception_lock);
4725 }
4726 return 0;
4727}
4728
4729void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4730{
4731 struct net *net = dev_net(ifp->idev->dev);
4732 struct arg_dev_net_ip adni = {
4733 .net = net,
4734 .addr = &ifp->addr,
4735 };
4736 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4737}
4738
4739#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
4740
4741/* Remove routers and update dst entries when a gateway turns into a host. */
4742static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4743{
4744 struct in6_addr *gateway = (struct in6_addr *)arg;
4745 struct fib6_nh *nh;
4746
4747 /* RA routes do not use nexthops */
4748 if (rt->nh)
4749 return 0;
4750
4751 nh = rt->fib6_nh;
4752 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4753 nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4754 return -1;
4755
4756 /* Further clean up cached routes in the exception table.
4757 * This is needed because a cached route may have a different
4758 * gateway than its 'parent' in the case of an IP redirect.
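 * Exceptions hang off the nexthop rather than the fib tree, so the
 * fib6_clean_all() walk cannot see them; flush them here explicitly.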
4759 */ 4760 fib6_nh_exceptions_clean_tohost(nh, gateway); 4761 4762 return 0; 4763} 4764 4765void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 4766{ 4767 fib6_clean_all(net, fib6_clean_tohost, gateway); 4768} 4769 4770struct arg_netdev_event { 4771 const struct net_device *dev; 4772 union { 4773 unsigned char nh_flags; 4774 unsigned long event; 4775 }; 4776}; 4777 4778static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) 4779{ 4780 struct fib6_info *iter; 4781 struct fib6_node *fn; 4782 4783 fn = rcu_dereference_protected(rt->fib6_node, 4784 lockdep_is_held(&rt->fib6_table->tb6_lock)); 4785 iter = rcu_dereference_protected(fn->leaf, 4786 lockdep_is_held(&rt->fib6_table->tb6_lock)); 4787 while (iter) { 4788 if (iter->fib6_metric == rt->fib6_metric && 4789 rt6_qualify_for_ecmp(iter)) 4790 return iter; 4791 iter = rcu_dereference_protected(iter->fib6_next, 4792 lockdep_is_held(&rt->fib6_table->tb6_lock)); 4793 } 4794 4795 return NULL; 4796} 4797 4798/* only called for fib entries with builtin fib6_nh */ 4799static bool rt6_is_dead(const struct fib6_info *rt) 4800{ 4801 if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD || 4802 (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN && 4803 ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev))) 4804 return true; 4805 4806 return false; 4807} 4808 4809static int rt6_multipath_total_weight(const struct fib6_info *rt) 4810{ 4811 struct fib6_info *iter; 4812 int total = 0; 4813 4814 if (!rt6_is_dead(rt)) 4815 total += rt->fib6_nh->fib_nh_weight; 4816 4817 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { 4818 if (!rt6_is_dead(iter)) 4819 total += iter->fib6_nh->fib_nh_weight; 4820 } 4821 4822 return total; 4823} 4824 4825static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) 4826{ 4827 int upper_bound = -1; 4828 4829 if (!rt6_is_dead(rt)) { 4830 *weight += rt->fib6_nh->fib_nh_weight; 4831 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, 4832 total) - 1; 4833 } 4834 atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound); 4835} 4836 4837static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) 4838{ 4839 struct fib6_info *iter; 4840 int weight = 0; 4841 4842 rt6_upper_bound_set(rt, &weight, total); 4843 4844 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4845 rt6_upper_bound_set(iter, &weight, total); 4846} 4847 4848void rt6_multipath_rebalance(struct fib6_info *rt) 4849{ 4850 struct fib6_info *first; 4851 int total; 4852 4853 /* In case the entire multipath route was marked for flushing, 4854 * then there is no need to rebalance upon the removal of every 4855 * sibling route. 4856 */ 4857 if (!rt->fib6_nsiblings || rt->should_flush) 4858 return; 4859 4860 /* During lookup routes are evaluated in order, so we need to 4861 * make sure upper bounds are assigned from the first sibling 4862 * onwards. 
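 * Each live nexthop gets an upper bound proportional to its
 * cumulative weight, scaled into the [0, 2^31 - 1] hash space;
 * dead nexthops keep -1. E.g. with weights 1 and 2 the bounds
 * become 715827882 and 2147483647, i.e. a 1/3 : 2/3 split.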
4863 */ 4864 first = rt6_multipath_first_sibling(rt); 4865 if (WARN_ON_ONCE(!first)) 4866 return; 4867 4868 total = rt6_multipath_total_weight(first); 4869 rt6_multipath_upper_bound_set(first, total); 4870} 4871 4872static int fib6_ifup(struct fib6_info *rt, void *p_arg) 4873{ 4874 const struct arg_netdev_event *arg = p_arg; 4875 struct net *net = dev_net(arg->dev); 4876 4877 if (rt != net->ipv6.fib6_null_entry && !rt->nh && 4878 rt->fib6_nh->fib_nh_dev == arg->dev) { 4879 rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags; 4880 fib6_update_sernum_upto_root(net, rt); 4881 rt6_multipath_rebalance(rt); 4882 } 4883 4884 return 0; 4885} 4886 4887void rt6_sync_up(struct net_device *dev, unsigned char nh_flags) 4888{ 4889 struct arg_netdev_event arg = { 4890 .dev = dev, 4891 { 4892 .nh_flags = nh_flags, 4893 }, 4894 }; 4895 4896 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) 4897 arg.nh_flags |= RTNH_F_LINKDOWN; 4898 4899 fib6_clean_all(dev_net(dev), fib6_ifup, &arg); 4900} 4901 4902/* only called for fib entries with inline fib6_nh */ 4903static bool rt6_multipath_uses_dev(const struct fib6_info *rt, 4904 const struct net_device *dev) 4905{ 4906 struct fib6_info *iter; 4907 4908 if (rt->fib6_nh->fib_nh_dev == dev) 4909 return true; 4910 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4911 if (iter->fib6_nh->fib_nh_dev == dev) 4912 return true; 4913 4914 return false; 4915} 4916 4917static void rt6_multipath_flush(struct fib6_info *rt) 4918{ 4919 struct fib6_info *iter; 4920 4921 rt->should_flush = 1; 4922 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4923 iter->should_flush = 1; 4924} 4925 4926static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, 4927 const struct net_device *down_dev) 4928{ 4929 struct fib6_info *iter; 4930 unsigned int dead = 0; 4931 4932 if (rt->fib6_nh->fib_nh_dev == down_dev || 4933 rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD) 4934 dead++; 4935 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4936 if (iter->fib6_nh->fib_nh_dev == down_dev || 4937 iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD) 4938 dead++; 4939 4940 return dead; 4941} 4942 4943static void rt6_multipath_nh_flags_set(struct fib6_info *rt, 4944 const struct net_device *dev, 4945 unsigned char nh_flags) 4946{ 4947 struct fib6_info *iter; 4948 4949 if (rt->fib6_nh->fib_nh_dev == dev) 4950 rt->fib6_nh->fib_nh_flags |= nh_flags; 4951 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4952 if (iter->fib6_nh->fib_nh_dev == dev) 4953 iter->fib6_nh->fib_nh_flags |= nh_flags; 4954} 4955 4956/* called with write lock held for table with rt */ 4957static int fib6_ifdown(struct fib6_info *rt, void *p_arg) 4958{ 4959 const struct arg_netdev_event *arg = p_arg; 4960 const struct net_device *dev = arg->dev; 4961 struct net *net = dev_net(dev); 4962 4963 if (rt == net->ipv6.fib6_null_entry || rt->nh) 4964 return 0; 4965 4966 switch (arg->event) { 4967 case NETDEV_UNREGISTER: 4968 return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0; 4969 case NETDEV_DOWN: 4970 if (rt->should_flush) 4971 return -1; 4972 if (!rt->fib6_nsiblings) 4973 return rt->fib6_nh->fib_nh_dev == dev ? 
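/* returning -1 asks the fib6 walker (fib6_clean_all()) to delete
 * this entry
 */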
-1 : 0;
4974 if (rt6_multipath_uses_dev(rt, dev)) {
4975 unsigned int count;
4976
4977 count = rt6_multipath_dead_count(rt, dev);
4978 if (rt->fib6_nsiblings + 1 == count) {
4979 rt6_multipath_flush(rt);
4980 return -1;
4981 }
4982 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4983 RTNH_F_LINKDOWN);
4984 fib6_update_sernum(net, rt);
4985 rt6_multipath_rebalance(rt);
4986 }
4987 return -2;
4988 case NETDEV_CHANGE:
4989 if (rt->fib6_nh->fib_nh_dev != dev ||
4990 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4991 break;
4992 rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4993 rt6_multipath_rebalance(rt);
4994 break;
4995 }
4996
4997 return 0;
4998}
4999
5000void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
5001{
5002 struct arg_netdev_event arg = {
5003 .dev = dev,
5004 {
5005 .event = event,
5006 },
5007 };
5008 struct net *net = dev_net(dev);
5009
5010 if (READ_ONCE(net->ipv6.sysctl.skip_notify_on_dev_down))
5011 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
5012 else
5013 fib6_clean_all(net, fib6_ifdown, &arg);
5014}
5015
5016void rt6_disable_ip(struct net_device *dev, unsigned long event)
5017{
5018 rt6_sync_down_dev(dev, event);
5019 rt6_uncached_list_flush_dev(dev);
5020 neigh_ifdown(&nd_tbl, dev);
5021}
5022
5023struct rt6_mtu_change_arg {
5024 struct net_device *dev;
5025 unsigned int mtu;
5026 struct fib6_info *f6i;
5027};
5028
5029static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
5030{
5031 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
5032 struct fib6_info *f6i = arg->f6i;
5033
5034 /* An administrative MTU increase cannot be discovered by IPv6
5035 * PMTU discovery, so an increased PMTU must be propagated here.
5036 * RFC 1981 only covers PMTU decreases, which makes this update
5037 * a MUST (e.g. when jumbo frames are enabled).
5038 */
5039 if (nh->fib_nh_dev == arg->dev) {
5040 struct inet6_dev *idev = __in6_dev_get(arg->dev);
5041 u32 mtu = f6i->fib6_pmtu;
5042
5043 if (mtu >= arg->mtu ||
5044 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
5045 fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
5046
5047 spin_lock_bh(&rt6_exception_lock);
5048 rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
5049 spin_unlock_bh(&rt6_exception_lock);
5050 }
5051
5052 return 0;
5053}
5054
5055static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
5056{
5057 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
5058 struct inet6_dev *idev;
5059
5060 /* In IPv6, PMTU discovery is not optional,
5061 so an RTAX_MTU lock cannot disable it.
5062 We still use the lock to block changes
5063 caused by addrconf/ndisc.
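   For example (illustrative iproute2 usage, not taken from this file):
     ip -6 route add 2001:db8::/64 dev eth0 mtu lock 1400
   installs a locked RTAX_MTU that the code below leaves untouched.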
5064 */ 5065 5066 idev = __in6_dev_get(arg->dev); 5067 if (!idev) 5068 return 0; 5069 5070 if (fib6_metric_locked(f6i, RTAX_MTU)) 5071 return 0; 5072 5073 arg->f6i = f6i; 5074 if (f6i->nh) { 5075 /* fib6_nh_mtu_change only returns 0, so this is safe */ 5076 return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change, 5077 arg); 5078 } 5079 5080 return fib6_nh_mtu_change(f6i->fib6_nh, arg); 5081} 5082 5083void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 5084{ 5085 struct rt6_mtu_change_arg arg = { 5086 .dev = dev, 5087 .mtu = mtu, 5088 }; 5089 5090 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 5091} 5092 5093static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 5094 [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 }, 5095 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 5096 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) }, 5097 [RTA_OIF] = { .type = NLA_U32 }, 5098 [RTA_IIF] = { .type = NLA_U32 }, 5099 [RTA_PRIORITY] = { .type = NLA_U32 }, 5100 [RTA_METRICS] = { .type = NLA_NESTED }, 5101 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 5102 [RTA_PREF] = { .type = NLA_U8 }, 5103 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 5104 [RTA_ENCAP] = { .type = NLA_NESTED }, 5105 [RTA_EXPIRES] = { .type = NLA_U32 }, 5106 [RTA_UID] = { .type = NLA_U32 }, 5107 [RTA_MARK] = { .type = NLA_U32 }, 5108 [RTA_TABLE] = { .type = NLA_U32 }, 5109 [RTA_IP_PROTO] = { .type = NLA_U8 }, 5110 [RTA_SPORT] = { .type = NLA_U16 }, 5111 [RTA_DPORT] = { .type = NLA_U16 }, 5112 [RTA_NH_ID] = { .type = NLA_U32 }, 5113 [RTA_FLOWLABEL] = { .type = NLA_BE32 }, 5114}; 5115 5116static int rtm_to_fib6_multipath_config(struct fib6_config *cfg, 5117 struct netlink_ext_ack *extack, 5118 bool newroute) 5119{ 5120 struct rtnexthop *rtnh; 5121 int remaining; 5122 5123 remaining = cfg->fc_mp_len; 5124 rtnh = (struct rtnexthop *)cfg->fc_mp; 5125 5126 if (!rtnh_ok(rtnh, remaining)) { 5127 NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - no valid nexthops"); 5128 return -EINVAL; 5129 } 5130 5131 do { 5132 bool has_gateway = cfg->fc_flags & RTF_GATEWAY; 5133 int attrlen = rtnh_attrlen(rtnh); 5134 5135 if (attrlen > 0) { 5136 struct nlattr *nla, *attrs; 5137 5138 attrs = rtnh_attrs(rtnh); 5139 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 5140 if (nla) { 5141 if (nla_len(nla) < sizeof(cfg->fc_gateway)) { 5142 NL_SET_ERR_MSG(extack, 5143 "Invalid IPv6 address in RTA_GATEWAY"); 5144 return -EINVAL; 5145 } 5146 5147 has_gateway = true; 5148 } 5149 } 5150 5151 if (newroute && (cfg->fc_nh_id || !has_gateway)) { 5152 NL_SET_ERR_MSG(extack, 5153 "Device only routes can not be added for IPv6 using the multipath API."); 5154 return -EINVAL; 5155 } 5156 5157 rtnh = rtnh_next(rtnh, &remaining); 5158 } while (rtnh_ok(rtnh, remaining)); 5159 5160 return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, extack); 5161} 5162 5163static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 5164 struct fib6_config *cfg, 5165 struct netlink_ext_ack *extack) 5166{ 5167 bool newroute = nlh->nlmsg_type == RTM_NEWROUTE; 5168 struct nlattr *tb[RTA_MAX+1]; 5169 struct rtmsg *rtm; 5170 unsigned int pref; 5171 int err; 5172 5173 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 5174 rtm_ipv6_policy, extack); 5175 if (err < 0) 5176 goto errout; 5177 5178 err = -EINVAL; 5179 rtm = nlmsg_data(nlh); 5180 5181 if (rtm->rtm_tos) { 5182 NL_SET_ERR_MSG(extack, 5183 "Invalid dsfield (tos): option not available for IPv6"); 5184 goto errout; 5185 } 5186 5187 if (tb[RTA_FLOWLABEL]) { 5188 
NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], 5189 "Flow label cannot be specified for this operation"); 5190 goto errout; 5191 } 5192 5193 *cfg = (struct fib6_config){ 5194 .fc_table = rtm->rtm_table, 5195 .fc_dst_len = rtm->rtm_dst_len, 5196 .fc_src_len = rtm->rtm_src_len, 5197 .fc_flags = RTF_UP, 5198 .fc_protocol = rtm->rtm_protocol, 5199 .fc_type = rtm->rtm_type, 5200 5201 .fc_nlinfo.portid = NETLINK_CB(skb).portid, 5202 .fc_nlinfo.nlh = nlh, 5203 .fc_nlinfo.nl_net = sock_net(skb->sk), 5204 }; 5205 5206 if (rtm->rtm_type == RTN_UNREACHABLE || 5207 rtm->rtm_type == RTN_BLACKHOLE || 5208 rtm->rtm_type == RTN_PROHIBIT || 5209 rtm->rtm_type == RTN_THROW) 5210 cfg->fc_flags |= RTF_REJECT; 5211 5212 if (rtm->rtm_type == RTN_LOCAL) 5213 cfg->fc_flags |= RTF_LOCAL; 5214 5215 if (rtm->rtm_flags & RTM_F_CLONED) 5216 cfg->fc_flags |= RTF_CACHE; 5217 5218 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); 5219 5220 if (tb[RTA_NH_ID]) { 5221 if (tb[RTA_GATEWAY] || tb[RTA_OIF] || 5222 tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) { 5223 NL_SET_ERR_MSG(extack, 5224 "Nexthop specification and nexthop id are mutually exclusive"); 5225 goto errout; 5226 } 5227 cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]); 5228 } 5229 5230 if (tb[RTA_GATEWAY]) { 5231 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 5232 cfg->fc_flags |= RTF_GATEWAY; 5233 } 5234 if (tb[RTA_VIA]) { 5235 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); 5236 goto errout; 5237 } 5238 5239 if (tb[RTA_DST]) { 5240 int plen = (rtm->rtm_dst_len + 7) >> 3; 5241 5242 if (nla_len(tb[RTA_DST]) < plen) 5243 goto errout; 5244 5245 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 5246 } 5247 5248 if (tb[RTA_SRC]) { 5249 int plen = (rtm->rtm_src_len + 7) >> 3; 5250 5251 if (nla_len(tb[RTA_SRC]) < plen) 5252 goto errout; 5253 5254 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 5255 } 5256 5257 if (tb[RTA_PREFSRC]) 5258 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 5259 5260 if (tb[RTA_OIF]) 5261 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 5262 5263 if (tb[RTA_PRIORITY]) 5264 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 5265 5266 if (tb[RTA_METRICS]) { 5267 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 5268 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 5269 } 5270 5271 if (tb[RTA_TABLE]) 5272 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 5273 5274 if (tb[RTA_MULTIPATH]) { 5275 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 5276 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 5277 5278 err = rtm_to_fib6_multipath_config(cfg, extack, newroute); 5279 if (err < 0) 5280 goto errout; 5281 } 5282 5283 if (tb[RTA_PREF]) { 5284 pref = nla_get_u8(tb[RTA_PREF]); 5285 if (pref != ICMPV6_ROUTER_PREF_LOW && 5286 pref != ICMPV6_ROUTER_PREF_HIGH) 5287 pref = ICMPV6_ROUTER_PREF_MEDIUM; 5288 cfg->fc_flags |= RTF_PREF(pref); 5289 } 5290 5291 if (tb[RTA_ENCAP]) 5292 cfg->fc_encap = tb[RTA_ENCAP]; 5293 5294 if (tb[RTA_ENCAP_TYPE]) { 5295 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); 5296 5297 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); 5298 if (err < 0) 5299 goto errout; 5300 } 5301 5302 if (tb[RTA_EXPIRES]) { 5303 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); 5304 5305 if (addrconf_finite_timeout(timeout)) { 5306 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); 5307 cfg->fc_flags |= RTF_EXPIRES; 5308 } 5309 } 5310 5311 err = 0; 5312errout: 5313 return err; 5314} 5315 5316struct rt6_nh { 5317 struct fib6_info *fib6_info; 5318 struct fib6_config r_cfg; 5319 struct list_head list; 5320}; 5321 5322static int 
ip6_route_info_append(struct list_head *rt6_nh_list, 5323 struct fib6_info *rt, 5324 struct fib6_config *r_cfg) 5325{ 5326 struct rt6_nh *nh; 5327 5328 list_for_each_entry(nh, rt6_nh_list, list) { 5329 /* check if fib6_info already exists */ 5330 if (rt6_duplicate_nexthop(nh->fib6_info, rt)) 5331 return -EEXIST; 5332 } 5333 5334 nh = kzalloc(sizeof(*nh), GFP_KERNEL); 5335 if (!nh) 5336 return -ENOMEM; 5337 5338 nh->fib6_info = rt; 5339 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 5340 list_add_tail(&nh->list, rt6_nh_list); 5341 5342 return 0; 5343} 5344 5345static void ip6_route_mpath_notify(struct fib6_info *rt, 5346 struct fib6_info *rt_last, 5347 struct nl_info *info, 5348 __u16 nlflags) 5349{ 5350 /* if this is an APPEND route, then rt points to the first route 5351 * inserted and rt_last points to last route inserted. Userspace 5352 * wants a consistent dump of the route which starts at the first 5353 * nexthop. Since sibling routes are always added at the end of 5354 * the list, find the first sibling of the last route appended 5355 */ 5356 rcu_read_lock(); 5357 5358 if ((nlflags & NLM_F_APPEND) && rt_last && 5359 READ_ONCE(rt_last->fib6_nsiblings)) { 5360 rt = list_first_or_null_rcu(&rt_last->fib6_siblings, 5361 struct fib6_info, 5362 fib6_siblings); 5363 } 5364 5365 if (rt) 5366 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 5367 5368 rcu_read_unlock(); 5369} 5370 5371static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) 5372{ 5373 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); 5374 bool should_notify = false; 5375 struct fib6_info *leaf; 5376 struct fib6_node *fn; 5377 5378 rcu_read_lock(); 5379 fn = rcu_dereference(rt->fib6_node); 5380 if (!fn) 5381 goto out; 5382 5383 leaf = rcu_dereference(fn->leaf); 5384 if (!leaf) 5385 goto out; 5386 5387 if (rt == leaf || 5388 (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric && 5389 rt6_qualify_for_ecmp(leaf))) 5390 should_notify = true; 5391out: 5392 rcu_read_unlock(); 5393 5394 return should_notify; 5395} 5396 5397static int ip6_route_multipath_add(struct fib6_config *cfg, 5398 struct netlink_ext_ack *extack) 5399{ 5400 struct fib6_info *rt_notif = NULL, *rt_last = NULL; 5401 struct nl_info *info = &cfg->fc_nlinfo; 5402 struct rt6_nh *nh, *nh_safe; 5403 struct fib6_config r_cfg; 5404 struct rtnexthop *rtnh; 5405 LIST_HEAD(rt6_nh_list); 5406 struct rt6_nh *err_nh; 5407 struct fib6_info *rt; 5408 __u16 nlflags; 5409 int remaining; 5410 int attrlen; 5411 int replace; 5412 int nhn = 0; 5413 int err; 5414 5415 err = fib6_config_validate(cfg, extack); 5416 if (err) 5417 return err; 5418 5419 replace = (cfg->fc_nlinfo.nlh && 5420 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 5421 5422 nlflags = replace ? 
NLM_F_REPLACE : NLM_F_CREATE;
5423 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5424 nlflags |= NLM_F_APPEND;
5425
5426 remaining = cfg->fc_mp_len;
5427 rtnh = (struct rtnexthop *)cfg->fc_mp;
5428
5429 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
5430 * fib6_info structs per nexthop
5431 */
5432 while (rtnh_ok(rtnh, remaining)) {
5433 memcpy(&r_cfg, cfg, sizeof(*cfg));
5434 if (rtnh->rtnh_ifindex)
5435 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5436
5437 attrlen = rtnh_attrlen(rtnh);
5438 if (attrlen > 0) {
5439 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5440
5441 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5442 if (nla) {
5443 r_cfg.fc_gateway = nla_get_in6_addr(nla);
5444 r_cfg.fc_flags |= RTF_GATEWAY;
5445 }
5446
5447 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5448 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5449 if (nla)
5450 r_cfg.fc_encap_type = nla_get_u16(nla);
5451 }
5452
5453 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5454 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5455 if (IS_ERR(rt)) {
5456 err = PTR_ERR(rt);
5457 rt = NULL;
5458 goto cleanup;
5459 }
5460
5461 err = ip6_route_info_create_nh(rt, &r_cfg, GFP_KERNEL, extack);
5462 if (err) {
5463 rt = NULL;
5464 goto cleanup;
5465 }
5466
5467 rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5468
5469 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
5470 if (err) {
5471 fib6_info_release(rt);
5472 goto cleanup;
5473 }
5474
5475 rtnh = rtnh_next(rtnh, &remaining);
5476 }
5477
5478 /* For add and replace, send one notification with all nexthops.
5479 * Skip the notification in fib6_add_rt2node() and send one with
5480 * the full route when done.
5481 */
5482 info->skip_notify = 1;
5483
5484 /* For add and replace, send one notification with all nexthops. For
5485 * append, send one notification with all appended nexthops.
5486 */
5487 info->skip_notify_kernel = 1;
5488
5489 err_nh = NULL;
5490 list_for_each_entry(nh, &rt6_nh_list, list) {
5491 err = __ip6_ins_rt(nh->fib6_info, info, extack);
5492
5493 if (err) {
5494 if (replace && nhn)
5495 NL_SET_ERR_MSG_MOD(extack,
5496 "multipath route replace failed (check consistency of installed routes)");
5497 err_nh = nh;
5498 goto add_errout;
5499 }
5500 /* save reference to last route successfully inserted */
5501 rt_last = nh->fib6_info;
5502
5503 /* save reference to first route for notification */
5504 if (!rt_notif)
5505 rt_notif = nh->fib6_info;
5506
5507 /* Because each route is added like a single route, we remove
5508 * these flags after the first nexthop: if there is a collision,
5509 * adding the first nexthop has already failed and
5510 * fib6_add_rt2node() has rejected it; when replacing, the old
5511 * nexthops have been replaced by the first new one, and the
5512 * remaining ones should be appended to it.
5513 */
5514 if (cfg->fc_nlinfo.nlh) {
5515 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5516 NLM_F_REPLACE);
5517 cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5518 }
5519 nhn++;
5520 }
5521
5522 /* An in-kernel notification should only be sent in case the new
5523 * multipath route is added as the first route in the node, or if
5524 * it was appended to it. We pass 'rt_notif' since it is the first
5525 * sibling and might allow us to skip some checks in the replace case.
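 * If rt_notif ends up with more siblings than the nhn - 1 added
 * here, the nexthops were appended to a pre-existing route, so
 * FIB_EVENT_ENTRY_APPEND is reported instead of a replace.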
5526 */ 5527 if (ip6_route_mpath_should_notify(rt_notif)) { 5528 enum fib_event_type fib_event; 5529 5530 if (rt_notif->fib6_nsiblings != nhn - 1) 5531 fib_event = FIB_EVENT_ENTRY_APPEND; 5532 else 5533 fib_event = FIB_EVENT_ENTRY_REPLACE; 5534 5535 err = call_fib6_multipath_entry_notifiers(info->nl_net, 5536 fib_event, rt_notif, 5537 nhn - 1, extack); 5538 if (err) { 5539 /* Delete all the siblings that were just added */ 5540 err_nh = NULL; 5541 goto add_errout; 5542 } 5543 } 5544 5545 /* success ... tell user about new route */ 5546 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 5547 goto cleanup; 5548 5549add_errout: 5550 /* send notification for routes that were added so that 5551 * the delete notifications sent by ip6_route_del are 5552 * coherent 5553 */ 5554 if (rt_notif) 5555 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 5556 5557 /* Delete routes that were already added */ 5558 list_for_each_entry(nh, &rt6_nh_list, list) { 5559 if (err_nh == nh) 5560 break; 5561 ip6_route_del(&nh->r_cfg, extack); 5562 } 5563 5564cleanup: 5565 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) { 5566 fib6_info_release(nh->fib6_info); 5567 list_del(&nh->list); 5568 kfree(nh); 5569 } 5570 5571 return err; 5572} 5573 5574static int ip6_route_multipath_del(struct fib6_config *cfg, 5575 struct netlink_ext_ack *extack) 5576{ 5577 struct fib6_config r_cfg; 5578 struct rtnexthop *rtnh; 5579 int last_err = 0; 5580 int remaining; 5581 int attrlen; 5582 int err; 5583 5584 remaining = cfg->fc_mp_len; 5585 rtnh = (struct rtnexthop *)cfg->fc_mp; 5586 5587 /* Parse a Multipath Entry */ 5588 while (rtnh_ok(rtnh, remaining)) { 5589 memcpy(&r_cfg, cfg, sizeof(*cfg)); 5590 if (rtnh->rtnh_ifindex) 5591 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 5592 5593 attrlen = rtnh_attrlen(rtnh); 5594 if (attrlen > 0) { 5595 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 5596 5597 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 5598 if (nla) { 5599 r_cfg.fc_gateway = nla_get_in6_addr(nla); 5600 r_cfg.fc_flags |= RTF_GATEWAY; 5601 } 5602 } 5603 5604 err = ip6_route_del(&r_cfg, extack); 5605 if (err) 5606 last_err = err; 5607 5608 rtnh = rtnh_next(rtnh, &remaining); 5609 } 5610 5611 return last_err; 5612} 5613 5614static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 5615 struct netlink_ext_ack *extack) 5616{ 5617 struct fib6_config cfg; 5618 int err; 5619 5620 err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 5621 if (err < 0) 5622 return err; 5623 5624 if (cfg.fc_nh_id) { 5625 rcu_read_lock(); 5626 err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id); 5627 rcu_read_unlock(); 5628 5629 if (err) { 5630 NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); 5631 return -EINVAL; 5632 } 5633 } 5634 5635 if (cfg.fc_mp) { 5636 return ip6_route_multipath_del(&cfg, extack); 5637 } else { 5638 cfg.fc_delete_all_nh = 1; 5639 return ip6_route_del(&cfg, extack); 5640 } 5641} 5642 5643static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 5644 struct netlink_ext_ack *extack) 5645{ 5646 struct fib6_config cfg; 5647 int err; 5648 5649 err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 5650 if (err < 0) 5651 return err; 5652 5653 if (cfg.fc_metric == 0) 5654 cfg.fc_metric = IP6_RT_PRIO_USER; 5655 5656 if (cfg.fc_mp) 5657 return ip6_route_multipath_add(&cfg, extack); 5658 else 5659 return ip6_route_add(&cfg, GFP_KERNEL, extack); 5660} 5661 5662/* add the overhead of this fib6_nh to nexthop_len */ 5663static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg) 5664{ 5665 int 
*nexthop_len = arg; 5666 5667 *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */ 5668 + NLA_ALIGN(sizeof(struct rtnexthop)) 5669 + nla_total_size(16); /* RTA_GATEWAY */ 5670 5671 if (nh->fib_nh_lws) { 5672 /* RTA_ENCAP_TYPE */ 5673 *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); 5674 /* RTA_ENCAP */ 5675 *nexthop_len += nla_total_size(2); 5676 } 5677 5678 return 0; 5679} 5680 5681static size_t rt6_nlmsg_size(struct fib6_info *f6i) 5682{ 5683 struct fib6_info *sibling; 5684 struct fib6_nh *nh; 5685 int nexthop_len; 5686 5687 if (f6i->nh) { 5688 nexthop_len = nla_total_size(4); /* RTA_NH_ID */ 5689 nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, 5690 &nexthop_len); 5691 goto common; 5692 } 5693 5694 rcu_read_lock(); 5695retry: 5696 nh = f6i->fib6_nh; 5697 nexthop_len = 0; 5698 if (READ_ONCE(f6i->fib6_nsiblings)) { 5699 rt6_nh_nlmsg_size(nh, &nexthop_len); 5700 5701 list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, 5702 fib6_siblings) { 5703 rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); 5704 if (!READ_ONCE(f6i->fib6_nsiblings)) 5705 goto retry; 5706 } 5707 } 5708 rcu_read_unlock(); 5709 nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); 5710common: 5711 return NLMSG_ALIGN(sizeof(struct rtmsg)) 5712 + nla_total_size(16) /* RTA_SRC */ 5713 + nla_total_size(16) /* RTA_DST */ 5714 + nla_total_size(16) /* RTA_GATEWAY */ 5715 + nla_total_size(16) /* RTA_PREFSRC */ 5716 + nla_total_size(4) /* RTA_TABLE */ 5717 + nla_total_size(4) /* RTA_IIF */ 5718 + nla_total_size(4) /* RTA_OIF */ 5719 + nla_total_size(4) /* RTA_PRIORITY */ 5720 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 5721 + nla_total_size(sizeof(struct rta_cacheinfo)) 5722 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 5723 + nla_total_size(1) /* RTA_PREF */ 5724 + nexthop_len; 5725} 5726 5727static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, 5728 unsigned char *flags) 5729{ 5730 if (nexthop_is_multipath(nh)) { 5731 struct nlattr *mp; 5732 5733 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); 5734 if (!mp) 5735 goto nla_put_failure; 5736 5737 if (nexthop_mpath_fill_node(skb, nh, AF_INET6)) 5738 goto nla_put_failure; 5739 5740 nla_nest_end(skb, mp); 5741 } else { 5742 struct fib6_nh *fib6_nh; 5743 5744 fib6_nh = nexthop_fib6_nh(nh); 5745 if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6, 5746 flags, false) < 0) 5747 goto nla_put_failure; 5748 } 5749 5750 return 0; 5751 5752nla_put_failure: 5753 return -EMSGSIZE; 5754} 5755 5756static int rt6_fill_node(struct net *net, struct sk_buff *skb, 5757 struct fib6_info *rt, struct dst_entry *dst, 5758 struct in6_addr *dest, struct in6_addr *src, 5759 int iif, int type, u32 portid, u32 seq, 5760 unsigned int flags) 5761{ 5762 struct rt6_info *rt6 = dst_rt6_info(dst); 5763 struct rt6key *rt6_dst, *rt6_src; 5764 u32 *pmetrics, table, rt6_flags; 5765 unsigned char nh_flags = 0; 5766 struct nlmsghdr *nlh; 5767 struct rtmsg *rtm; 5768 long expires = 0; 5769 5770 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 5771 if (!nlh) 5772 return -EMSGSIZE; 5773 5774 if (rt6) { 5775 rt6_dst = &rt6->rt6i_dst; 5776 rt6_src = &rt6->rt6i_src; 5777 rt6_flags = rt6->rt6i_flags; 5778 } else { 5779 rt6_dst = &rt->fib6_dst; 5780 rt6_src = &rt->fib6_src; 5781 rt6_flags = rt->fib6_flags; 5782 } 5783 5784 rtm = nlmsg_data(nlh); 5785 rtm->rtm_family = AF_INET6; 5786 rtm->rtm_dst_len = rt6_dst->plen; 5787 rtm->rtm_src_len = rt6_src->plen; 5788 rtm->rtm_tos = 0; 5789 if (rt->fib6_table) 5790 table = rt->fib6_table->tb6_id; 5791 else 5792 table = 
RT6_TABLE_UNSPEC; 5793 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; 5794 if (nla_put_u32(skb, RTA_TABLE, table)) 5795 goto nla_put_failure; 5796 5797 rtm->rtm_type = rt->fib6_type; 5798 rtm->rtm_flags = 0; 5799 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 5800 rtm->rtm_protocol = rt->fib6_protocol; 5801 5802 if (rt6_flags & RTF_CACHE) 5803 rtm->rtm_flags |= RTM_F_CLONED; 5804 5805 if (dest) { 5806 if (nla_put_in6_addr(skb, RTA_DST, dest)) 5807 goto nla_put_failure; 5808 rtm->rtm_dst_len = 128; 5809 } else if (rtm->rtm_dst_len) 5810 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr)) 5811 goto nla_put_failure; 5812#ifdef CONFIG_IPV6_SUBTREES 5813 if (src) { 5814 if (nla_put_in6_addr(skb, RTA_SRC, src)) 5815 goto nla_put_failure; 5816 rtm->rtm_src_len = 128; 5817 } else if (rtm->rtm_src_len && 5818 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr)) 5819 goto nla_put_failure; 5820#endif 5821 if (iif) { 5822#ifdef CONFIG_IPV6_MROUTE 5823 if (ipv6_addr_is_multicast(&rt6_dst->addr)) { 5824 int err = ip6mr_get_route(net, skb, rtm, portid); 5825 5826 if (err == 0) 5827 return 0; 5828 if (err < 0) 5829 goto nla_put_failure; 5830 } else 5831#endif 5832 if (nla_put_u32(skb, RTA_IIF, iif)) 5833 goto nla_put_failure; 5834 } else if (dest) { 5835 struct in6_addr saddr_buf; 5836 if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && 5837 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 5838 goto nla_put_failure; 5839 } 5840 5841 if (rt->fib6_prefsrc.plen) { 5842 struct in6_addr saddr_buf; 5843 saddr_buf = rt->fib6_prefsrc.addr; 5844 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 5845 goto nla_put_failure; 5846 } 5847 5848 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; 5849 if (rtnetlink_put_metrics(skb, pmetrics) < 0) 5850 goto nla_put_failure; 5851 5852 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) 5853 goto nla_put_failure; 5854 5855 /* For multipath routes, walk the siblings list and add 5856 * each as a nexthop within RTA_MULTIPATH. 
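 * Cached entries (rt6 != NULL) always describe a single resolved
 * path, so they are dumped with plain RTA_GATEWAY/RTA_OIF attributes
 * instead; routes using a nexthop object are dumped as RTA_NH_ID,
 * plus a compatibility RTA_MULTIPATH copy when nexthop_compat_mode
 * is enabled.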
5857 */ 5858 if (rt6) { 5859 struct net_device *dev; 5860 5861 if (rt6_flags & RTF_GATEWAY && 5862 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway)) 5863 goto nla_put_failure; 5864 5865 dev = dst_dev(dst); 5866 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) 5867 goto nla_put_failure; 5868 5869 if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) 5870 goto nla_put_failure; 5871 } else if (READ_ONCE(rt->fib6_nsiblings)) { 5872 struct fib6_info *sibling; 5873 struct nlattr *mp; 5874 5875 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); 5876 if (!mp) 5877 goto nla_put_failure; 5878 5879 if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, 5880 rt->fib6_nh->fib_nh_weight, AF_INET6, 5881 0) < 0) 5882 goto nla_put_failure; 5883 5884 rcu_read_lock(); 5885 5886 list_for_each_entry_rcu(sibling, &rt->fib6_siblings, 5887 fib6_siblings) { 5888 if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, 5889 sibling->fib6_nh->fib_nh_weight, 5890 AF_INET6, 0) < 0) { 5891 rcu_read_unlock(); 5892 5893 goto nla_put_failure; 5894 } 5895 } 5896 5897 rcu_read_unlock(); 5898 5899 nla_nest_end(skb, mp); 5900 } else if (rt->nh) { 5901 if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) 5902 goto nla_put_failure; 5903 5904 if (nexthop_is_blackhole(rt->nh)) 5905 rtm->rtm_type = RTN_BLACKHOLE; 5906 5907 if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && 5908 rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) 5909 goto nla_put_failure; 5910 5911 rtm->rtm_flags |= nh_flags; 5912 } else { 5913 if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6, 5914 &nh_flags, false) < 0) 5915 goto nla_put_failure; 5916 5917 rtm->rtm_flags |= nh_flags; 5918 } 5919 5920 if (rt6_flags & RTF_EXPIRES) { 5921 expires = dst ? READ_ONCE(dst->expires) : rt->expires; 5922 expires -= jiffies; 5923 } 5924 5925 if (!dst) { 5926 if (READ_ONCE(rt->offload)) 5927 rtm->rtm_flags |= RTM_F_OFFLOAD; 5928 if (READ_ONCE(rt->trap)) 5929 rtm->rtm_flags |= RTM_F_TRAP; 5930 if (READ_ONCE(rt->offload_failed)) 5931 rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED; 5932 } 5933 5934 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? 
dst->error : 0) < 0) 5935 goto nla_put_failure; 5936 5937 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags))) 5938 goto nla_put_failure; 5939 5940 5941 nlmsg_end(skb, nlh); 5942 return 0; 5943 5944nla_put_failure: 5945 nlmsg_cancel(skb, nlh); 5946 return -EMSGSIZE; 5947} 5948 5949static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg) 5950{ 5951 const struct net_device *dev = arg; 5952 5953 if (nh->fib_nh_dev == dev) 5954 return 1; 5955 5956 return 0; 5957} 5958 5959static bool fib6_info_uses_dev(const struct fib6_info *f6i, 5960 const struct net_device *dev) 5961{ 5962 if (f6i->nh) { 5963 struct net_device *_dev = (struct net_device *)dev; 5964 5965 return !!nexthop_for_each_fib6_nh(f6i->nh, 5966 fib6_info_nh_uses_dev, 5967 _dev); 5968 } 5969 5970 if (f6i->fib6_nh->fib_nh_dev == dev) 5971 return true; 5972 5973 if (READ_ONCE(f6i->fib6_nsiblings)) { 5974 const struct fib6_info *sibling; 5975 5976 rcu_read_lock(); 5977 list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, 5978 fib6_siblings) { 5979 if (sibling->fib6_nh->fib_nh_dev == dev) { 5980 rcu_read_unlock(); 5981 return true; 5982 } 5983 if (!READ_ONCE(f6i->fib6_nsiblings)) 5984 break; 5985 } 5986 rcu_read_unlock(); 5987 } 5988 return false; 5989} 5990 5991struct fib6_nh_exception_dump_walker { 5992 struct rt6_rtnl_dump_arg *dump; 5993 struct fib6_info *rt; 5994 unsigned int flags; 5995 unsigned int skip; 5996 unsigned int count; 5997}; 5998 5999static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg) 6000{ 6001 struct fib6_nh_exception_dump_walker *w = arg; 6002 struct rt6_rtnl_dump_arg *dump = w->dump; 6003 struct rt6_exception_bucket *bucket; 6004 struct rt6_exception *rt6_ex; 6005 int i, err; 6006 6007 bucket = fib6_nh_get_excptn_bucket(nh, NULL); 6008 if (!bucket) 6009 return 0; 6010 6011 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 6012 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { 6013 if (w->skip) { 6014 w->skip--; 6015 continue; 6016 } 6017 6018 /* Expiration of entries doesn't bump sernum, insertion 6019 * does. Removal is triggered by insertion, so we can 6020 * rely on the fact that if entries change between two 6021 * partial dumps, this node is scanned again completely, 6022 * see rt6_insert_exception() and fib6_dump_table(). 6023 * 6024 * Count expired entries we go through as handled 6025 * entries that we'll skip next time, in case of partial 6026 * node dump. Otherwise, if entries expire meanwhile, 6027 * we'll skip the wrong amount. 
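 * (The handled count is what rt6_dump_route() returns on a partial
 * dump; the walker feeds it back in as 'skip' when it resumes.)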
6028 */ 6029 if (rt6_check_expired(rt6_ex->rt6i)) { 6030 w->count++; 6031 continue; 6032 } 6033 6034 err = rt6_fill_node(dump->net, dump->skb, w->rt, 6035 &rt6_ex->rt6i->dst, NULL, NULL, 0, 6036 RTM_NEWROUTE, 6037 NETLINK_CB(dump->cb->skb).portid, 6038 dump->cb->nlh->nlmsg_seq, w->flags); 6039 if (err) 6040 return err; 6041 6042 w->count++; 6043 } 6044 bucket++; 6045 } 6046 6047 return 0; 6048} 6049 6050/* Return -1 if done with node, number of handled routes on partial dump */ 6051int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip) 6052{ 6053 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 6054 struct fib_dump_filter *filter = &arg->filter; 6055 unsigned int flags = NLM_F_MULTI; 6056 struct net *net = arg->net; 6057 int count = 0; 6058 6059 if (rt == net->ipv6.fib6_null_entry) 6060 return -1; 6061 6062 if ((filter->flags & RTM_F_PREFIX) && 6063 !(rt->fib6_flags & RTF_PREFIX_RT)) { 6064 /* success since this is not a prefix route */ 6065 return -1; 6066 } 6067 if (filter->filter_set && 6068 ((filter->rt_type && rt->fib6_type != filter->rt_type) || 6069 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || 6070 (filter->protocol && rt->fib6_protocol != filter->protocol))) { 6071 return -1; 6072 } 6073 6074 if (filter->filter_set || 6075 !filter->dump_routes || !filter->dump_exceptions) { 6076 flags |= NLM_F_DUMP_FILTERED; 6077 } 6078 6079 if (filter->dump_routes) { 6080 if (skip) { 6081 skip--; 6082 } else { 6083 if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 6084 0, RTM_NEWROUTE, 6085 NETLINK_CB(arg->cb->skb).portid, 6086 arg->cb->nlh->nlmsg_seq, flags)) { 6087 return 0; 6088 } 6089 count++; 6090 } 6091 } 6092 6093 if (filter->dump_exceptions) { 6094 struct fib6_nh_exception_dump_walker w = { .dump = arg, 6095 .rt = rt, 6096 .flags = flags, 6097 .skip = skip, 6098 .count = 0 }; 6099 int err; 6100 6101 rcu_read_lock(); 6102 if (rt->nh) { 6103 err = nexthop_for_each_fib6_nh(rt->nh, 6104 rt6_nh_dump_exceptions, 6105 &w); 6106 } else { 6107 err = rt6_nh_dump_exceptions(rt->fib6_nh, &w); 6108 } 6109 rcu_read_unlock(); 6110 6111 if (err) 6112 return count + w.count; 6113 } 6114 6115 return -1; 6116} 6117 6118static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, 6119 const struct nlmsghdr *nlh, 6120 struct nlattr **tb, 6121 struct netlink_ext_ack *extack) 6122{ 6123 struct rtmsg *rtm; 6124 int i, err; 6125 6126 rtm = nlmsg_payload(nlh, sizeof(*rtm)); 6127 if (!rtm) { 6128 NL_SET_ERR_MSG_MOD(extack, 6129 "Invalid header for get route request"); 6130 return -EINVAL; 6131 } 6132 6133 if (!netlink_strict_get_check(skb)) 6134 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 6135 rtm_ipv6_policy, extack); 6136 6137 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 6138 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 6139 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || 6140 rtm->rtm_type) { 6141 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); 6142 return -EINVAL; 6143 } 6144 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { 6145 NL_SET_ERR_MSG_MOD(extack, 6146 "Invalid flags for get route request"); 6147 return -EINVAL; 6148 } 6149 6150 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 6151 rtm_ipv6_policy, extack); 6152 if (err) 6153 return err; 6154 6155 if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 6156 (tb[RTA_DST] && !rtm->rtm_dst_len)) { 6157 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 6158 return -EINVAL; 6159 } 6160 6161 if (tb[RTA_FLOWLABEL] 
&& 6162 (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) { 6163 NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], 6164 "Invalid flow label"); 6165 return -EINVAL; 6166 } 6167 6168 for (i = 0; i <= RTA_MAX; i++) { 6169 if (!tb[i]) 6170 continue; 6171 6172 switch (i) { 6173 case RTA_SRC: 6174 case RTA_DST: 6175 case RTA_IIF: 6176 case RTA_OIF: 6177 case RTA_MARK: 6178 case RTA_UID: 6179 case RTA_SPORT: 6180 case RTA_DPORT: 6181 case RTA_IP_PROTO: 6182 case RTA_FLOWLABEL: 6183 break; 6184 default: 6185 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); 6186 return -EINVAL; 6187 } 6188 } 6189 6190 return 0; 6191} 6192 6193static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 6194 struct netlink_ext_ack *extack) 6195{ 6196 struct net *net = sock_net(in_skb->sk); 6197 struct nlattr *tb[RTA_MAX+1]; 6198 int err, iif = 0, oif = 0; 6199 struct fib6_info *from; 6200 struct dst_entry *dst; 6201 struct rt6_info *rt; 6202 struct sk_buff *skb; 6203 struct rtmsg *rtm; 6204 struct flowi6 fl6 = {}; 6205 __be32 flowlabel; 6206 bool fibmatch; 6207 6208 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 6209 if (err < 0) 6210 goto errout; 6211 6212 err = -EINVAL; 6213 rtm = nlmsg_data(nlh); 6214 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); 6215 6216 if (tb[RTA_SRC]) { 6217 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 6218 goto errout; 6219 6220 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 6221 } 6222 6223 if (tb[RTA_DST]) { 6224 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 6225 goto errout; 6226 6227 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 6228 } 6229 6230 if (tb[RTA_IIF]) 6231 iif = nla_get_u32(tb[RTA_IIF]); 6232 6233 if (tb[RTA_OIF]) 6234 oif = nla_get_u32(tb[RTA_OIF]); 6235 6236 if (tb[RTA_MARK]) 6237 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 6238 6239 if (tb[RTA_UID]) 6240 fl6.flowi6_uid = make_kuid(current_user_ns(), 6241 nla_get_u32(tb[RTA_UID])); 6242 else 6243 fl6.flowi6_uid = iif ? 
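/* no RTA_UID given: an input-path lookup has no local user context,
 * while an output lookup runs with the caller's uid so uid-range
 * policy rules can still match
 */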
INVALID_UID : current_uid(); 6244 6245 if (tb[RTA_SPORT]) 6246 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]); 6247 6248 if (tb[RTA_DPORT]) 6249 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]); 6250 6251 if (tb[RTA_IP_PROTO]) { 6252 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 6253 &fl6.flowi6_proto, AF_INET6, 6254 extack); 6255 if (err) 6256 goto errout; 6257 } 6258 6259 flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0); 6260 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel); 6261 6262 if (iif) { 6263 struct net_device *dev; 6264 int flags = 0; 6265 6266 rcu_read_lock(); 6267 6268 dev = dev_get_by_index_rcu(net, iif); 6269 if (!dev) { 6270 rcu_read_unlock(); 6271 err = -ENODEV; 6272 goto errout; 6273 } 6274 6275 fl6.flowi6_iif = iif; 6276 6277 if (!ipv6_addr_any(&fl6.saddr)) 6278 flags |= RT6_LOOKUP_F_HAS_SADDR; 6279 6280 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags); 6281 6282 rcu_read_unlock(); 6283 } else { 6284 fl6.flowi6_oif = oif; 6285 6286 dst = ip6_route_output(net, NULL, &fl6); 6287 } 6288 6289 6290 rt = dst_rt6_info(dst); 6291 if (rt->dst.error) { 6292 err = rt->dst.error; 6293 ip6_rt_put(rt); 6294 goto errout; 6295 } 6296 6297 if (rt == net->ipv6.ip6_null_entry) { 6298 err = rt->dst.error; 6299 ip6_rt_put(rt); 6300 goto errout; 6301 } 6302 6303 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 6304 if (!skb) { 6305 ip6_rt_put(rt); 6306 err = -ENOBUFS; 6307 goto errout; 6308 } 6309 6310 skb_dst_set(skb, &rt->dst); 6311 6312 rcu_read_lock(); 6313 from = rcu_dereference(rt->from); 6314 if (from) { 6315 if (fibmatch) 6316 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, 6317 iif, RTM_NEWROUTE, 6318 NETLINK_CB(in_skb).portid, 6319 nlh->nlmsg_seq, 0); 6320 else 6321 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, 6322 &fl6.saddr, iif, RTM_NEWROUTE, 6323 NETLINK_CB(in_skb).portid, 6324 nlh->nlmsg_seq, 0); 6325 } else { 6326 err = -ENETUNREACH; 6327 } 6328 rcu_read_unlock(); 6329 6330 if (err < 0) { 6331 kfree_skb(skb); 6332 goto errout; 6333 } 6334 6335 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 6336errout: 6337 return err; 6338} 6339 6340void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, 6341 unsigned int nlm_flags) 6342{ 6343 struct net *net = info->nl_net; 6344 struct sk_buff *skb; 6345 size_t sz; 6346 u32 seq; 6347 int err; 6348 6349 err = -ENOBUFS; 6350 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 6351 6352 rcu_read_lock(); 6353 sz = rt6_nlmsg_size(rt); 6354retry: 6355 skb = nlmsg_new(sz, GFP_ATOMIC); 6356 if (!skb) 6357 goto errout; 6358 6359 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, 6360 event, info->portid, seq, nlm_flags); 6361 if (err < 0) { 6362 kfree_skb(skb); 6363 /* -EMSGSIZE implies needed space grew under us. */ 6364 if (err == -EMSGSIZE) { 6365 sz = max(rt6_nlmsg_size(rt), sz << 1); 6366 goto retry; 6367 } 6368 goto errout; 6369 } 6370 6371 rcu_read_unlock(); 6372 6373 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 6374 info->nlh, GFP_ATOMIC); 6375 return; 6376errout: 6377 rcu_read_unlock(); 6378 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 6379} 6380 6381void fib6_rt_update(struct net *net, struct fib6_info *rt, 6382 struct nl_info *info) 6383{ 6384 u32 seq = info->nlh ? 
void fib6_rt_update(struct net *net, struct fib6_info *rt,
		    struct nl_info *info)
{
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
			    bool offload, bool trap, bool offload_failed)
{
	u8 fib_notify_on_flag_change;
	struct sk_buff *skb;
	int err;

	if (READ_ONCE(f6i->offload) == offload &&
	    READ_ONCE(f6i->trap) == trap &&
	    READ_ONCE(f6i->offload_failed) == offload_failed)
		return;

	WRITE_ONCE(f6i->offload, offload);
	WRITE_ONCE(f6i->trap, trap);

	fib_notify_on_flag_change = READ_ONCE(net->ipv6.sysctl.fib_notify_on_flag_change);
	/* 2 means send notifications only if offload_failed was changed. */
	if (fib_notify_on_flag_change == 2 &&
	    READ_ONCE(f6i->offload_failed) == offload_failed)
		return;

	WRITE_ONCE(f6i->offload_failed, offload_failed);

	if (!rcu_access_pointer(f6i->fib6_node))
		/* The route was removed from the tree, do not send
		 * notification.
		 */
		return;

	if (!fib_notify_on_flag_change)
		return;

	skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
			    0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
	return;

errout:
	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
EXPORT_SYMBOL(fib6_info_hw_flags_set);
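
/* Loopback-device notifier: the per-netns null (and, with multiple
 * tables, prohibit/blackhole) dst entries need a device and an
 * inet6_dev reference; attach them when the loopback registers and
 * drop the references when it unregisters.
 */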
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER may be fired multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}

/*
 * /proc
 */

#ifdef CONFIG_PROC_FS
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;

	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static int ipv6_sysctl_rtcache_flush(const struct ctl_table *ctl, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	int ret;

	if (!write)
		return -EINVAL;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	net = (struct net *)ctl->extra1;
	delay = READ_ONCE(net->ipv6.sysctl.flush_delay);
	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}
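
/* Template for the per-netns sysctls published under
 * /proc/sys/net/ipv6/route/.  "flush" is write-only; writing to it
 * (illustrative: "echo 0 > /proc/sys/net/ipv6/route/flush") feeds the
 * written value to fib6_run_gc() via ipv6_sysctl_rtcache_flush() above.
 */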
static struct ctl_table ipv6_route_table_template[] = {
	{
		.procname = "max_size",
		.data = &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "gc_thresh",
		.data = &ip6_dst_ops_template.gc_thresh,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "flush",
		.data = &init_net.ipv6.sysctl.flush_delay,
		.maxlen = sizeof(int),
		.mode = 0200,
		.proc_handler = ipv6_sysctl_rtcache_flush
	},
	{
		.procname = "gc_min_interval",
		.data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_timeout",
		.data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_interval",
		.data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_elasticity",
		.data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "mtu_expires",
		.data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "min_adv_mss",
		.data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "gc_min_interval_ms",
		.data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_ms_jiffies,
	},
	{
		.procname = "skip_notify_on_dev_down",
		.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
};

struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.flush_delay;
		table[2].extra1 = net;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
	}

	return table;
}

size_t ipv6_route_sysctl_table_size(struct net *net)
{
	/* Don't export sysctls to unprivileged users */
	if (net->user_ns != &init_user_ns)
		return 1;

	return ARRAY_SIZE(ipv6_route_table_template);
}
#endif
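
/* Per-netns setup: clone the dst_ops template, allocate the null (and,
 * with CONFIG_IPV6_MULTIPLE_TABLES, prohibit/blackhole) template
 * entries and seed the GC/MTU sysctl defaults.
 */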
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
	if (!net->ipv6.fib6_null_entry)
		goto out_ip6_dst_entries;
	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
	       sizeof(*net->ipv6.fib6_null_entry));

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
#ifdef CONFIG_IPV6_SUBTREES
	net->ipv6.fib6_routes_require_src = 0;
#endif
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
	net->ipv6.sysctl.skip_notify_on_dev_down = 0;

	atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ipv6_route", 0, net->proc_net,
			     &ipv6_route_seq_ops,
			     sizeof(struct ipv6_route_iter)))
		return -ENOMEM;

	if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
				    rt6_stats_seq_show, NULL)) {
		remove_proc_entry("ipv6_route", net->proc_net);
		return -ENOMEM;
	}
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
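
/* The ADDRCONF_NOTIFY_PRIORITY - 10 priority above makes this notifier
 * run after addrconf's, so the loopback inet6_dev already exists when
 * ip6_route_dev_notify() takes its references.
 */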
void __init ip6_route_init_special_entries(void)
{
	/* The loopback device is registered before this code runs, so the
	 * loopback reference in rt6_info is not taken automatically; do it
	 * manually for init_net.
	 */
	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}

#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)

BTF_ID_LIST_SINGLE(btf_fib6_info_id, struct, fib6_info)

static const struct bpf_iter_seq_info ipv6_route_seq_info = {
	.seq_ops = &ipv6_route_seq_ops,
	.init_seq_private = bpf_iter_init_seq_net,
	.fini_seq_private = bpf_iter_fini_seq_net,
	.seq_priv_size = sizeof(struct ipv6_route_iter),
};

static struct bpf_iter_reg ipv6_route_reg_info = {
	.target = "ipv6_route",
	.ctx_arg_info_size = 1,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__ipv6_route, rt),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &ipv6_route_seq_info,
};

static int __init bpf_iter_register(void)
{
	ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
	return bpf_iter_reg_target(&ipv6_route_reg_info);
}

static void bpf_iter_unregister(void)
{
	bpf_iter_unreg_target(&ipv6_route_reg_info);
}
#endif
#endif

static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
	{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
	 .doit = inet6_rtm_newroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
	{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
	 .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
	{.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
	 .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
};

int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	ret = bpf_iter_register();
	if (ret)
		goto out_register_late_subsys;
#endif
#endif

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;
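
/* Error unwind: undo the registrations above in reverse order. */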
out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_unregister();
#endif
#endif
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}