Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ipv6-Refactor-nexthop-selection-helpers-during-a-fib-lookup'

David Ahern says:

====================
ipv6: Refactor nexthop selection helpers during a fib lookup

IPv6 has a fib6_nh embedded within each fib6_info and a separate
fib6_info for each path in a multipath route. A side effect is that
a fib6_info is passed all the way down the stack when selecting a path
on a fib lookup. Refactor the fib lookup functions and associated
helper functions to take a fib6_nh when appropriate to enable IPv6
to work with nexthop objects where the fib6_nh is not directly part
of a fib entry.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+151 -127
+4 -4
include/net/ip6_fib.h
@@ -127,6 +127,10 @@
 
 struct fib6_nh {
 	struct fib_nh_common nh_common;
+
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	unsigned long last_probe;
+#endif
 };
 
 struct fib6_info {
@@ -158,10 +154,6 @@
 
 	struct rt6_info * __percpu *rt6i_pcpu;
 	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
-#ifdef CONFIG_IPV6_ROUTER_PREF
-	unsigned long last_probe;
-#endif
 
 	u32 fib6_metric;
 	u8 fib6_protocol;
+147 -123
net/ipv6/route.c
··· 102 102 struct sk_buff *skb, u32 mtu); 103 103 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 104 104 struct sk_buff *skb); 105 - static int rt6_score_route(struct fib6_info *rt, int oif, int strict); 105 + static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, 106 + int strict); 106 107 static size_t rt6_nlmsg_size(struct fib6_info *rt); 107 108 static int rt6_fill_node(struct net *net, struct sk_buff *skb, 108 109 struct fib6_info *rt, struct dst_entry *dst, ··· 447 446 448 447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, 449 448 fib6_siblings) { 449 + const struct fib6_nh *nh = &sibling->fib6_nh; 450 450 int nh_upper_bound; 451 451 452 - nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound); 452 + nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); 453 453 if (fl6->mp_hash > nh_upper_bound) 454 454 continue; 455 - if (rt6_score_route(sibling, oif, strict) < 0) 455 + if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) 456 456 break; 457 457 match = sibling; 458 458 break; ··· 466 464 * Route lookup. rcu_read_lock() should be held. 
467 465 */ 468 466 467 + static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh, 468 + const struct in6_addr *saddr, int oif, int flags) 469 + { 470 + const struct net_device *dev; 471 + 472 + if (nh->fib_nh_flags & RTNH_F_DEAD) 473 + return false; 474 + 475 + dev = nh->fib_nh_dev; 476 + if (oif) { 477 + if (dev->ifindex == oif) 478 + return true; 479 + } else { 480 + if (ipv6_chk_addr(net, saddr, dev, 481 + flags & RT6_LOOKUP_F_IFACE)) 482 + return true; 483 + } 484 + 485 + return false; 486 + } 487 + 469 488 static inline struct fib6_info *rt6_device_match(struct net *net, 470 489 struct fib6_info *rt, 471 490 const struct in6_addr *saddr, 472 491 int oif, 473 492 int flags) 474 493 { 494 + const struct fib6_nh *nh; 475 495 struct fib6_info *sprt; 476 496 477 497 if (!oif && ipv6_addr_any(saddr) && ··· 501 477 return rt; 502 478 503 479 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { 504 - const struct net_device *dev = sprt->fib6_nh.fib_nh_dev; 505 - 506 - if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 507 - continue; 508 - 509 - if (oif) { 510 - if (dev->ifindex == oif) 511 - return sprt; 512 - } else { 513 - if (ipv6_chk_addr(net, saddr, dev, 514 - flags & RT6_LOOKUP_F_IFACE)) 515 - return sprt; 516 - } 480 + nh = &sprt->fib6_nh; 481 + if (__rt6_device_match(net, nh, saddr, oif, flags)) 482 + return sprt; 517 483 } 518 484 519 485 if (oif && flags & RT6_LOOKUP_F_IFACE) ··· 531 517 kfree(work); 532 518 } 533 519 534 - static void rt6_probe(struct fib6_info *rt) 520 + static void rt6_probe(struct fib6_nh *fib6_nh) 535 521 { 536 522 struct __rt6_probe_work *work = NULL; 537 523 const struct in6_addr *nh_gw; ··· 547 533 * Router Reachability Probe MUST be rate-limited 548 534 * to no more than one per minute. 
549 535 */ 550 - if (!rt || !rt->fib6_nh.fib_nh_gw_family) 536 + if (fib6_nh->fib_nh_gw_family) 551 537 return; 552 538 553 - nh_gw = &rt->fib6_nh.fib_nh_gw6; 554 - dev = rt->fib6_nh.fib_nh_dev; 539 + nh_gw = &fib6_nh->fib_nh_gw6; 540 + dev = fib6_nh->fib_nh_dev; 555 541 rcu_read_lock_bh(); 556 542 idev = __in6_dev_get(dev); 557 543 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); ··· 568 554 __neigh_set_probe_once(neigh); 569 555 } 570 556 write_unlock(&neigh->lock); 571 - } else if (time_after(jiffies, rt->last_probe + 557 + } else if (time_after(jiffies, fib6_nh->last_probe + 572 558 idev->cnf.rtr_probe_interval)) { 573 559 work = kmalloc(sizeof(*work), GFP_ATOMIC); 574 560 } 575 561 576 562 if (work) { 577 - rt->last_probe = jiffies; 563 + fib6_nh->last_probe = jiffies; 578 564 INIT_WORK(&work->work, rt6_probe_deferred); 579 565 work->target = *nh_gw; 580 566 dev_hold(dev); ··· 586 572 rcu_read_unlock_bh(); 587 573 } 588 574 #else 589 - static inline void rt6_probe(struct fib6_info *rt) 575 + static inline void rt6_probe(struct fib6_nh *fib6_nh) 590 576 { 591 577 } 592 578 #endif ··· 594 580 /* 595 581 * Default Router Selection (RFC 2461 6.3.6) 596 582 */ 597 - static inline int rt6_check_dev(struct fib6_info *rt, int oif) 598 - { 599 - const struct net_device *dev = rt->fib6_nh.fib_nh_dev; 600 - 601 - if (!oif || dev->ifindex == oif) 602 - return 2; 603 - return 0; 604 - } 605 - 606 - static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) 583 + static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) 607 584 { 608 585 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; 609 586 struct neighbour *neigh; 610 587 611 - if (rt->fib6_flags & RTF_NONEXTHOP || 612 - !rt->fib6_nh.fib_nh_gw_family) 613 - return RT6_NUD_SUCCEED; 614 - 615 588 rcu_read_lock_bh(); 616 - neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev, 617 - &rt->fib6_nh.fib_nh_gw6); 589 + neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, 590 + &fib6_nh->fib_nh_gw6); 618 
591 if (neigh) { 619 592 read_lock(&neigh->lock); 620 593 if (neigh->nud_state & NUD_VALID) ··· 622 621 return ret; 623 622 } 624 623 625 - static int rt6_score_route(struct fib6_info *rt, int oif, int strict) 624 + static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, 625 + int strict) 626 626 { 627 - int m; 627 + int m = 0; 628 628 629 - m = rt6_check_dev(rt, oif); 629 + if (!oif || nh->fib_nh_dev->ifindex == oif) 630 + m = 2; 631 + 630 632 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 631 633 return RT6_NUD_FAIL_HARD; 632 634 #ifdef CONFIG_IPV6_ROUTER_PREF 633 - m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; 635 + m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2; 634 636 #endif 635 - if (strict & RT6_LOOKUP_F_REACHABLE) { 636 - int n = rt6_check_neigh(rt); 637 + if ((strict & RT6_LOOKUP_F_REACHABLE) && 638 + !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) { 639 + int n = rt6_check_neigh(nh); 637 640 if (n < 0) 638 641 return n; 639 642 } 640 643 return m; 641 644 } 642 645 643 - static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, 644 - int *mpri, struct fib6_info *match, 645 - bool *do_rr) 646 + static bool find_match(struct fib6_nh *nh, u32 fib6_flags, 647 + int oif, int strict, int *mpri, bool *do_rr) 646 648 { 647 - int m; 648 649 bool match_do_rr = false; 650 + bool rc = false; 651 + int m; 649 652 650 - if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 653 + if (nh->fib_nh_flags & RTNH_F_DEAD) 651 654 goto out; 652 655 653 - if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) && 654 - rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && 656 + if (ip6_ignore_linkdown(nh->fib_nh_dev) && 657 + nh->fib_nh_flags & RTNH_F_LINKDOWN && 655 658 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) 656 659 goto out; 657 660 658 - if (fib6_check_expired(rt)) 659 - goto out; 660 - 661 - m = rt6_score_route(rt, oif, strict); 661 + m = rt6_score_route(nh, fib6_flags, oif, strict); 662 662 if (m == RT6_NUD_FAIL_DO_RR) { 
663 663 match_do_rr = true; 664 664 m = 0; /* lowest valid score */ ··· 668 666 } 669 667 670 668 if (strict & RT6_LOOKUP_F_REACHABLE) 671 - rt6_probe(rt); 669 + rt6_probe(nh); 672 670 673 671 /* note that m can be RT6_NUD_FAIL_PROBE at this point */ 674 672 if (m > *mpri) { 675 673 *do_rr = match_do_rr; 676 674 *mpri = m; 677 - match = rt; 675 + rc = true; 678 676 } 679 677 out: 680 - return match; 678 + return rc; 679 + } 680 + 681 + static void __find_rr_leaf(struct fib6_info *rt_start, 682 + struct fib6_info *nomatch, u32 metric, 683 + struct fib6_info **match, struct fib6_info **cont, 684 + int oif, int strict, bool *do_rr, int *mpri) 685 + { 686 + struct fib6_info *rt; 687 + 688 + for (rt = rt_start; 689 + rt && rt != nomatch; 690 + rt = rcu_dereference(rt->fib6_next)) { 691 + struct fib6_nh *nh; 692 + 693 + if (cont && rt->fib6_metric != metric) { 694 + *cont = rt; 695 + return; 696 + } 697 + 698 + if (fib6_check_expired(rt)) 699 + continue; 700 + 701 + nh = &rt->fib6_nh; 702 + if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr)) 703 + *match = rt; 704 + } 681 705 } 682 706 683 707 static struct fib6_info *find_rr_leaf(struct fib6_node *fn, 684 - struct fib6_info *leaf, 685 - struct fib6_info *rr_head, 686 - u32 metric, int oif, int strict, 687 - bool *do_rr) 708 + struct fib6_info *leaf, 709 + struct fib6_info *rr_head, 710 + u32 metric, int oif, int strict, 711 + bool *do_rr) 688 712 { 689 - struct fib6_info *rt, *match, *cont; 713 + struct fib6_info *match = NULL, *cont = NULL; 690 714 int mpri = -1; 691 715 692 - match = NULL; 693 - cont = NULL; 694 - for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) { 695 - if (rt->fib6_metric != metric) { 696 - cont = rt; 697 - break; 698 - } 716 + __find_rr_leaf(rr_head, NULL, metric, &match, &cont, 717 + oif, strict, do_rr, &mpri); 699 718 700 - match = find_match(rt, oif, strict, &mpri, match, do_rr); 701 - } 702 - 703 - for (rt = leaf; rt && rt != rr_head; 704 - rt = 
rcu_dereference(rt->fib6_next)) { 705 - if (rt->fib6_metric != metric) { 706 - cont = rt; 707 - break; 708 - } 709 - 710 - match = find_match(rt, oif, strict, &mpri, match, do_rr); 711 - } 719 + __find_rr_leaf(leaf, rr_head, metric, &match, &cont, 720 + oif, strict, do_rr, &mpri); 712 721 713 722 if (match || !cont) 714 723 return match; 715 724 716 - for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next)) 717 - match = find_match(rt, oif, strict, &mpri, match, do_rr); 725 + __find_rr_leaf(cont, NULL, metric, &match, NULL, 726 + oif, strict, do_rr, &mpri); 718 727 719 728 return match; 720 729 } ··· 1074 1061 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1075 1062 restart: 1076 1063 f6i = rcu_dereference(fn->leaf); 1077 - if (!f6i) { 1064 + if (!f6i) 1078 1065 f6i = net->ipv6.fib6_null_entry; 1079 - } else { 1066 + else 1080 1067 f6i = rt6_device_match(net, f6i, &fl6->saddr, 1081 1068 fl6->flowi6_oif, flags); 1082 - if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) 1083 - f6i = fib6_multipath_select(net, f6i, fl6, 1084 - fl6->flowi6_oif, skb, 1085 - flags); 1086 - } 1069 + 1087 1070 if (f6i == net->ipv6.fib6_null_entry) { 1088 1071 fn = fib6_backtrack(fn, &fl6->saddr); 1089 1072 if (fn) 1090 1073 goto restart; 1074 + 1075 + rt = net->ipv6.ip6_null_entry; 1076 + dst_hold(&rt->dst); 1077 + goto out; 1091 1078 } 1092 1079 1093 - trace_fib6_table_lookup(net, f6i, table, fl6); 1094 - 1080 + if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) 1081 + f6i = fib6_multipath_select(net, f6i, fl6, fl6->flowi6_oif, skb, 1082 + flags); 1095 1083 /* Search through exception table */ 1096 1084 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); 1097 1085 if (rt) { 1098 1086 if (ip6_hold_safe(net, &rt)) 1099 1087 dst_use_noref(&rt->dst, jiffies); 1100 - } else if (f6i == net->ipv6.fib6_null_entry) { 1101 - rt = net->ipv6.ip6_null_entry; 1102 - dst_hold(&rt->dst); 1103 1088 } else { 1104 1089 rt = ip6_create_rt_rcu(f6i); 1105 1090 } 1091 + 1092 + out: 1093 + 
trace_fib6_table_lookup(net, f6i, table, fl6); 1106 1094 1107 1095 rcu_read_unlock(); 1108 1096 ··· 1855 1841 rcu_read_lock(); 1856 1842 1857 1843 f6i = fib6_table_lookup(net, table, oif, fl6, strict); 1858 - if (f6i->fib6_nsiblings) 1859 - f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict); 1860 - 1861 1844 if (f6i == net->ipv6.fib6_null_entry) { 1862 1845 rt = net->ipv6.ip6_null_entry; 1863 1846 rcu_read_unlock(); 1864 1847 dst_hold(&rt->dst); 1865 1848 return rt; 1866 1849 } 1850 + 1851 + if (f6i->fib6_nsiblings) 1852 + f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict); 1867 1853 1868 1854 /*Search through exception table */ 1869 1855 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); ··· 2407 2393 NULL); 2408 2394 } 2409 2395 2396 + static bool ip6_redirect_nh_match(struct fib6_info *f6i, 2397 + struct fib6_nh *nh, 2398 + struct flowi6 *fl6, 2399 + const struct in6_addr *gw, 2400 + struct rt6_info **ret) 2401 + { 2402 + if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family || 2403 + fl6->flowi6_oif != nh->fib_nh_dev->ifindex) 2404 + return false; 2405 + 2406 + /* rt_cache's gateway might be different from its 'parent' 2407 + * in the case of an ip redirect. 2408 + * So we keep searching in the exception table if the gateway 2409 + * is different. 
2410 + */ 2411 + if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) { 2412 + struct rt6_info *rt_cache; 2413 + 2414 + rt_cache = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); 2415 + if (rt_cache && 2416 + ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) { 2417 + *ret = rt_cache; 2418 + return true; 2419 + } 2420 + return false; 2421 + } 2422 + return true; 2423 + } 2424 + 2410 2425 /* Handle redirects */ 2411 2426 struct ip6rd_flowi { 2412 2427 struct flowi6 fl6; ··· 2449 2406 int flags) 2450 2407 { 2451 2408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 2452 - struct rt6_info *ret = NULL, *rt_cache; 2409 + struct rt6_info *ret = NULL; 2453 2410 struct fib6_info *rt; 2454 2411 struct fib6_node *fn; 2455 2412 ··· 2467 2424 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2468 2425 restart: 2469 2426 for_each_fib6_node_rt_rcu(fn) { 2470 - if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 2471 - continue; 2472 2427 if (fib6_check_expired(rt)) 2473 2428 continue; 2474 2429 if (rt->fib6_flags & RTF_REJECT) 2475 2430 break; 2476 - if (!rt->fib6_nh.fib_nh_gw_family) 2477 - continue; 2478 2431 if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex) 2479 2432 continue; 2480 - /* rt_cache's gateway might be different from its 'parent' 2481 - * in the case of an ip redirect. 2482 - * So we keep searching in the exception table if the gateway 2483 - * is different. 2484 - */ 2485 - if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) { 2486 - rt_cache = rt6_find_cached_rt(rt, 2487 - &fl6->daddr, 2488 - &fl6->saddr); 2489 - if (rt_cache && 2490 - ipv6_addr_equal(&rdfl->gateway, 2491 - &rt_cache->rt6i_gateway)) { 2492 - ret = rt_cache; 2493 - break; 2494 - } 2495 - continue; 2496 - } 2497 - break; 2433 + if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6, 2434 + &rdfl->gateway, &ret)) 2435 + goto out; 2498 2436 } 2499 2437 2500 2438 if (!rt)