Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: do not overwrite inetpeer metrics prematurely

If an IPv6 host route with metrics exists, an attempt to add a
new route for the same target with different metrics fails but
rewrites the metrics anyway:

12sp0:~ # ip route add fec0::1 dev eth0 rto_min 1000
12sp0:~ # ip -6 route show
fe80::/64 dev eth0 proto kernel metric 256
fec0::1 dev eth0 metric 1024 rto_min lock 1s
12sp0:~ # ip route add fec0::1 dev eth0 rto_min 1500
RTNETLINK answers: File exists
12sp0:~ # ip -6 route show
fe80::/64 dev eth0 proto kernel metric 256
fec0::1 dev eth0 metric 1024 rto_min lock 1.5s

This is caused by all IPv6 host routes using the metrics in
their inetpeer (or the shared default). This also holds for the
new route created in ip6_route_add() which shares the metrics
with the already existing route and thus ip6_route_add()
rewrites the metrics even if the new route ends up not being
used at all.

Another problem is that old metrics in inetpeer can reappear
unexpectedly for a new route, e.g.

12sp0:~ # ip route add fec0::1 dev eth0 rto_min 1000
12sp0:~ # ip route del fec0::1
12sp0:~ # ip route add fec0::1 dev eth0
12sp0:~ # ip route change fec0::1 dev eth0 hoplimit 10
12sp0:~ # ip -6 route show
fe80::/64 dev eth0 proto kernel metric 256
fec0::1 dev eth0 metric 1024 hoplimit 10 rto_min lock 1s

Resolve the first problem by moving the setting of metrics down
into fib6_add_rt2node() to the point we are sure we are
inserting the new route into the tree. Second problem is
addressed by introducing new flag DST_METRICS_FORCE_OVERWRITE
which is set for a new host route in ip6_route_add() and makes
ipv6_cow_metrics() always overwrite the metrics in inetpeer
(even if they are not "new"); it is reset after that.

v5: use a flag in _metrics member rather than one in flags

v4: fix a typo making a condition always true (thanks to Hannes
Frederic Sowa)

v3: rewritten based on David Miller's idea to move setting the
metrics (and allocation in non-host case) down to the point we
already know the route is to be inserted. Also rebased to
net-next as it is quite late in the cycle.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Michal Kubeček and committed by
David S. Miller
e5fd387a 4ec54f95

+66 -39
+9 -2
include/net/dst.h
··· 108 108 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); 109 109 extern const u32 dst_default_metrics[]; 110 110 111 - #define DST_METRICS_READ_ONLY 0x1UL 111 + #define DST_METRICS_READ_ONLY 0x1UL 112 + #define DST_METRICS_FORCE_OVERWRITE 0x2UL 113 + #define DST_METRICS_FLAGS 0x3UL 112 114 #define __DST_METRICS_PTR(Y) \ 113 - ((u32 *)((Y) & ~DST_METRICS_READ_ONLY)) 115 + ((u32 *)((Y) & ~DST_METRICS_FLAGS)) 114 116 #define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics) 115 117 116 118 static inline bool dst_metrics_read_only(const struct dst_entry *dst) 117 119 { 118 120 return dst->_metrics & DST_METRICS_READ_ONLY; 121 + } 122 + 123 + static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst) 124 + { 125 + dst->_metrics |= DST_METRICS_FORCE_OVERWRITE; 119 126 } 120 127 121 128 void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);
+2 -1
include/net/ip6_fib.h
··· 284 284 void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), 285 285 void *arg); 286 286 287 - int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info); 287 + int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, 288 + struct nlattr *mx, int mx_len); 288 289 289 290 int fib6_del(struct rt6_info *rt, struct nl_info *info); 290 291
+44 -3
net/ipv6/ip6_fib.c
··· 638 638 RTF_GATEWAY; 639 639 } 640 640 641 + static int fib6_commit_metrics(struct dst_entry *dst, 642 + struct nlattr *mx, int mx_len) 643 + { 644 + struct nlattr *nla; 645 + int remaining; 646 + u32 *mp; 647 + 648 + if (dst->flags & DST_HOST) { 649 + mp = dst_metrics_write_ptr(dst); 650 + } else { 651 + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 652 + if (!mp) 653 + return -ENOMEM; 654 + dst_init_metrics(dst, mp, 0); 655 + } 656 + 657 + nla_for_each_attr(nla, mx, mx_len, remaining) { 658 + int type = nla_type(nla); 659 + 660 + if (type) { 661 + if (type > RTAX_MAX) 662 + return -EINVAL; 663 + 664 + mp[type - 1] = nla_get_u32(nla); 665 + } 666 + } 667 + return 0; 668 + } 669 + 641 670 /* 642 671 * Insert routing information in a node. 643 672 */ 644 673 645 674 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 646 - struct nl_info *info) 675 + struct nl_info *info, struct nlattr *mx, int mx_len) 647 676 { 648 677 struct rt6_info *iter = NULL; 649 678 struct rt6_info **ins; ··· 682 653 (info->nlh->nlmsg_flags & NLM_F_CREATE)); 683 654 int found = 0; 684 655 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); 656 + int err; 685 657 686 658 ins = &fn->leaf; 687 659 ··· 781 751 pr_warn("NLM_F_CREATE should be set when creating new route\n"); 782 752 783 753 add: 754 + if (mx) { 755 + err = fib6_commit_metrics(&rt->dst, mx, mx_len); 756 + if (err) 757 + return err; 758 + } 784 759 rt->dst.rt6_next = iter; 785 760 *ins = rt; 786 761 rt->rt6i_node = fn; ··· 804 769 goto add; 805 770 pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); 806 771 return -ENOENT; 772 + } 773 + if (mx) { 774 + err = fib6_commit_metrics(&rt->dst, mx, mx_len); 775 + if (err) 776 + return err; 807 777 } 808 778 *ins = rt; 809 779 rt->rt6i_node = fn; ··· 846 806 * with source addr info in sub-trees 847 807 */ 848 808 849 - int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) 809 + int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, 810 + struct nlattr *mx, int mx_len) 850 811 { 851 812 struct fib6_node *fn, *pn = NULL; 852 813 int err = -ENOMEM; ··· 941 900 } 942 901 #endif 943 902 944 - err = fib6_add_rt2node(fn, rt, info); 903 + err = fib6_add_rt2node(fn, rt, info, mx, mx_len); 945 904 if (!err) { 946 905 fib6_start_gc(info->nl_net, rt); 947 906 if (!(rt->rt6i_flags & RTF_CACHE))
+11 -33
net/ipv6/route.c
··· 149 149 unsigned long prev, new; 150 150 151 151 p = peer->metrics; 152 - if (inet_metrics_new(peer)) 152 + if (inet_metrics_new(peer) || 153 + (old & DST_METRICS_FORCE_OVERWRITE)) 153 154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX); 154 155 155 156 new = (unsigned long) p; ··· 858 857 be destroyed. 859 858 */ 860 859 861 - static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 860 + static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, 861 + struct nlattr *mx, int mx_len) 862 862 { 863 863 int err; 864 864 struct fib6_table *table; 865 865 866 866 table = rt->rt6i_table; 867 867 write_lock_bh(&table->tb6_lock); 868 - err = fib6_add(&table->tb6_root, rt, info); 868 + err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); 869 869 write_unlock_bh(&table->tb6_lock); 870 870 871 871 return err; ··· 877 875 struct nl_info info = { 878 876 .nl_net = dev_net(rt->dst.dev), 879 877 }; 880 - return __ip6_ins_rt(rt, &info); 878 + return __ip6_ins_rt(rt, &info, NULL, 0); 881 879 } 882 880 883 881 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, ··· 1545 1543 1546 1544 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1547 1545 rt->rt6i_dst.plen = cfg->fc_dst_len; 1548 - if (rt->rt6i_dst.plen == 128) 1549 - rt->dst.flags |= DST_HOST; 1550 - 1551 - if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { 1552 - u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 1553 - if (!metrics) { 1554 - err = -ENOMEM; 1555 - goto out; 1556 - } 1557 - dst_init_metrics(&rt->dst, metrics, 0); 1546 + if (rt->rt6i_dst.plen == 128) { 1547 + rt->dst.flags |= DST_HOST; 1548 + dst_metrics_set_force_overwrite(&rt->dst); 1558 1549 } 1550 + 1559 1551 #ifdef CONFIG_IPV6_SUBTREES 1560 1552 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1561 1553 rt->rt6i_src.plen = cfg->fc_src_len; ··· 1668 1672 rt->rt6i_flags = cfg->fc_flags; 1669 1673 1670 1674 install_route: 1671 - if (cfg->fc_mx) { 1672 - struct nlattr *nla; 1673 - int remaining; 1674 - 1675 - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1676 - int type = nla_type(nla); 1677 - 1678 - if (type) { 1679 - if (type > RTAX_MAX) { 1680 - err = -EINVAL; 1681 - goto out; 1682 - } 1683 - 1684 - dst_metric_set(&rt->dst, type, nla_get_u32(nla)); 1685 - } 1686 - } 1687 - } 1688 - 1689 1675 rt->dst.dev = dev; 1690 1676 rt->rt6i_idev = idev; 1691 1677 rt->rt6i_table = table; 1692 1678 1693 1679 cfg->fc_nlinfo.nl_net = dev_net(dev); 1694 1680 1695 - return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1681 + return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); 1696 1682 1697 1683 out: 1698 1684 if (dev)