Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-Kernel-side-filtering-for-route-dumps'

David Ahern says:

====================
net: Kernel side filtering for route dumps

Implement kernel-side filtering of route dumps by protocol (e.g., which
routing daemon installed the route), route type (e.g., unicast), table
id, and nexthop device.

iproute2 has been doing this filtering in userspace for years; pushing
the filters to the kernel side reduces the amount of data the kernel
sends and reduces wasted cycles on both sides processing unwanted data.
These initial options provide a huge improvement for efficiently
examining routes on large-scale systems.

v2
- better handling of requests for a specific table: rather than walking
  the hash of all tables, look up the specific table and dump it
- refactor mr_rtm_dumproute, moving the loop over the table into a
  helper that can be invoked directly
- add a hook to return NLM_F_DUMP_FILTERED in the DONE message to ensure
  it is returned even when the dump returns nothing
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+386 -94
+9 -2
include/linux/mroute_base.h
··· 7 7 #include <net/net_namespace.h> 8 8 #include <net/sock.h> 9 9 #include <net/fib_notifier.h> 10 + #include <net/ip_fib.h> 10 11 11 12 /** 12 13 * struct vif_device - interface representor for multicast routing ··· 284 283 285 284 int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, 286 285 struct mr_mfc *c, struct rtmsg *rtm); 286 + int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, 287 + struct netlink_callback *cb, 288 + int (*fill)(struct mr_table *mrt, struct sk_buff *skb, 289 + u32 portid, u32 seq, struct mr_mfc *c, 290 + int cmd, int flags), 291 + spinlock_t *lock, struct fib_dump_filter *filter); 287 292 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, 288 293 struct mr_table *(*iter)(struct net *net, 289 294 struct mr_table *mrt), ··· 297 290 struct sk_buff *skb, 298 291 u32 portid, u32 seq, struct mr_mfc *c, 299 292 int cmd, int flags), 300 - spinlock_t *lock); 293 + spinlock_t *lock, struct fib_dump_filter *filter); 301 294 302 295 int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, 303 296 int (*rules_dump)(struct net *net, ··· 347 340 struct sk_buff *skb, 348 341 u32 portid, u32 seq, struct mr_mfc *c, 349 342 int cmd, int flags), 350 - spinlock_t *lock) 343 + spinlock_t *lock, struct fib_dump_filter *filter) 351 344 { 352 345 return -EINVAL; 353 346 }
+1
include/linux/netlink.h
··· 180 180 u16 family; 181 181 u16 min_dump_alloc; 182 182 bool strict_check; 183 + u16 answer_flags; 183 184 unsigned int prev_seq, seq; 184 185 long args[6]; 185 186 };
+1
include/net/ip6_route.h
··· 174 174 struct sk_buff *skb; 175 175 struct netlink_callback *cb; 176 176 struct net *net; 177 + struct fib_dump_filter filter; 177 178 }; 178 179 179 180 int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
+14 -3
include/net/ip_fib.h
··· 222 222 unsigned long __data[0]; 223 223 }; 224 224 225 + struct fib_dump_filter { 226 + u32 table_id; 227 + /* filter_set is an optimization that an entry is set */ 228 + bool filter_set; 229 + unsigned char protocol; 230 + unsigned char rt_type; 231 + unsigned int flags; 232 + struct net_device *dev; 233 + }; 234 + 225 235 int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 226 236 struct fib_result *res, int fib_flags); 227 237 int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, ··· 239 229 int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, 240 230 struct netlink_ext_ack *extack); 241 231 int fib_table_dump(struct fib_table *table, struct sk_buff *skb, 242 - struct netlink_callback *cb); 232 + struct netlink_callback *cb, struct fib_dump_filter *filter); 243 233 int fib_table_flush(struct net *net, struct fib_table *table); 244 234 struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); 245 235 void fib_table_flush_external(struct fib_table *table); ··· 463 453 464 454 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); 465 455 466 - int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, 467 - struct netlink_ext_ack *extack); 456 + int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, 457 + struct fib_dump_filter *filter, 458 + struct netlink_callback *cb); 468 459 #endif /* _NET_FIB_H */
+65 -11
net/ipv4/fib_frontend.c
··· 802 802 return err; 803 803 } 804 804 805 - int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, 806 - struct netlink_ext_ack *extack) 805 + int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, 806 + struct fib_dump_filter *filter, 807 + struct netlink_callback *cb) 807 808 { 809 + struct netlink_ext_ack *extack = cb->extack; 810 + struct nlattr *tb[RTA_MAX + 1]; 808 811 struct rtmsg *rtm; 812 + int err, i; 813 + 814 + ASSERT_RTNL(); 809 815 810 816 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { 811 817 NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); ··· 820 814 821 815 rtm = nlmsg_data(nlh); 822 816 if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || 823 - rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || 824 - rtm->rtm_type) { 817 + rtm->rtm_scope) { 825 818 NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); 826 819 return -EINVAL; 827 820 } ··· 829 824 return -EINVAL; 830 825 } 831 826 832 - if (nlmsg_attrlen(nlh, sizeof(*rtm))) { 833 - NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request"); 834 - return -EINVAL; 827 + filter->flags = rtm->rtm_flags; 828 + filter->protocol = rtm->rtm_protocol; 829 + filter->rt_type = rtm->rtm_type; 830 + filter->table_id = rtm->rtm_table; 831 + 832 + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 833 + rtm_ipv4_policy, extack); 834 + if (err < 0) 835 + return err; 836 + 837 + for (i = 0; i <= RTA_MAX; ++i) { 838 + int ifindex; 839 + 840 + if (!tb[i]) 841 + continue; 842 + 843 + switch (i) { 844 + case RTA_TABLE: 845 + filter->table_id = nla_get_u32(tb[i]); 846 + break; 847 + case RTA_OIF: 848 + ifindex = nla_get_u32(tb[i]); 849 + filter->dev = __dev_get_by_index(net, ifindex); 850 + if (!filter->dev) 851 + return -ENODEV; 852 + break; 853 + default: 854 + NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); 855 + return -EINVAL; 856 + } 857 + } 858 + 859 + if (filter->flags || filter->protocol || 
filter->rt_type || 860 + filter->table_id || filter->dev) { 861 + filter->filter_set = 1; 862 + cb->answer_flags = NLM_F_DUMP_FILTERED; 835 863 } 836 864 837 865 return 0; ··· 875 837 { 876 838 const struct nlmsghdr *nlh = cb->nlh; 877 839 struct net *net = sock_net(skb->sk); 840 + struct fib_dump_filter filter = {}; 878 841 unsigned int h, s_h; 879 842 unsigned int e = 0, s_e; 880 843 struct fib_table *tb; ··· 883 844 int dumped = 0, err; 884 845 885 846 if (cb->strict_check) { 886 - err = ip_valid_fib_dump_req(nlh, cb->extack); 847 + err = ip_valid_fib_dump_req(net, nlh, &filter, cb); 887 848 if (err < 0) 888 849 return err; 850 + } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { 851 + struct rtmsg *rtm = nlmsg_data(nlh); 852 + 853 + filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED); 889 854 } 890 855 891 - if (nlmsg_len(nlh) >= sizeof(struct rtmsg) && 892 - ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED) 856 + /* fib entries are never clones and ipv4 does not use prefix flag */ 857 + if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED)) 893 858 return skb->len; 859 + 860 + if (filter.table_id) { 861 + tb = fib_get_table(net, filter.table_id); 862 + if (!tb) { 863 + NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); 864 + return -ENOENT; 865 + } 866 + 867 + err = fib_table_dump(tb, skb, cb, &filter); 868 + return skb->len ? : err; 869 + } 894 870 895 871 s_h = cb->args[0]; 896 872 s_e = cb->args[1]; ··· 921 867 if (dumped) 922 868 memset(&cb->args[2], 0, sizeof(cb->args) - 923 869 2 * sizeof(cb->args[0])); 924 - err = fib_table_dump(tb, skb, cb); 870 + err = fib_table_dump(tb, skb, cb, &filter); 925 871 if (err < 0) { 926 872 if (likely(skb->len)) 927 873 goto out;
+26 -11
net/ipv4/fib_trie.c
··· 2003 2003 } 2004 2004 2005 2005 static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, 2006 - struct sk_buff *skb, struct netlink_callback *cb) 2006 + struct sk_buff *skb, struct netlink_callback *cb, 2007 + struct fib_dump_filter *filter) 2007 2008 { 2009 + unsigned int flags = NLM_F_MULTI; 2008 2010 __be32 xkey = htonl(l->key); 2009 2011 struct fib_alias *fa; 2010 2012 int i, s_i; 2013 + 2014 + if (filter->filter_set) 2015 + flags |= NLM_F_DUMP_FILTERED; 2011 2016 2012 2017 s_i = cb->args[4]; 2013 2018 i = 0; ··· 2021 2016 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 2022 2017 int err; 2023 2018 2024 - if (i < s_i) { 2025 - i++; 2026 - continue; 2027 - } 2019 + if (i < s_i) 2020 + goto next; 2028 2021 2029 - if (tb->tb_id != fa->tb_id) { 2030 - i++; 2031 - continue; 2022 + if (tb->tb_id != fa->tb_id) 2023 + goto next; 2024 + 2025 + if (filter->filter_set) { 2026 + if (filter->rt_type && fa->fa_type != filter->rt_type) 2027 + goto next; 2028 + 2029 + if ((filter->protocol && 2030 + fa->fa_info->fib_protocol != filter->protocol)) 2031 + goto next; 2032 + 2033 + if (filter->dev && 2034 + !fib_info_nh_uses_dev(fa->fa_info, filter->dev)) 2035 + goto next; 2032 2036 } 2033 2037 2034 2038 err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 2035 2039 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2036 2040 tb->tb_id, fa->fa_type, 2037 2041 xkey, KEYLENGTH - fa->fa_slen, 2038 - fa->fa_tos, fa->fa_info, NLM_F_MULTI); 2042 + fa->fa_tos, fa->fa_info, flags); 2039 2043 if (err < 0) { 2040 2044 cb->args[4] = i; 2041 2045 return err; 2042 2046 } 2047 + next: 2043 2048 i++; 2044 2049 } 2045 2050 ··· 2059 2044 2060 2045 /* rcu_read_lock needs to be hold by caller from readside */ 2061 2046 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 2062 - struct netlink_callback *cb) 2047 + struct netlink_callback *cb, struct fib_dump_filter *filter) 2063 2048 { 2064 2049 struct trie *t = (struct trie *)tb->tb_data; 2065 2050 struct key_vector *l, *tp = t->kv; 
··· 2072 2057 while ((l = leaf_walk_rcu(&tp, key)) != NULL) { 2073 2058 int err; 2074 2059 2075 - err = fn_trie_dump_leaf(l, tb, skb, cb); 2060 + err = fn_trie_dump_leaf(l, tb, skb, cb, filter); 2076 2061 if (err < 0) { 2077 2062 cb->args[3] = key; 2078 2063 cb->args[2] = count;
+19 -3
net/ipv4/ipmr.c
··· 2527 2527 2528 2528 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2529 2529 { 2530 - if (cb->strict_check) { 2531 - int err = ip_valid_fib_dump_req(cb->nlh, cb->extack); 2530 + struct fib_dump_filter filter = {}; 2531 + int err; 2532 2532 2533 + if (cb->strict_check) { 2534 + err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, 2535 + &filter, cb); 2533 2536 if (err < 0) 2534 2537 return err; 2535 2538 } 2536 2539 2540 + if (filter.table_id) { 2541 + struct mr_table *mrt; 2542 + 2543 + mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); 2544 + if (!mrt) { 2545 + NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); 2546 + return -ENOENT; 2547 + } 2548 + err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, 2549 + &mfc_unres_lock, &filter); 2550 + return skb->len ? : err; 2551 + } 2552 + 2537 2553 return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, 2538 - _ipmr_fill_mroute, &mfc_unres_lock); 2554 + _ipmr_fill_mroute, &mfc_unres_lock, &filter); 2539 2555 } 2540 2556 2541 2557 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
+92 -33
net/ipv4/ipmr_base.c
··· 268 268 } 269 269 EXPORT_SYMBOL(mr_fill_mroute); 270 270 271 + static bool mr_mfc_uses_dev(const struct mr_table *mrt, 272 + const struct mr_mfc *c, 273 + const struct net_device *dev) 274 + { 275 + int ct; 276 + 277 + for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 278 + if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 279 + const struct vif_device *vif; 280 + 281 + vif = &mrt->vif_table[ct]; 282 + if (vif->dev == dev) 283 + return true; 284 + } 285 + } 286 + return false; 287 + } 288 + 289 + int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, 290 + struct netlink_callback *cb, 291 + int (*fill)(struct mr_table *mrt, struct sk_buff *skb, 292 + u32 portid, u32 seq, struct mr_mfc *c, 293 + int cmd, int flags), 294 + spinlock_t *lock, struct fib_dump_filter *filter) 295 + { 296 + unsigned int e = 0, s_e = cb->args[1]; 297 + unsigned int flags = NLM_F_MULTI; 298 + struct mr_mfc *mfc; 299 + int err; 300 + 301 + if (filter->filter_set) 302 + flags |= NLM_F_DUMP_FILTERED; 303 + 304 + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { 305 + if (e < s_e) 306 + goto next_entry; 307 + if (filter->dev && 308 + !mr_mfc_uses_dev(mrt, mfc, filter->dev)) 309 + goto next_entry; 310 + 311 + err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, 312 + cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); 313 + if (err < 0) 314 + goto out; 315 + next_entry: 316 + e++; 317 + } 318 + e = 0; 319 + s_e = 0; 320 + 321 + spin_lock_bh(lock); 322 + list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { 323 + if (e < s_e) 324 + goto next_entry2; 325 + if (filter->dev && 326 + !mr_mfc_uses_dev(mrt, mfc, filter->dev)) 327 + goto next_entry2; 328 + 329 + err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, 330 + cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); 331 + if (err < 0) { 332 + spin_unlock_bh(lock); 333 + goto out; 334 + } 335 + next_entry2: 336 + e++; 337 + } 338 + spin_unlock_bh(lock); 339 + err = 0; 340 + e = 0; 341 + 342 + out: 343 + cb->args[1] = e; 
344 + return err; 345 + } 346 + EXPORT_SYMBOL(mr_table_dump); 347 + 271 348 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, 272 349 struct mr_table *(*iter)(struct net *net, 273 350 struct mr_table *mrt), ··· 352 275 struct sk_buff *skb, 353 276 u32 portid, u32 seq, struct mr_mfc *c, 354 277 int cmd, int flags), 355 - spinlock_t *lock) 278 + spinlock_t *lock, struct fib_dump_filter *filter) 356 279 { 357 - unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1]; 280 + unsigned int t = 0, s_t = cb->args[0]; 358 281 struct net *net = sock_net(skb->sk); 359 282 struct mr_table *mrt; 360 - struct mr_mfc *mfc; 283 + int err; 284 + 285 + /* multicast does not track protocol or have route type other 286 + * than RTN_MULTICAST 287 + */ 288 + if (filter->filter_set) { 289 + if (filter->protocol || filter->flags || 290 + (filter->rt_type && filter->rt_type != RTN_MULTICAST)) 291 + return skb->len; 292 + } 361 293 362 294 rcu_read_lock(); 363 295 for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) { 364 296 if (t < s_t) 365 297 goto next_table; 366 - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { 367 - if (e < s_e) 368 - goto next_entry; 369 - if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, 370 - cb->nlh->nlmsg_seq, mfc, 371 - RTM_NEWROUTE, NLM_F_MULTI) < 0) 372 - goto done; 373 - next_entry: 374 - e++; 375 - } 376 - e = 0; 377 - s_e = 0; 378 298 379 - spin_lock_bh(lock); 380 - list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { 381 - if (e < s_e) 382 - goto next_entry2; 383 - if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, 384 - cb->nlh->nlmsg_seq, mfc, 385 - RTM_NEWROUTE, NLM_F_MULTI) < 0) { 386 - spin_unlock_bh(lock); 387 - goto done; 388 - } 389 - next_entry2: 390 - e++; 391 - } 392 - spin_unlock_bh(lock); 393 - e = 0; 394 - s_e = 0; 299 + err = mr_table_dump(mrt, skb, cb, fill, lock, filter); 300 + if (err < 0) 301 + break; 395 302 next_table: 396 303 t++; 397 304 } 398 - done: 399 305 rcu_read_unlock(); 400 306 401 - 
cb->args[1] = e; 402 307 cb->args[0] = t; 403 308 404 309 return skb->len;
+27 -7
net/ipv6/ip6_fib.c
··· 569 569 { 570 570 const struct nlmsghdr *nlh = cb->nlh; 571 571 struct net *net = sock_net(skb->sk); 572 + struct rt6_rtnl_dump_arg arg = {}; 572 573 unsigned int h, s_h; 573 574 unsigned int e = 0, s_e; 574 - struct rt6_rtnl_dump_arg arg; 575 575 struct fib6_walker *w; 576 576 struct fib6_table *tb; 577 577 struct hlist_head *head; 578 578 int res = 0; 579 579 580 580 if (cb->strict_check) { 581 - int err = ip_valid_fib_dump_req(nlh, cb->extack); 581 + int err; 582 582 583 + err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); 583 584 if (err < 0) 584 585 return err; 586 + } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { 587 + struct rtmsg *rtm = nlmsg_data(nlh); 588 + 589 + arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED); 585 590 } 586 591 587 - s_h = cb->args[0]; 588 - s_e = cb->args[1]; 592 + /* fib entries are never clones */ 593 + if (arg.filter.flags & RTM_F_CLONED) 594 + return skb->len; 589 595 590 596 w = (void *)cb->args[2]; 591 597 if (!w) { ··· 617 611 arg.net = net; 618 612 w->args = &arg; 619 613 614 + if (arg.filter.table_id) { 615 + tb = fib6_get_table(net, arg.filter.table_id); 616 + if (!tb) { 617 + NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); 618 + return -ENOENT; 619 + } 620 + 621 + res = fib6_dump_table(tb, skb, cb); 622 + goto out; 623 + } 624 + 625 + s_h = cb->args[0]; 626 + s_e = cb->args[1]; 627 + 620 628 rcu_read_lock(); 621 629 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { 622 630 e = 0; ··· 640 620 goto next; 641 621 res = fib6_dump_table(tb, skb, cb); 642 622 if (res != 0) 643 - goto out; 623 + goto out_unlock; 644 624 next: 645 625 e++; 646 626 } 647 627 } 648 - out: 628 + out_unlock: 649 629 rcu_read_unlock(); 650 630 cb->args[1] = e; 651 631 cb->args[0] = h; 652 - 632 + out: 653 633 res = res < 0 ? res : skb->len; 654 634 if (res <= 0) 655 635 fib6_dump_end(cb);
+18 -3
net/ipv6/ip6mr.c
··· 2458 2458 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2459 2459 { 2460 2460 const struct nlmsghdr *nlh = cb->nlh; 2461 + struct fib_dump_filter filter = {}; 2462 + int err; 2461 2463 2462 2464 if (cb->strict_check) { 2463 - int err = ip_valid_fib_dump_req(nlh, cb->extack); 2464 - 2465 + err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, 2466 + &filter, cb); 2465 2467 if (err < 0) 2466 2468 return err; 2467 2469 } 2468 2470 2471 + if (filter.table_id) { 2472 + struct mr_table *mrt; 2473 + 2474 + mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); 2475 + if (!mrt) { 2476 + NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); 2477 + return -ENOENT; 2478 + } 2479 + err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, 2480 + &mfc_unres_lock, &filter); 2481 + return skb->len ? : err; 2482 + } 2483 + 2469 2484 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, 2470 - _ip6mr_fill_mroute, &mfc_unres_lock); 2485 + _ip6mr_fill_mroute, &mfc_unres_lock, &filter); 2471 2486 }
+32 -8
net/ipv6/route.c
··· 4767 4767 return -EMSGSIZE; 4768 4768 } 4769 4769 4770 + static bool fib6_info_uses_dev(const struct fib6_info *f6i, 4771 + const struct net_device *dev) 4772 + { 4773 + if (f6i->fib6_nh.nh_dev == dev) 4774 + return true; 4775 + 4776 + if (f6i->fib6_nsiblings) { 4777 + struct fib6_info *sibling, *next_sibling; 4778 + 4779 + list_for_each_entry_safe(sibling, next_sibling, 4780 + &f6i->fib6_siblings, fib6_siblings) { 4781 + if (sibling->fib6_nh.nh_dev == dev) 4782 + return true; 4783 + } 4784 + } 4785 + 4786 + return false; 4787 + } 4788 + 4770 4789 int rt6_dump_route(struct fib6_info *rt, void *p_arg) 4771 4790 { 4772 4791 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 4792 + struct fib_dump_filter *filter = &arg->filter; 4793 + unsigned int flags = NLM_F_MULTI; 4773 4794 struct net *net = arg->net; 4774 4795 4775 4796 if (rt == net->ipv6.fib6_null_entry) 4776 4797 return 0; 4777 4798 4778 - if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 4779 - struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 4780 - 4781 - /* user wants prefix routes only */ 4782 - if (rtm->rtm_flags & RTM_F_PREFIX && 4783 - !(rt->fib6_flags & RTF_PREFIX_RT)) { 4784 - /* success since this is not a prefix route */ 4799 + if ((filter->flags & RTM_F_PREFIX) && 4800 + !(rt->fib6_flags & RTF_PREFIX_RT)) { 4801 + /* success since this is not a prefix route */ 4802 + return 1; 4803 + } 4804 + if (filter->filter_set) { 4805 + if ((filter->rt_type && rt->fib6_type != filter->rt_type) || 4806 + (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || 4807 + (filter->protocol && rt->fib6_protocol != filter->protocol)) { 4785 4808 return 1; 4786 4809 } 4810 + flags |= NLM_F_DUMP_FILTERED; 4787 4811 } 4788 4812 4789 4813 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, 4790 4814 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, 4791 - arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); 4815 + arg->cb->nlh->nlmsg_seq, flags); 4792 4816 } 4793 4817 4794 4818 static int 
inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+80 -12
net/mpls/af_mpls.c
··· 2032 2032 } 2033 2033 2034 2034 #if IS_ENABLED(CONFIG_INET) 2035 - static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh, 2036 - struct netlink_ext_ack *extack) 2035 + static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, 2036 + struct fib_dump_filter *filter, 2037 + struct netlink_callback *cb) 2037 2038 { 2038 - return ip_valid_fib_dump_req(nlh, extack); 2039 + return ip_valid_fib_dump_req(net, nlh, filter, cb); 2039 2040 } 2040 2041 #else 2041 - static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh, 2042 - struct netlink_ext_ack *extack) 2042 + static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, 2043 + struct fib_dump_filter *filter, 2044 + struct netlink_callback *cb) 2043 2045 { 2046 + struct netlink_ext_ack *extack = cb->extack; 2047 + struct nlattr *tb[RTA_MAX + 1]; 2044 2048 struct rtmsg *rtm; 2049 + int err, i; 2045 2050 2046 2051 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { 2047 2052 NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request"); ··· 2055 2050 2056 2051 rtm = nlmsg_data(nlh); 2057 2052 if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || 2058 - rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || 2059 - rtm->rtm_type || rtm->rtm_flags) { 2053 + rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type || 2054 + rtm->rtm_flags) { 2060 2055 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request"); 2061 2056 return -EINVAL; 2062 2057 } 2063 2058 2064 - if (nlmsg_attrlen(nlh, sizeof(*rtm))) { 2065 - NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in FIB dump request"); 2066 - return -EINVAL; 2059 + if (rtm->rtm_protocol) { 2060 + filter->protocol = rtm->rtm_protocol; 2061 + filter->filter_set = 1; 2062 + cb->answer_flags = NLM_F_DUMP_FILTERED; 2063 + } 2064 + 2065 + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2066 + rtm_mpls_policy, extack); 2067 + if (err < 0) 2068 + return err; 2069 + 2070 + for (i = 0; i <= RTA_MAX; 
++i) { 2071 + int ifindex; 2072 + 2073 + if (i == RTA_OIF) { 2074 + ifindex = nla_get_u32(tb[i]); 2075 + filter->dev = __dev_get_by_index(net, ifindex); 2076 + if (!filter->dev) 2077 + return -ENODEV; 2078 + filter->filter_set = 1; 2079 + } else if (tb[i]) { 2080 + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request"); 2081 + return -EINVAL; 2082 + } 2067 2083 } 2068 2084 2069 2085 return 0; 2070 2086 } 2071 2087 #endif 2072 2088 2089 + static bool mpls_rt_uses_dev(struct mpls_route *rt, 2090 + const struct net_device *dev) 2091 + { 2092 + struct net_device *nh_dev; 2093 + 2094 + if (rt->rt_nhn == 1) { 2095 + struct mpls_nh *nh = rt->rt_nh; 2096 + 2097 + nh_dev = rtnl_dereference(nh->nh_dev); 2098 + if (dev == nh_dev) 2099 + return true; 2100 + } else { 2101 + for_nexthops(rt) { 2102 + nh_dev = rtnl_dereference(nh->nh_dev); 2103 + if (nh_dev == dev) 2104 + return true; 2105 + } endfor_nexthops(rt); 2106 + } 2107 + 2108 + return false; 2109 + } 2110 + 2073 2111 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) 2074 2112 { 2075 2113 const struct nlmsghdr *nlh = cb->nlh; 2076 2114 struct net *net = sock_net(skb->sk); 2077 2115 struct mpls_route __rcu **platform_label; 2116 + struct fib_dump_filter filter = {}; 2117 + unsigned int flags = NLM_F_MULTI; 2078 2118 size_t platform_labels; 2079 2119 unsigned int index; 2080 2120 2081 2121 ASSERT_RTNL(); 2082 2122 2083 2123 if (cb->strict_check) { 2084 - int err = mpls_valid_fib_dump_req(nlh, cb->extack); 2124 + int err; 2085 2125 2126 + err = mpls_valid_fib_dump_req(net, nlh, &filter, cb); 2086 2127 if (err < 0) 2087 2128 return err; 2129 + 2130 + /* for MPLS, there is only 1 table with fixed type and flags. 2131 + * If either are set in the filter then return nothing. 
2132 + */ 2133 + if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) || 2134 + (filter.rt_type && filter.rt_type != RTN_UNICAST) || 2135 + filter.flags) 2136 + return skb->len; 2088 2137 } 2089 2138 2090 2139 index = cb->args[0]; ··· 2147 2088 2148 2089 platform_label = rtnl_dereference(net->mpls.platform_label); 2149 2090 platform_labels = net->mpls.platform_labels; 2091 + 2092 + if (filter.filter_set) 2093 + flags |= NLM_F_DUMP_FILTERED; 2094 + 2150 2095 for (; index < platform_labels; index++) { 2151 2096 struct mpls_route *rt; 2097 + 2152 2098 rt = rtnl_dereference(platform_label[index]); 2153 2099 if (!rt) 2154 2100 continue; 2155 2101 2102 + if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) || 2103 + (filter.protocol && rt->rt_protocol != filter.protocol)) 2104 + continue; 2105 + 2156 2106 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, 2157 2107 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2158 - index, rt, NLM_F_MULTI) < 0) 2108 + index, rt, flags) < 0) 2159 2109 break; 2160 2110 } 2161 2111 cb->args[0] = index;
+2 -1
net/netlink/af_netlink.c
··· 2257 2257 } 2258 2258 2259 2259 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, 2260 - sizeof(nlk->dump_done_errno), NLM_F_MULTI); 2260 + sizeof(nlk->dump_done_errno), 2261 + NLM_F_MULTI | cb->answer_flags); 2261 2262 if (WARN_ON(!nlh)) 2262 2263 goto errout_skb; 2263 2264