at master 13 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2/* 3 * Generic nexthop implementation 4 * 5 * Copyright (c) 2017-19 Cumulus Networks 6 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> 7 */ 8 9#ifndef __LINUX_NEXTHOP_H 10#define __LINUX_NEXTHOP_H 11 12#include <linux/netdevice.h> 13#include <linux/notifier.h> 14#include <linux/route.h> 15#include <linux/types.h> 16#include <net/ip_fib.h> 17#include <net/ip6_fib.h> 18#include <net/netlink.h> 19 20#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK 21 22struct nexthop; 23 24struct nh_config { 25 u32 nh_id; 26 27 u8 nh_family; 28 u8 nh_protocol; 29 u8 nh_blackhole; 30 u8 nh_fdb; 31 u32 nh_flags; 32 33 int nh_ifindex; 34 struct net_device *dev; 35 36 union { 37 __be32 ipv4; 38 struct in6_addr ipv6; 39 } gw; 40 41 struct nlattr *nh_grp; 42 u16 nh_grp_type; 43 u16 nh_grp_res_num_buckets; 44 unsigned long nh_grp_res_idle_timer; 45 unsigned long nh_grp_res_unbalanced_timer; 46 bool nh_grp_res_has_num_buckets; 47 bool nh_grp_res_has_idle_timer; 48 bool nh_grp_res_has_unbalanced_timer; 49 50 bool nh_hw_stats; 51 52 struct nlattr *nh_encap; 53 u16 nh_encap_type; 54 55 u32 nlflags; 56 struct nl_info nlinfo; 57}; 58 59struct nh_info { 60 struct hlist_node dev_hash; /* entry on netns devhash */ 61 struct nexthop *nh_parent; 62 63 u8 family; 64 bool reject_nh; 65 bool fdb_nh; 66 67 union { 68 struct fib_nh_common fib_nhc; 69 struct fib_nh fib_nh; 70 struct fib6_nh fib6_nh; 71 }; 72}; 73 74struct nh_res_bucket { 75 struct nh_grp_entry __rcu *nh_entry; 76 atomic_long_t used_time; 77 unsigned long migrated_time; 78 bool occupied; 79 u8 nh_flags; 80}; 81 82struct nh_res_table { 83 struct net *net; 84 u32 nhg_id; 85 struct delayed_work upkeep_dw; 86 87 /* List of NHGEs that have too few buckets ("uw" for underweight). 88 * Reclaimed buckets will be given to entries in this list. 89 */ 90 struct list_head uw_nh_entries; 91 unsigned long unbalanced_since; 92 93 u32 idle_timer; 94 u32 unbalanced_timer; 95 96 u16 num_nh_buckets; 97 struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); 98}; 99 100struct nh_grp_entry_stats { 101 u64_stats_t packets; 102 struct u64_stats_sync syncp; 103}; 104 105struct nh_grp_entry { 106 struct nexthop *nh; 107 struct nh_grp_entry_stats __percpu *stats; 108 u16 weight; 109 110 union { 111 struct { 112 atomic_t upper_bound; 113 } hthr; 114 struct { 115 /* Member on uw_nh_entries. */ 116 struct list_head uw_nh_entry; 117 118 u16 count_buckets; 119 u16 wants_buckets; 120 } res; 121 }; 122 123 struct list_head nh_list; 124 struct nexthop *nh_parent; /* nexthop of group with this entry */ 125 u64 packets_hw; 126}; 127 128struct nh_group { 129 struct nh_group *spare; /* spare group for removals */ 130 u16 num_nh; 131 bool is_multipath; 132 bool hash_threshold; 133 bool resilient; 134 bool fdb_nh; 135 bool has_v4; 136 bool hw_stats; 137 138 struct nh_res_table __rcu *res_table; 139 struct nh_grp_entry nh_entries[] __counted_by(num_nh); 140}; 141 142struct nexthop { 143 struct rb_node rb_node; /* entry on netns rbtree */ 144 struct list_head fi_list; /* v4 entries using nh */ 145 struct list_head f6i_list; /* v6 entries using nh */ 146 struct list_head fdb_list; /* fdb entries using this nh */ 147 struct list_head grp_list; /* nh group entries using this nh */ 148 struct net *net; 149 150 u32 id; 151 152 u8 protocol; /* app managing this nh */ 153 u8 nh_flags; 154 bool is_group; 155 bool dead; 156 spinlock_t lock; /* protect dead and f6i_list */ 157 158 refcount_t refcnt; 159 struct rcu_head rcu; 160 161 union { 162 struct nh_info __rcu *nh_info; 163 struct nh_group __rcu *nh_grp; 164 }; 165}; 166 167enum nexthop_event_type { 168 NEXTHOP_EVENT_DEL, 169 NEXTHOP_EVENT_REPLACE, 170 NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, 171 NEXTHOP_EVENT_BUCKET_REPLACE, 172 NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, 173}; 174 175enum nh_notifier_info_type { 176 NH_NOTIFIER_INFO_TYPE_SINGLE, 177 NH_NOTIFIER_INFO_TYPE_GRP, 178 NH_NOTIFIER_INFO_TYPE_RES_TABLE, 179 NH_NOTIFIER_INFO_TYPE_RES_BUCKET, 180 NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, 181}; 182 183struct nh_notifier_single_info { 184 struct net_device *dev; 185 u8 gw_family; 186 union { 187 __be32 ipv4; 188 struct in6_addr ipv6; 189 }; 190 u32 id; 191 u8 is_reject:1, 192 is_fdb:1, 193 has_encap:1; 194}; 195 196struct nh_notifier_grp_entry_info { 197 u16 weight; 198 struct nh_notifier_single_info nh; 199}; 200 201struct nh_notifier_grp_info { 202 u16 num_nh; 203 bool is_fdb; 204 bool hw_stats; 205 struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); 206}; 207 208struct nh_notifier_res_bucket_info { 209 u16 bucket_index; 210 unsigned int idle_timer_ms; 211 bool force; 212 struct nh_notifier_single_info old_nh; 213 struct nh_notifier_single_info new_nh; 214}; 215 216struct nh_notifier_res_table_info { 217 u16 num_nh_buckets; 218 bool hw_stats; 219 struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); 220}; 221 222struct nh_notifier_grp_hw_stats_entry_info { 223 u32 id; 224 u64 packets; 225}; 226 227struct nh_notifier_grp_hw_stats_info { 228 u16 num_nh; 229 bool hw_stats_used; 230 struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); 231}; 232 233struct nh_notifier_info { 234 struct net *net; 235 struct netlink_ext_ack *extack; 236 u32 id; 237 enum nh_notifier_info_type type; 238 union { 239 struct nh_notifier_single_info *nh; 240 struct nh_notifier_grp_info *nh_grp; 241 struct nh_notifier_res_table_info *nh_res_table; 242 struct nh_notifier_res_bucket_info *nh_res_bucket; 243 struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; 244 }; 245}; 246 247int register_nexthop_notifier(struct net *net, struct notifier_block *nb, 248 struct netlink_ext_ack *extack); 249int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 250int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); 251void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); 252void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, 253 bool offload, bool trap); 254void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, 255 unsigned long *activity); 256void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, 257 unsigned int nh_idx, 258 u64 delta_packets); 259 260/* caller is holding rcu or rtnl; no reference taken to nexthop */ 261struct nexthop *nexthop_find_by_id(struct net *net, u32 id); 262void nexthop_free_rcu(struct rcu_head *head); 263 264static inline bool nexthop_get(struct nexthop *nh) 265{ 266 return refcount_inc_not_zero(&nh->refcnt); 267} 268 269static inline void nexthop_put(struct nexthop *nh) 270{ 271 if (refcount_dec_and_test(&nh->refcnt)) 272 call_rcu_hurry(&nh->rcu, nexthop_free_rcu); 273} 274 275static inline bool nexthop_cmp(const struct nexthop *nh1, 276 const struct nexthop *nh2) 277{ 278 return nh1 == nh2; 279} 280 281static inline bool nexthop_is_fdb(const struct nexthop *nh) 282{ 283 if (nh->is_group) { 284 const struct nh_group *nh_grp; 285 286 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 287 return nh_grp->fdb_nh; 288 } else { 289 const struct nh_info *nhi; 290 291 nhi = rcu_dereference_rtnl(nh->nh_info); 292 return nhi->fdb_nh; 293 } 294} 295 296static inline bool nexthop_has_v4(const struct nexthop *nh) 297{ 298 if (nh->is_group) { 299 struct nh_group *nh_grp; 300 301 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 302 return nh_grp->has_v4; 303 } 304 return false; 305} 306 307static inline bool nexthop_is_multipath(const struct nexthop *nh) 308{ 309 if (nh->is_group) { 310 struct nh_group *nh_grp; 311 312 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 313 return nh_grp->is_multipath; 314 } 315 return false; 316} 317 318struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); 319 320static inline unsigned int nexthop_num_path(const struct nexthop *nh) 321{ 322 unsigned int rc = 1; 323 324 if (nh->is_group) { 325 struct nh_group *nh_grp; 326 327 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 328 if (nh_grp->is_multipath) 329 rc = nh_grp->num_nh; 330 } 331 332 return rc; 333} 334 335static inline 336struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel) 337{ 338 /* for_nexthops macros in fib_semantics.c grabs a pointer to 339 * the nexthop before checking nhsel 340 */ 341 if (nhsel >= nhg->num_nh) 342 return NULL; 343 344 return nhg->nh_entries[nhsel].nh; 345} 346 347static inline 348int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, 349 u8 rt_family) 350{ 351 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 352 int i; 353 354 for (i = 0; i < nhg->num_nh; i++) { 355 struct nexthop *nhe = nhg->nh_entries[i].nh; 356 struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); 357 struct fib_nh_common *nhc = &nhi->fib_nhc; 358 int weight = nhg->nh_entries[i].weight; 359 360 if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) 361 return -EMSGSIZE; 362 } 363 364 return 0; 365} 366 367/* called with rcu lock */ 368static inline bool nexthop_is_blackhole(const struct nexthop *nh) 369{ 370 const struct nh_info *nhi; 371 372 if (nh->is_group) { 373 struct nh_group *nh_grp; 374 375 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 376 if (nh_grp->num_nh > 1) 377 return false; 378 379 nh = nh_grp->nh_entries[0].nh; 380 } 381 382 nhi = rcu_dereference_rtnl(nh->nh_info); 383 return nhi->reject_nh; 384} 385 386static inline void nexthop_path_fib_result(struct fib_result *res, int hash) 387{ 388 struct nh_info *nhi; 389 struct nexthop *nh; 390 391 nh = nexthop_select_path(res->fi->nh, hash); 392 nhi = rcu_dereference(nh->nh_info); 393 res->nhc = &nhi->fib_nhc; 394} 395 396/* called with rcu read lock or rtnl held */ 397static inline 398struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) 399{ 400 struct nh_info *nhi; 401 402 BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); 403 BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); 404 405 if (nh->is_group) { 406 struct nh_group *nh_grp; 407 408 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 409 if (nh_grp->is_multipath) { 410 nh = nexthop_mpath_select(nh_grp, nhsel); 411 if (!nh) 412 return NULL; 413 } 414 } 415 416 nhi = rcu_dereference_rtnl(nh->nh_info); 417 return &nhi->fib_nhc; 418} 419 420/* called from fib_table_lookup with rcu_lock */ 421static inline 422struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh, 423 int fib_flags, 424 const struct flowi4 *flp, 425 int *nhsel) 426{ 427 struct nh_info *nhi; 428 429 if (nh->is_group) { 430 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 431 int i; 432 433 for (i = 0; i < nhg->num_nh; i++) { 434 struct nexthop *nhe = nhg->nh_entries[i].nh; 435 436 nhi = rcu_dereference(nhe->nh_info); 437 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 438 *nhsel = i; 439 return &nhi->fib_nhc; 440 } 441 } 442 } else { 443 nhi = rcu_dereference(nh->nh_info); 444 if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) { 445 *nhsel = 0; 446 return &nhi->fib_nhc; 447 } 448 } 449 450 return NULL; 451} 452 453static inline bool nexthop_uses_dev(const struct nexthop *nh, 454 const struct net_device *dev) 455{ 456 struct nh_info *nhi; 457 458 if (nh->is_group) { 459 struct nh_group *nhg = rcu_dereference(nh->nh_grp); 460 int i; 461 462 for (i = 0; i < nhg->num_nh; i++) { 463 struct nexthop *nhe = nhg->nh_entries[i].nh; 464 465 nhi = rcu_dereference(nhe->nh_info); 466 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 467 return true; 468 } 469 } else { 470 nhi = rcu_dereference(nh->nh_info); 471 if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev)) 472 return true; 473 } 474 475 return false; 476} 477 478static inline unsigned int fib_info_num_path(const struct fib_info *fi) 479{ 480 if (unlikely(fi->nh)) 481 return nexthop_num_path(fi->nh); 482 483 return fi->fib_nhs; 484} 485 486int fib_check_nexthop(struct nexthop *nh, u8 scope, 487 struct netlink_ext_ack *extack); 488 489static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) 490{ 491 if (unlikely(fi->nh)) 492 return nexthop_fib_nhc(fi->nh, nhsel); 493 494 return &fi->fib_nh[nhsel].nh_common; 495} 496 497/* only used when fib_nh is built into fib_info */ 498static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) 499{ 500 WARN_ON(fi->nh); 501 502 return &fi->fib_nh[nhsel]; 503} 504 505/* 506 * IPv6 variants 507 */ 508int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 509 struct netlink_ext_ack *extack); 510 511/* Caller should either hold rcu_read_lock(), or RTNL. */ 512static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) 513{ 514 struct nh_info *nhi; 515 516 if (nh->is_group) { 517 struct nh_group *nh_grp; 518 519 nh_grp = rcu_dereference_rtnl(nh->nh_grp); 520 nh = nexthop_mpath_select(nh_grp, 0); 521 if (!nh) 522 return NULL; 523 } 524 525 nhi = rcu_dereference_rtnl(nh->nh_info); 526 if (nhi->family == AF_INET6) 527 return &nhi->fib6_nh; 528 529 return NULL; 530} 531 532static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) 533{ 534 struct fib6_nh *fib6_nh; 535 536 fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; 537 return fib6_nh->fib_nh_dev; 538} 539 540static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) 541{ 542 struct nexthop *nh = res->f6i->nh; 543 struct nh_info *nhi; 544 545 nh = nexthop_select_path(nh, hash); 546 547 nhi = rcu_dereference_rtnl(nh->nh_info); 548 if (nhi->reject_nh) { 549 res->fib6_type = RTN_BLACKHOLE; 550 res->fib6_flags |= RTF_REJECT; 551 res->nh = nexthop_fib6_nh(nh); 552 } else { 553 res->nh = &nhi->fib6_nh; 554 } 555} 556 557int nexthop_for_each_fib6_nh(struct nexthop *nh, 558 int (*cb)(struct fib6_nh *nh, void *arg), 559 void *arg); 560 561static inline int nexthop_get_family(struct nexthop *nh) 562{ 563 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 564 565 return nhi->family; 566} 567 568static inline 569struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) 570{ 571 struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); 572 573 return &nhi->fib_nhc; 574} 575 576static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, 577 int hash) 578{ 579 struct nh_info *nhi; 580 struct nexthop *nhp; 581 582 nhp = nexthop_select_path(nh, hash); 583 if (unlikely(!nhp)) 584 return NULL; 585 nhi = rcu_dereference(nhp->nh_info); 586 return &nhi->fib_nhc; 587} 588#endif