Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nexthop: Add netlink defines and enumerators for resilient NH groups

- RTM_NEWNEXTHOP et al. that handle resilient groups will have a new nested
attribute, NHA_RES_GROUP, whose elements are attributes NHA_RES_GROUP_*.

- RTM_NEWNEXTHOPBUCKET et al. is a suite of new messages that will
currently serve only for dumping of individual buckets of resilient nexthop
groups. For nexthop group buckets, these messages will carry a nested
attribute NHA_RES_BUCKET, whose elements are attributes NHA_RES_BUCKET_*.

There are several reasons why a new suite of messages is created for
nexthop buckets instead of overloading the information on the existing
RTM_{NEW,DEL,GET}NEXTHOP messages.

First, a nexthop group can contain a large number of nexthop buckets (4k
is not unheard of). This imposes limits on the amount of information that
can be encoded for each nexthop bucket, given that a netlink message is
limited to 64k bytes.

Second, while RTM_NEWNEXTHOPBUCKET is only used for notifications at
this point, in the future it can be extended to provide user space with
control over nexthop bucket configuration.

- The new group type is NEXTHOP_GRP_TYPE_RES. Note that nexthop code is
adjusted to bounce groups with that type for now.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Ido Schimmel and committed by
David S. Miller
710ec562 90e1a9e2

+59 -2
+46 -1
include/uapi/linux/nexthop.h
··· 21 21 }; 22 22 23 23 enum { 24 - NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */ 24 + NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group 25 + * default type if not specified 26 + */ 27 + NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */ 25 28 __NEXTHOP_GRP_TYPE_MAX, 26 29 }; 27 30 ··· 55 52 NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ 56 53 /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ 57 54 55 + /* nested; resilient nexthop group attributes */ 56 + NHA_RES_GROUP, 57 + /* nested; nexthop bucket attributes */ 58 + NHA_RES_BUCKET, 59 + 58 60 __NHA_MAX, 59 61 }; 60 62 61 63 #define NHA_MAX (__NHA_MAX - 1) 64 + 65 + enum { 66 + NHA_RES_GROUP_UNSPEC, 67 + /* Pad attribute for 64-bit alignment. */ 68 + NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, 69 + 70 + /* u16; number of nexthop buckets in a resilient nexthop group */ 71 + NHA_RES_GROUP_BUCKETS, 72 + /* clock_t as u32; nexthop bucket idle timer (per-group) */ 73 + NHA_RES_GROUP_IDLE_TIMER, 74 + /* clock_t as u32; nexthop unbalanced timer */ 75 + NHA_RES_GROUP_UNBALANCED_TIMER, 76 + /* clock_t as u64; nexthop unbalanced time */ 77 + NHA_RES_GROUP_UNBALANCED_TIME, 78 + 79 + __NHA_RES_GROUP_MAX, 80 + }; 81 + 82 + #define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1) 83 + 84 + enum { 85 + NHA_RES_BUCKET_UNSPEC, 86 + /* Pad attribute for 64-bit alignment. */ 87 + NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC, 88 + 89 + /* u16; nexthop bucket index */ 90 + NHA_RES_BUCKET_INDEX, 91 + /* clock_t as u64; nexthop bucket idle time */ 92 + NHA_RES_BUCKET_IDLE_TIME, 93 + /* u32; nexthop id assigned to the nexthop bucket */ 94 + NHA_RES_BUCKET_NH_ID, 95 + 96 + __NHA_RES_BUCKET_MAX, 97 + }; 98 + 99 + #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) 100 + 62 101 #endif
+7
include/uapi/linux/rtnetlink.h
··· 178 178 RTM_GETVLAN, 179 179 #define RTM_GETVLAN RTM_GETVLAN 180 180 181 + RTM_NEWNEXTHOPBUCKET = 116, 182 + #define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET 183 + RTM_DELNEXTHOPBUCKET, 184 + #define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET 185 + RTM_GETNEXTHOPBUCKET, 186 + #define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET 187 + 181 188 __RTM_MAX, 182 189 #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) 183 190 };
+2
net/ipv4/nexthop.c
··· 1492 1492 if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { 1493 1493 nhg->mpath = 1; 1494 1494 nhg->is_multipath = true; 1495 + } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { 1496 + goto out_no_nh; 1495 1497 } 1496 1498 1497 1499 WARN_ON_ONCE(nhg->mpath != 1);
+4 -1
security/selinux/nlmsgtab.c
··· 88 88 { RTM_NEWVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, 89 89 { RTM_DELVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, 90 90 { RTM_GETVLAN, NETLINK_ROUTE_SOCKET__NLMSG_READ }, 91 + { RTM_NEWNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, 92 + { RTM_DELNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, 93 + { RTM_GETNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ }, 91 94 }; 92 95 93 96 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = ··· 174 171 * structures at the top of this file with the new mappings 175 172 * before updating the BUILD_BUG_ON() macro! 176 173 */ 177 - BUILD_BUG_ON(RTM_MAX != (RTM_NEWVLAN + 3)); 174 + BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3)); 178 175 err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, 179 176 sizeof(nlmsg_route_perms)); 180 177 break;