Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'switchdev_offload_flags'

Roopa Prabhu says:

====================
switchdev offload flags

This patch series introduces new offload flags for switchdev.
Kernel network subsystems can use this flag to accelerate
network functions by offloading to hw.

I expect that there will be a need for subsystem-specific feature
flags in the future.

This patch series currently only addresses bridge driver link
attribute offloads to hardware.

Looking at the current state of bridge l2 offload in the kernel,
- flag 'self' is the way to directly manage the bridge device in hw via
the ndo_bridge_setlink/ndo_bridge_getlink calls

- flag 'master' is always used to manage the in kernel bridge devices
via the same ndo_bridge_setlink/ndo_bridge_getlink calls

Today these are used separately. The nic offloads use hwmode "vepa/veb" to go
directly to hw with the "self" flag.

At this point I am trying not to introduce any new user-facing flags/attributes.
In the model where we want the kernel bridging to be accelerated with
hardware, we very much want the bridge driver to be involved.

In this proposal,
- The offload flag/bit helps switch asic drivers to indicate that they
accelerate the kernel networking objects/functions
- The user does not have to specify a new flag to do so. A bridge created with
switch asic ports will be accelerated if the switch driver supports it.
- The user can continue to directly manage l2 in nics (ixgbe) using the
existing hwmode/self flags
- It also does not stop users from using the 'self' flag to talk to the
switch asic driver directly
- Involving the bridge driver makes sure the add/del notifications to user
space go out after both kernel and hardware are programmed

(To selectively offload bridge port attributes,
for example learning in hw only, we can introduce offload bits for
the per-bridge-port flag attribute, as in my previous patch
https://patchwork.ozlabs.org/patch/413211/. I have not included that in this
series.)

v2
- try a different name for the offload flag/bit
- tries to solve the stacked netdev case by traversing the lowerdev
list to reach the switch port

v3 -
- Tested with bond as bridge port for the stacked device case.
Includes a bond_fix_features change to not ignore the
NETIF_F_HW_NETFUNC_OFFLOAD flag
- Some checkpatch fixes

v4 -
- rename flag to NETIF_F_HW_SWITCH_OFFLOAD
- add ndo_bridge_setlink/dellink handlers in bond and team drivers as
suggested by jiri.
- introduce default ndo_dflt_netdev_switch_port_bridge_setlink/dellink
handlers that masters can use to call offload api on lowerdevs.
====================

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>

+206 -21
+8 -1
drivers/net/bonding/bond_main.c
··· 77 77 #include <net/pkt_sched.h> 78 78 #include <linux/rculist.h> 79 79 #include <net/flow_keys.h> 80 + #include <net/switchdev.h> 80 81 #include <net/bonding.h> 81 82 #include <net/bond_3ad.h> 82 83 #include <net/bond_alb.h> ··· 980 979 netdev_features_t mask; 981 980 struct slave *slave; 982 981 983 - mask = features; 982 + /* If any slave has the offload feature flag set, 983 + * set the offload flag on the bond. 984 + */ 985 + mask = features | NETIF_F_HW_SWITCH_OFFLOAD; 986 + 984 987 features &= ~NETIF_F_ONE_FOR_ALL; 985 988 features |= NETIF_F_ALL_FOR_ALL; 986 989 ··· 3957 3952 .ndo_add_slave = bond_enslave, 3958 3953 .ndo_del_slave = bond_release, 3959 3954 .ndo_fix_features = bond_fix_features, 3955 + .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, 3956 + .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, 3960 3957 }; 3961 3958 3962 3959 static const struct device_type bond_type = {
+2 -1
drivers/net/ethernet/emulex/benet/be_main.c
··· 4327 4327 return status; 4328 4328 } 4329 4329 4330 - static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh) 4330 + static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, 4331 + u16 flags) 4331 4332 { 4332 4333 struct be_adapter *adapter = netdev_priv(dev); 4333 4334 struct nlattr *attr, *br_spec;
+1 -1
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
··· 7786 7786 } 7787 7787 7788 7788 static int ixgbe_ndo_bridge_setlink(struct net_device *dev, 7789 - struct nlmsghdr *nlh) 7789 + struct nlmsghdr *nlh, u16 flags) 7790 7790 { 7791 7791 struct ixgbe_adapter *adapter = netdev_priv(dev); 7792 7792 struct nlattr *attr, *br_spec;
+3 -2
drivers/net/ethernet/rocker/rocker.c
··· 3722 3722 } 3723 3723 3724 3724 static int rocker_port_bridge_setlink(struct net_device *dev, 3725 - struct nlmsghdr *nlh) 3725 + struct nlmsghdr *nlh, u16 flags) 3726 3726 { 3727 3727 struct rocker_port *rocker_port = netdev_priv(dev); 3728 3728 struct nlattr *protinfo; ··· 4030 4030 NAPI_POLL_WEIGHT); 4031 4031 rocker_carrier_init(rocker_port); 4032 4032 4033 - dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; 4033 + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | 4034 + NETIF_F_HW_SWITCH_OFFLOAD; 4034 4035 4035 4036 err = register_netdev(dev); 4036 4037 if (err) {
+4 -1
drivers/net/team/team.c
··· 28 28 #include <net/genetlink.h> 29 29 #include <net/netlink.h> 30 30 #include <net/sch_generic.h> 31 + #include <net/switchdev.h> 31 32 #include <generated/utsrelease.h> 32 33 #include <linux/if_team.h> 33 34 ··· 1926 1925 struct team *team = netdev_priv(dev); 1927 1926 netdev_features_t mask; 1928 1927 1929 - mask = features; 1928 + mask = features | NETIF_F_HW_SWITCH_OFFLOAD; 1930 1929 features &= ~NETIF_F_ONE_FOR_ALL; 1931 1930 features |= NETIF_F_ALL_FOR_ALL; 1932 1931 ··· 1976 1975 .ndo_del_slave = team_del_slave, 1977 1976 .ndo_fix_features = team_fix_features, 1978 1977 .ndo_change_carrier = team_change_carrier, 1978 + .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, 1979 + .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, 1979 1980 }; 1980 1981 1981 1982 /***********************
+5 -1
include/linux/netdev_features.h
··· 66 66 NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ 67 67 NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ 68 68 NETIF_F_BUSY_POLL_BIT, /* Busy poll */ 69 + NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ 69 70 70 71 /* 71 72 * Add your fresh new feature above and remember to update ··· 125 124 #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) 126 125 #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) 127 126 #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) 127 + #define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) 128 128 129 129 /* Features valid for ethtool to change */ 130 130 /* = all defined minus driver/device-class-related */ ··· 161 159 */ 162 160 #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ 163 161 NETIF_F_SG | NETIF_F_HIGHDMA | \ 164 - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) 162 + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ 163 + NETIF_F_HW_SWITCH_OFFLOAD) 164 + 165 165 /* 166 166 * If one device doesn't support one of these features, then disable it 167 167 * for all in netdev_increment_features.
+4 -2
include/linux/netdevice.h
··· 1154 1154 int idx); 1155 1155 1156 1156 int (*ndo_bridge_setlink)(struct net_device *dev, 1157 - struct nlmsghdr *nlh); 1157 + struct nlmsghdr *nlh, 1158 + u16 flags); 1158 1159 int (*ndo_bridge_getlink)(struct sk_buff *skb, 1159 1160 u32 pid, u32 seq, 1160 1161 struct net_device *dev, 1161 1162 u32 filter_mask); 1162 1163 int (*ndo_bridge_dellink)(struct net_device *dev, 1163 - struct nlmsghdr *nlh); 1164 + struct nlmsghdr *nlh, 1165 + u16 flags); 1164 1166 int (*ndo_change_carrier)(struct net_device *dev, 1165 1167 bool new_carrier); 1166 1168 int (*ndo_get_phys_port_id)(struct net_device *dev,
+36 -1
include/net/switchdev.h
··· 43 43 int unregister_netdev_switch_notifier(struct notifier_block *nb); 44 44 int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, 45 45 struct netdev_switch_notifier_info *info); 46 - 46 + int netdev_switch_port_bridge_setlink(struct net_device *dev, 47 + struct nlmsghdr *nlh, u16 flags); 48 + int netdev_switch_port_bridge_dellink(struct net_device *dev, 49 + struct nlmsghdr *nlh, u16 flags); 50 + int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, 51 + struct nlmsghdr *nlh, u16 flags); 52 + int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, 53 + struct nlmsghdr *nlh, u16 flags); 47 54 #else 48 55 49 56 static inline int netdev_switch_parent_id_get(struct net_device *dev, ··· 79 72 struct netdev_switch_notifier_info *info) 80 73 { 81 74 return NOTIFY_DONE; 75 + } 76 + 77 + static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, 78 + struct nlmsghdr *nlh, 79 + u16 flags) 80 + { 81 + return -EOPNOTSUPP; 82 + } 83 + 84 + static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, 85 + struct nlmsghdr *nlh, 86 + u16 flags) 87 + { 88 + return -EOPNOTSUPP; 89 + } 90 + 91 + static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, 92 + struct nlmsghdr *nlh, 93 + u16 flags) 94 + { 95 + return 0; 96 + } 97 + 98 + static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, 99 + struct nlmsghdr *nlh, 100 + u16 flags) 101 + { 102 + return 0; 82 103 } 83 104 84 105 #endif
+25 -5
net/bridge/br_netlink.c
··· 16 16 #include <net/rtnetlink.h> 17 17 #include <net/net_namespace.h> 18 18 #include <net/sock.h> 19 + #include <net/switchdev.h> 19 20 #include <uapi/linux/if_bridge.h> 20 21 21 22 #include "br_private.h" ··· 495 494 } 496 495 497 496 /* Change state and parameters on port. */ 498 - int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) 497 + int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) 499 498 { 500 499 struct nlattr *protinfo; 501 500 struct nlattr *afspec; 502 501 struct net_bridge_port *p; 503 502 struct nlattr *tb[IFLA_BRPORT_MAX + 1]; 504 - int err = 0; 503 + int err = 0, ret_offload = 0; 505 504 506 505 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); 507 506 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); ··· 543 542 afspec, RTM_SETLINK); 544 543 } 545 544 545 + if (!(flags & BRIDGE_FLAGS_SELF)) { 546 + /* set bridge attributes in hardware if supported 547 + */ 548 + ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, 549 + flags); 550 + if (ret_offload && ret_offload != -EOPNOTSUPP) 551 + br_warn(p->br, "error setting attrs on port %u(%s)\n", 552 + (unsigned int)p->port_no, p->dev->name); 553 + } 554 + 546 555 if (err == 0) 547 556 br_ifinfo_notify(RTM_NEWLINK, p); 548 - 549 557 out: 550 558 return err; 551 559 } 552 560 553 561 /* Delete port information */ 554 - int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) 562 + int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) 555 563 { 556 564 struct nlattr *afspec; 557 565 struct net_bridge_port *p; 558 - int err; 566 + int err = 0, ret_offload = 0; 559 567 560 568 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); 561 569 if (!afspec) ··· 582 572 * expects RTM_NEWLINK for vlan dels 583 573 */ 584 574 br_ifinfo_notify(RTM_NEWLINK, p); 575 + 576 + if (!(flags & BRIDGE_FLAGS_SELF)) { 577 + /* del bridge attributes in hardware 578 + */ 579 + ret_offload = 
netdev_switch_port_bridge_dellink(dev, nlh, 580 + flags); 581 + if (ret_offload && ret_offload != -EOPNOTSUPP) 582 + br_warn(p->br, "error deleting attrs on port %u (%s)\n", 583 + (unsigned int)p->port_no, p->dev->name); 584 + } 585 585 586 586 return err; 587 587 }
+2 -2
net/bridge/br_private.h
··· 819 819 int br_netlink_init(void); 820 820 void br_netlink_fini(void); 821 821 void br_ifinfo_notify(int event, struct net_bridge_port *port); 822 - int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); 823 - int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); 822 + int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); 823 + int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); 824 824 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, 825 825 u32 filter_mask); 826 826
+6 -4
net/core/rtnetlink.c
··· 2991 2991 goto out; 2992 2992 } 2993 2993 2994 - err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); 2994 + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags); 2995 2995 if (err) 2996 2996 goto out; 2997 2997 ··· 3002 3002 if (!dev->netdev_ops->ndo_bridge_setlink) 3003 3003 err = -EOPNOTSUPP; 3004 3004 else 3005 - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); 3005 + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, 3006 + flags); 3006 3007 if (!err) { 3007 3008 flags &= ~BRIDGE_FLAGS_SELF; 3008 3009 ··· 3065 3064 goto out; 3066 3065 } 3067 3066 3068 - err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); 3067 + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); 3069 3068 if (err) 3070 3069 goto out; 3071 3070 ··· 3076 3075 if (!dev->netdev_ops->ndo_bridge_dellink) 3077 3076 err = -EOPNOTSUPP; 3078 3077 else 3079 - err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); 3078 + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, 3079 + flags); 3080 3080 3081 3081 if (!err) { 3082 3082 flags &= ~BRIDGE_FLAGS_SELF;
+110
net/switchdev/switchdev.c
··· 115 115 return err; 116 116 } 117 117 EXPORT_SYMBOL(call_netdev_switch_notifiers); 118 + 119 + /** 120 + * netdev_switch_port_bridge_setlink - Notify switch device port of bridge 121 + * port attributes 122 + * 123 + * @dev: port device 124 + * @nlh: netlink msg with bridge port attributes 125 + * @flags: bridge setlink flags 126 + * 127 + * Notify switch device port of bridge port attributes 128 + */ 129 + int netdev_switch_port_bridge_setlink(struct net_device *dev, 130 + struct nlmsghdr *nlh, u16 flags) 131 + { 132 + const struct net_device_ops *ops = dev->netdev_ops; 133 + 134 + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 135 + return 0; 136 + 137 + if (!ops->ndo_bridge_setlink) 138 + return -EOPNOTSUPP; 139 + 140 + return ops->ndo_bridge_setlink(dev, nlh, flags); 141 + } 142 + EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); 143 + 144 + /** 145 + * netdev_switch_port_bridge_dellink - Notify switch device port of bridge 146 + * port attribute delete 147 + * 148 + * @dev: port device 149 + * @nlh: netlink msg with bridge port attributes 150 + * @flags: bridge setlink flags 151 + * 152 + * Notify switch device port of bridge port attribute delete 153 + */ 154 + int netdev_switch_port_bridge_dellink(struct net_device *dev, 155 + struct nlmsghdr *nlh, u16 flags) 156 + { 157 + const struct net_device_ops *ops = dev->netdev_ops; 158 + 159 + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 160 + return 0; 161 + 162 + if (!ops->ndo_bridge_dellink) 163 + return -EOPNOTSUPP; 164 + 165 + return ops->ndo_bridge_dellink(dev, nlh, flags); 166 + } 167 + EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); 168 + 169 + /** 170 + * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink 171 + * op for master devices 172 + * 173 + * @dev: port device 174 + * @nlh: netlink msg with bridge port attributes 175 + * @flags: bridge setlink flags 176 + * 177 + * Notify master device slaves of bridge port attributes 178 + */ 179 + int 
ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, 180 + struct nlmsghdr *nlh, u16 flags) 181 + { 182 + struct net_device *lower_dev; 183 + struct list_head *iter; 184 + int ret = 0, err = 0; 185 + 186 + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 187 + return ret; 188 + 189 + netdev_for_each_lower_dev(dev, lower_dev, iter) { 190 + err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); 191 + if (err && err != -EOPNOTSUPP) 192 + ret = err; 193 + } 194 + 195 + return ret; 196 + } 197 + EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); 198 + 199 + /** 200 + * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink 201 + * op for master devices 202 + * 203 + * @dev: port device 204 + * @nlh: netlink msg with bridge port attributes 205 + * @flags: bridge dellink flags 206 + * 207 + * Notify master device slaves of bridge port attribute deletes 208 + */ 209 + int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, 210 + struct nlmsghdr *nlh, u16 flags) 211 + { 212 + struct net_device *lower_dev; 213 + struct list_head *iter; 214 + int ret = 0, err = 0; 215 + 216 + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) 217 + return ret; 218 + 219 + netdev_for_each_lower_dev(dev, lower_dev, iter) { 220 + err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); 221 + if (err && err != -EOPNOTSUPP) 222 + ret = err; 223 + } 224 + 225 + return ret; 226 + } 227 + EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);