Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: Introduce generic failover module

The failover module provides a generic interface for paravirtual drivers
to register a netdev and a set of ops with a failover instance. The ops
are used as event handlers that get called to handle netdev register/
unregister/link change/name change events on slave PCI Ethernet devices
with the same MAC address as the failover netdev.

This enables paravirtual drivers to use a VF as an accelerated low-latency
datapath. It also allows live migration of VMs with directly attached VFs by
failing over to the paravirtual datapath when the VF is unplugged.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Sridhar Samudrala and committed by
David S. Miller
30c8bd5a cb160394

+407
+18
Documentation/networking/failover.rst
··· 1 + .. SPDX-License-Identifier: GPL-2.0 2 + 3 + ======== 4 + FAILOVER 5 + ======== 6 + 7 + Overview 8 + ======== 9 + 10 + The failover module provides a generic interface for paravirtual drivers 11 + to register a netdev and a set of ops with a failover instance. The ops 12 + are used as event handlers that get called to handle netdev register/ 13 + unregister/link change/name change events on slave PCI Ethernet devices 14 + with the same MAC address as the failover netdev. 15 + 16 + This enables paravirtual drivers to use a VF as an accelerated low-latency 17 + datapath. It also allows live migration of VMs with directly attached VFs by 18 + failing over to the paravirtual datapath when the VF is unplugged.
+8
MAINTAINERS
··· 5411 5411 F: Documentation/hwmon/f71805f 5412 5412 F: drivers/hwmon/f71805f.c 5413 5413 5414 + FAILOVER MODULE 5415 + M: Sridhar Samudrala <sridhar.samudrala@intel.com> 5416 + L: netdev@vger.kernel.org 5417 + S: Supported 5418 + F: net/core/failover.c 5419 + F: include/net/failover.h 5420 + F: Documentation/networking/failover.rst 5421 + 5414 5422 FANOTIFY 5415 5423 M: Jan Kara <jack@suse.cz> 5416 5424 R: Amir Goldstein <amir73il@gmail.com>
+16
include/linux/netdevice.h
··· 1425 1425 * entity (i.e. the master device for bridged veth) 1426 1426 * @IFF_MACSEC: device is a MACsec device 1427 1427 * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook 1428 + * @IFF_FAILOVER: device is a failover master device 1429 + * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device 1428 1430 */ 1429 1431 enum netdev_priv_flags { 1430 1432 IFF_802_1Q_VLAN = 1<<0, ··· 1456 1454 IFF_PHONY_HEADROOM = 1<<24, 1457 1455 IFF_MACSEC = 1<<25, 1458 1456 IFF_NO_RX_HANDLER = 1<<26, 1457 + IFF_FAILOVER = 1<<27, 1458 + IFF_FAILOVER_SLAVE = 1<<28, 1459 1459 }; 1460 1460 1461 1461 #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN ··· 1486 1482 #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED 1487 1483 #define IFF_MACSEC IFF_MACSEC 1488 1484 #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER 1485 + #define IFF_FAILOVER IFF_FAILOVER 1486 + #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE 1489 1487 1490 1488 /** 1491 1489 * struct net_device - The DEVICE structure. ··· 4340 4334 static inline bool netif_is_rxfh_configured(const struct net_device *dev) 4341 4335 { 4342 4336 return dev->priv_flags & IFF_RXFH_CONFIGURED; 4337 + } 4338 + 4339 + static inline bool netif_is_failover(const struct net_device *dev) 4340 + { 4341 + return dev->priv_flags & IFF_FAILOVER; 4342 + } 4343 + 4344 + static inline bool netif_is_failover_slave(const struct net_device *dev) 4345 + { 4346 + return dev->priv_flags & IFF_FAILOVER_SLAVE; 4343 4347 } 4344 4348 4345 4349 /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
+36
include/net/failover.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (c) 2018, Intel Corporation. */ 3 + 4 + #ifndef _FAILOVER_H 5 + #define _FAILOVER_H 6 + 7 + #include <linux/netdevice.h> 8 + 9 + struct failover_ops { 10 + int (*slave_pre_register)(struct net_device *slave_dev, 11 + struct net_device *failover_dev); 12 + int (*slave_register)(struct net_device *slave_dev, 13 + struct net_device *failover_dev); 14 + int (*slave_pre_unregister)(struct net_device *slave_dev, 15 + struct net_device *failover_dev); 16 + int (*slave_unregister)(struct net_device *slave_dev, 17 + struct net_device *failover_dev); 18 + int (*slave_link_change)(struct net_device *slave_dev, 19 + struct net_device *failover_dev); 20 + int (*slave_name_change)(struct net_device *slave_dev, 21 + struct net_device *failover_dev); 22 + rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb); 23 + }; 24 + 25 + struct failover { 26 + struct list_head list; 27 + struct net_device __rcu *failover_dev; 28 + struct failover_ops __rcu *ops; 29 + }; 30 + 31 + struct failover *failover_register(struct net_device *dev, 32 + struct failover_ops *ops); 33 + void failover_unregister(struct failover *failover); 34 + int failover_slave_unregister(struct net_device *slave_dev); 35 + 36 + #endif /* _FAILOVER_H */
+13
net/Kconfig
··· 432 432 config PAGE_POOL 433 433 bool 434 434 435 + config FAILOVER 436 + tristate "Generic failover module" 437 + help 438 + The failover module provides a generic interface for paravirtual 439 + drivers to register a netdev and a set of ops with a failover 440 + instance. The ops are used as event handlers that get called to 441 + handle netdev register/unregister/link change/name change events 442 + on slave pci ethernet devices with the same mac address as the 443 + failover netdev. This enables paravirtual drivers to use a 444 + VF as an accelerated low latency datapath. It also allows live 445 + migration of VMs with direct attached VFs by failing over to the 446 + paravirtual datapath when the VF is unplugged. 447 + 435 448 endif # if NET 436 449 437 450 # Used by archs to tell that they support BPF JIT compiler plus which flavour.
+1
net/core/Makefile
··· 31 31 obj-$(CONFIG_HWBM) += hwbm.o 32 32 obj-$(CONFIG_NET_DEVLINK) += devlink.o 33 33 obj-$(CONFIG_GRO_CELLS) += gro_cells.o 34 + obj-$(CONFIG_FAILOVER) += failover.o
+315
net/core/failover.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2018, Intel Corporation. */ 3 + 4 + /* A common module to handle registrations and notifications for paravirtual 5 + * drivers to enable accelerated datapath and support VF live migration. 6 + * 7 + * The notifier and event handling code is based on netvsc driver. 8 + */ 9 + 10 + #include <linux/module.h> 11 + #include <linux/etherdevice.h> 12 + #include <uapi/linux/if_arp.h> 13 + #include <linux/rtnetlink.h> 14 + #include <linux/if_vlan.h> 15 + #include <net/failover.h> 16 + 17 + static LIST_HEAD(failover_list); 18 + static DEFINE_SPINLOCK(failover_lock); 19 + 20 + static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops) 21 + { 22 + struct net_device *failover_dev; 23 + struct failover *failover; 24 + 25 + spin_lock(&failover_lock); 26 + list_for_each_entry(failover, &failover_list, list) { 27 + failover_dev = rtnl_dereference(failover->failover_dev); 28 + if (ether_addr_equal(failover_dev->perm_addr, mac)) { 29 + *ops = rtnl_dereference(failover->ops); 30 + spin_unlock(&failover_lock); 31 + return failover_dev; 32 + } 33 + } 34 + spin_unlock(&failover_lock); 35 + return NULL; 36 + } 37 + 38 + /** 39 + * failover_slave_register - Register a slave netdev 40 + * 41 + * @slave_dev: slave netdev that is being registered 42 + * 43 + * Registers a slave device to a failover instance. Only ethernet devices 44 + * are supported. 
45 + */ 46 + static int failover_slave_register(struct net_device *slave_dev) 47 + { 48 + struct netdev_lag_upper_info lag_upper_info; 49 + struct net_device *failover_dev; 50 + struct failover_ops *fops; 51 + int err; 52 + 53 + if (slave_dev->type != ARPHRD_ETHER) 54 + goto done; 55 + 56 + ASSERT_RTNL(); 57 + 58 + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); 59 + if (!failover_dev) 60 + goto done; 61 + 62 + if (fops && fops->slave_pre_register && 63 + fops->slave_pre_register(slave_dev, failover_dev)) 64 + goto done; 65 + 66 + err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame, 67 + failover_dev); 68 + if (err) { 69 + netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n", 70 + err); 71 + goto done; 72 + } 73 + 74 + lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP; 75 + err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL, 76 + &lag_upper_info, NULL); 77 + if (err) { 78 + netdev_err(slave_dev, "can not set failover device %s (err = %d)\n", 79 + failover_dev->name, err); 80 + goto err_upper_link; 81 + } 82 + 83 + slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; 84 + 85 + if (fops && fops->slave_register && 86 + !fops->slave_register(slave_dev, failover_dev)) 87 + return NOTIFY_OK; 88 + 89 + netdev_upper_dev_unlink(slave_dev, failover_dev); 90 + slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; 91 + err_upper_link: 92 + netdev_rx_handler_unregister(slave_dev); 93 + done: 94 + return NOTIFY_DONE; 95 + } 96 + 97 + /** 98 + * failover_slave_unregister - Unregister a slave netdev 99 + * 100 + * @slave_dev: slave netdev that is being unregistered 101 + * 102 + * Unregisters a slave device from a failover instance. 
103 + */ 104 + int failover_slave_unregister(struct net_device *slave_dev) 105 + { 106 + struct net_device *failover_dev; 107 + struct failover_ops *fops; 108 + 109 + if (!netif_is_failover_slave(slave_dev)) 110 + goto done; 111 + 112 + ASSERT_RTNL(); 113 + 114 + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); 115 + if (!failover_dev) 116 + goto done; 117 + 118 + if (fops && fops->slave_pre_unregister && 119 + fops->slave_pre_unregister(slave_dev, failover_dev)) 120 + goto done; 121 + 122 + netdev_rx_handler_unregister(slave_dev); 123 + netdev_upper_dev_unlink(slave_dev, failover_dev); 124 + slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; 125 + 126 + if (fops && fops->slave_unregister && 127 + !fops->slave_unregister(slave_dev, failover_dev)) 128 + return NOTIFY_OK; 129 + 130 + done: 131 + return NOTIFY_DONE; 132 + } 133 + EXPORT_SYMBOL_GPL(failover_slave_unregister); 134 + 135 + static int failover_slave_link_change(struct net_device *slave_dev) 136 + { 137 + struct net_device *failover_dev; 138 + struct failover_ops *fops; 139 + 140 + if (!netif_is_failover_slave(slave_dev)) 141 + goto done; 142 + 143 + ASSERT_RTNL(); 144 + 145 + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); 146 + if (!failover_dev) 147 + goto done; 148 + 149 + if (!netif_running(failover_dev)) 150 + goto done; 151 + 152 + if (fops && fops->slave_link_change && 153 + !fops->slave_link_change(slave_dev, failover_dev)) 154 + return NOTIFY_OK; 155 + 156 + done: 157 + return NOTIFY_DONE; 158 + } 159 + 160 + static int failover_slave_name_change(struct net_device *slave_dev) 161 + { 162 + struct net_device *failover_dev; 163 + struct failover_ops *fops; 164 + 165 + if (!netif_is_failover_slave(slave_dev)) 166 + goto done; 167 + 168 + ASSERT_RTNL(); 169 + 170 + failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops); 171 + if (!failover_dev) 172 + goto done; 173 + 174 + if (!netif_running(failover_dev)) 175 + goto done; 176 + 177 + if (fops && 
fops->slave_name_change && 178 + !fops->slave_name_change(slave_dev, failover_dev)) 179 + return NOTIFY_OK; 180 + 181 + done: 182 + return NOTIFY_DONE; 183 + } 184 + 185 + static int 186 + failover_event(struct notifier_block *this, unsigned long event, void *ptr) 187 + { 188 + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 189 + 190 + /* Skip parent events */ 191 + if (netif_is_failover(event_dev)) 192 + return NOTIFY_DONE; 193 + 194 + switch (event) { 195 + case NETDEV_REGISTER: 196 + return failover_slave_register(event_dev); 197 + case NETDEV_UNREGISTER: 198 + return failover_slave_unregister(event_dev); 199 + case NETDEV_UP: 200 + case NETDEV_DOWN: 201 + case NETDEV_CHANGE: 202 + return failover_slave_link_change(event_dev); 203 + case NETDEV_CHANGENAME: 204 + return failover_slave_name_change(event_dev); 205 + default: 206 + return NOTIFY_DONE; 207 + } 208 + } 209 + 210 + static struct notifier_block failover_notifier = { 211 + .notifier_call = failover_event, 212 + }; 213 + 214 + static void 215 + failover_existing_slave_register(struct net_device *failover_dev) 216 + { 217 + struct net *net = dev_net(failover_dev); 218 + struct net_device *dev; 219 + 220 + rtnl_lock(); 221 + for_each_netdev(net, dev) { 222 + if (netif_is_failover(dev)) 223 + continue; 224 + if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) 225 + failover_slave_register(dev); 226 + } 227 + rtnl_unlock(); 228 + } 229 + 230 + /** 231 + * failover_register - Register a failover instance 232 + * 233 + * @dev: failover netdev 234 + * @ops: failover ops 235 + * 236 + * Allocate and register a failover instance for a failover netdev. ops 237 + * provides handlers for slave device register/unregister/link change/ 238 + * name change events. 
239 + * 240 + * Return: pointer to failover instance 241 + */ 242 + struct failover *failover_register(struct net_device *dev, 243 + struct failover_ops *ops) 244 + { 245 + struct failover *failover; 246 + 247 + if (dev->type != ARPHRD_ETHER) 248 + return ERR_PTR(-EINVAL); 249 + 250 + failover = kzalloc(sizeof(*failover), GFP_KERNEL); 251 + if (!failover) 252 + return ERR_PTR(-ENOMEM); 253 + 254 + rcu_assign_pointer(failover->ops, ops); 255 + dev_hold(dev); 256 + dev->priv_flags |= IFF_FAILOVER; 257 + rcu_assign_pointer(failover->failover_dev, dev); 258 + 259 + spin_lock(&failover_lock); 260 + list_add_tail(&failover->list, &failover_list); 261 + spin_unlock(&failover_lock); 262 + 263 + netdev_info(dev, "failover master:%s registered\n", dev->name); 264 + 265 + failover_existing_slave_register(dev); 266 + 267 + return failover; 268 + } 269 + EXPORT_SYMBOL_GPL(failover_register); 270 + 271 + /** 272 + * failover_unregister - Unregister a failover instance 273 + * 274 + * @failover: pointer to failover instance 275 + * 276 + * Unregisters and frees a failover instance. 
277 + */ 278 + void failover_unregister(struct failover *failover) 279 + { 280 + struct net_device *failover_dev; 281 + 282 + failover_dev = rcu_dereference(failover->failover_dev); 283 + 284 + netdev_info(failover_dev, "failover master:%s unregistered\n", 285 + failover_dev->name); 286 + 287 + failover_dev->priv_flags &= ~IFF_FAILOVER; 288 + dev_put(failover_dev); 289 + 290 + spin_lock(&failover_lock); 291 + list_del(&failover->list); 292 + spin_unlock(&failover_lock); 293 + 294 + kfree(failover); 295 + } 296 + EXPORT_SYMBOL_GPL(failover_unregister); 297 + 298 + static __init int 299 + failover_init(void) 300 + { 301 + register_netdevice_notifier(&failover_notifier); 302 + 303 + return 0; 304 + } 305 + module_init(failover_init); 306 + 307 + static __exit 308 + void failover_exit(void) 309 + { 310 + unregister_netdevice_notifier(&failover_notifier); 311 + } 312 + module_exit(failover_exit); 313 + 314 + MODULE_DESCRIPTION("Generic failover infrastructure/interface"); 315 + MODULE_LICENSE("GPL v2");