Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bonding: remap multicast addresses without using dev_close() and dev_open()

This patch fixes commit e36b9d16c6a6d0f59803b3ef04ff3c22c3844c10. The approach
there is to call dev_close()/dev_open() whenever the device type is changed in
order to remap the device IP multicast addresses to HW multicast addresses.
This approach suffers from 2 drawbacks:

*. It assumes that the device is UP when calling dev_close(); otherwise
dev_close() has no effect. It is worth mentioning that initscripts (Red Hat)
and sysconfig (SUSE) don't act the same in this matter.
*. dev_close() has other side effects, like deleting entries from the routing
table, which might be unnecessary.

The fix here is to directly remap the IP multicast addresses to HW multicast
addresses for a bonding device that changes its type, and nothing else.

Reported-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Moni Shoua <monis@voltaire.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Moni Shoua and committed by
David S. Miller
75c78500 481a8199

+82 -6
+6 -3
drivers/net/bonding/bond_main.c
··· 1211 1211 write_unlock_bh(&bond->curr_slave_lock); 1212 1212 read_unlock(&bond->lock); 1213 1213 1214 - netdev_bonding_change(bond->dev); 1214 + netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER); 1215 1215 1216 1216 read_lock(&bond->lock); 1217 1217 write_lock_bh(&bond->curr_slave_lock); ··· 1469 1469 */ 1470 1470 if (bond->slave_cnt == 0) { 1471 1471 if (bond_dev->type != slave_dev->type) { 1472 - dev_close(bond_dev); 1473 1472 pr_debug("%s: change device type from %d to %d\n", 1474 1473 bond_dev->name, bond_dev->type, slave_dev->type); 1474 + 1475 + netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE); 1476 + 1475 1477 if (slave_dev->type != ARPHRD_ETHER) 1476 1478 bond_setup_by_slave(bond_dev, slave_dev); 1477 1479 else 1478 1480 ether_setup(bond_dev); 1479 - dev_open(bond_dev); 1481 + 1482 + netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE); 1480 1483 } 1481 1484 } else if (bond_dev->type != slave_dev->type) { 1482 1485 pr_err(DRV_NAME ": %s ether type (%d) is different "
+2
include/linux/igmp.h
··· 233 233 extern void ip_mc_destroy_dev(struct in_device *); 234 234 extern void ip_mc_up(struct in_device *); 235 235 extern void ip_mc_down(struct in_device *); 236 + extern void ip_mc_unmap(struct in_device *); 237 + extern void ip_mc_remap(struct in_device *); 236 238 extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); 237 239 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); 238 240 extern void ip_mc_rejoin_group(struct ip_mc_list *im);
+2 -1
include/linux/netdevice.h
··· 1873 1873 extern int dev_set_promiscuity(struct net_device *dev, int inc); 1874 1874 extern int dev_set_allmulti(struct net_device *dev, int inc); 1875 1875 extern void netdev_state_change(struct net_device *dev); 1876 - extern void netdev_bonding_change(struct net_device *dev); 1876 + extern void netdev_bonding_change(struct net_device *dev, 1877 + unsigned long event); 1877 1878 extern void netdev_features_change(struct net_device *dev); 1878 1879 /* Load a device via the kmod */ 1879 1880 extern void dev_load(struct net *net, const char *name);
+2
include/linux/notifier.h
··· 199 199 #define NETDEV_FEAT_CHANGE 0x000B 200 200 #define NETDEV_BONDING_FAILOVER 0x000C 201 201 #define NETDEV_PRE_UP 0x000D 202 + #define NETDEV_BONDING_OLDTYPE 0x000E 203 + #define NETDEV_BONDING_NEWTYPE 0x000F 202 204 203 205 #define SYS_DOWN 0x0001 /* Notify of system down */ 204 206 #define SYS_RESTART SYS_DOWN
+2
include/net/addrconf.h
··· 143 143 extern int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr); 144 144 extern void ipv6_mc_up(struct inet6_dev *idev); 145 145 extern void ipv6_mc_down(struct inet6_dev *idev); 146 + extern void ipv6_mc_unmap(struct inet6_dev *idev); 147 + extern void ipv6_mc_remap(struct inet6_dev *idev); 146 148 extern void ipv6_mc_init_dev(struct inet6_dev *idev); 147 149 extern void ipv6_mc_destroy_dev(struct inet6_dev *idev); 148 150 extern void addrconf_dad_failure(struct inet6_ifaddr *ifp);
+2 -2
net/core/dev.c
··· 1017 1017 } 1018 1018 EXPORT_SYMBOL(netdev_state_change); 1019 1019 1020 - void netdev_bonding_change(struct net_device *dev) 1020 + void netdev_bonding_change(struct net_device *dev, unsigned long event) 1021 1021 { 1022 - call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); 1022 + call_netdevice_notifiers(event, dev); 1023 1023 } 1024 1024 EXPORT_SYMBOL(netdev_bonding_change); 1025 1025
+6
net/ipv4/devinet.c
··· 1087 1087 case NETDEV_DOWN: 1088 1088 ip_mc_down(in_dev); 1089 1089 break; 1090 + case NETDEV_BONDING_OLDTYPE: 1091 + ip_mc_unmap(in_dev); 1092 + break; 1093 + case NETDEV_BONDING_NEWTYPE: 1094 + ip_mc_remap(in_dev); 1095 + break; 1090 1096 case NETDEV_CHANGEMTU: 1091 1097 if (inetdev_valid_mtu(dev->mtu)) 1092 1098 break;
+22
net/ipv4/igmp.c
··· 1298 1298 } 1299 1299 } 1300 1300 1301 + /* Device changing type */ 1302 + 1303 + void ip_mc_unmap(struct in_device *in_dev) 1304 + { 1305 + struct ip_mc_list *i; 1306 + 1307 + ASSERT_RTNL(); 1308 + 1309 + for (i = in_dev->mc_list; i; i = i->next) 1310 + igmp_group_dropped(i); 1311 + } 1312 + 1313 + void ip_mc_remap(struct in_device *in_dev) 1314 + { 1315 + struct ip_mc_list *i; 1316 + 1317 + ASSERT_RTNL(); 1318 + 1319 + for (i = in_dev->mc_list; i; i = i->next) 1320 + igmp_group_added(i); 1321 + } 1322 + 1301 1323 /* Device going down */ 1302 1324 1303 1325 void ip_mc_down(struct in_device *in_dev)
+19
net/ipv6/addrconf.c
··· 137 137 static void addrconf_join_anycast(struct inet6_ifaddr *ifp); 138 138 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); 139 139 140 + static void addrconf_bonding_change(struct net_device *dev, 141 + unsigned long event); 140 142 static int addrconf_ifdown(struct net_device *dev, int how); 141 143 142 144 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); ··· 2584 2582 return notifier_from_errno(err); 2585 2583 } 2586 2584 break; 2585 + case NETDEV_BONDING_OLDTYPE: 2586 + case NETDEV_BONDING_NEWTYPE: 2587 + addrconf_bonding_change(dev, event); 2588 + break; 2587 2589 } 2588 2590 2589 2591 return NOTIFY_OK; ··· 2600 2594 .notifier_call = addrconf_notify, 2601 2595 .priority = 0 2602 2596 }; 2597 + 2598 + static void addrconf_bonding_change(struct net_device *dev, unsigned long event) 2599 + { 2600 + struct inet6_dev *idev; 2601 + ASSERT_RTNL(); 2602 + 2603 + idev = __in6_dev_get(dev); 2604 + 2605 + if (event == NETDEV_BONDING_NEWTYPE) 2606 + ipv6_mc_remap(idev); 2607 + else if (event == NETDEV_BONDING_OLDTYPE) 2608 + ipv6_mc_unmap(idev); 2609 + } 2603 2610 2604 2611 static int addrconf_ifdown(struct net_device *dev, int how) 2605 2612 {
+19
net/ipv6/mcast.c
··· 2249 2249 ma_put(ma); 2250 2250 } 2251 2251 2252 + /* Device changing type */ 2253 + 2254 + void ipv6_mc_unmap(struct inet6_dev *idev) 2255 + { 2256 + struct ifmcaddr6 *i; 2257 + 2258 + /* Install multicast list, except for all-nodes (already installed) */ 2259 + 2260 + read_lock_bh(&idev->lock); 2261 + for (i = idev->mc_list; i; i = i->next) 2262 + igmp6_group_dropped(i); 2263 + read_unlock_bh(&idev->lock); 2264 + } 2265 + 2266 + void ipv6_mc_remap(struct inet6_dev *idev) 2267 + { 2268 + ipv6_mc_up(idev); 2269 + } 2270 + 2252 2271 /* Device going down */ 2253 2272 2254 2273 void ipv6_mc_down(struct inet6_dev *idev)