Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bonding: attempt to better support longer hw addresses

People are using bonding over Infiniband IPoIB connections, and who knows
what else. Infiniband has a hardware address length of 20 octets
(INFINIBAND_ALEN), and the network core defines a MAX_ADDR_LEN of 32.
Various places in the bonding code are currently hard-wired to 6 octets
(ETH_ALEN), such as the 3ad code, which I've left untouched here. Besides,
only alb is currently possible on Infiniband links right now anyway, due
to commit 1533e7731522, so the alb code is where most of the changes are.

One major component of this change is the addition of a bond_hw_addr_copy
function that takes a length argument, instead of using ether_addr_copy
everywhere that hardware addresses need to be copied about. The other
major component of this change is converting the bonding code from using
struct sockaddr for address storage to struct sockaddr_storage, as the
former has an address storage space of only 14, while the latter is 128
minus a few, which is necessary to support bonding over device with up to
MAX_ADDR_LEN octet hardware addresses. Additionally, this probably fixes
up some memory corruption issues with the current code, where it's
possible to write an infiniband hardware address into a sockaddr declared
on the stack.

Lightly tested on a dual mlx4 IPoIB setup, which properly shows a 20-octet
hardware address now:

$ cat /proc/net/bonding/bond0
Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011)

Bonding Mode: fault-tolerance (active-backup) (fail_over_mac active)
Primary Slave: mlx4_ib0 (primary_reselect always)
Currently Active Slave: mlx4_ib0
MII Status: up
MII Polling Interval (ms): 100
Up Delay (ms): 100
Down Delay (ms): 100

Slave Interface: mlx4_ib0
MII Status: up
Speed: Unknown
Duplex: Unknown
Link Failure Count: 0
Permanent HW addr:
80:00:02:08:fe:80:00:00:00:00:00:00:e4:1d:2d:03:00:1d:67:01
Slave queue ID: 0

Slave Interface: mlx4_ib1
MII Status: up
Speed: Unknown
Duplex: Unknown
Link Failure Count: 0
Permanent HW addr:
80:00:02:09:fe:80:00:00:00:00:00:01:e4:1d:2d:03:00:1d:67:02
Slave queue ID: 0

Also tested with a standard 1Gbps NIC bonding setup (with a mix of
e1000 and e1000e cards), running LNST's bonding tests.

CC: Jay Vosburgh <j.vosburgh@gmail.com>
CC: Veaceslav Falico <vfalico@gmail.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: netdev@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jarod Wilson and committed by
David S. Miller
faeeb317 148cbab6

+108 -68
+53 -35
drivers/net/bonding/bond_alb.c
··· 687 687 /* the arp must be sent on the selected rx channel */ 688 688 tx_slave = rlb_choose_channel(skb, bond); 689 689 if (tx_slave) 690 - ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr); 690 + bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, 691 + tx_slave->dev->addr_len); 691 692 netdev_dbg(bond->dev, "Server sent ARP Reply packet\n"); 692 693 } else if (arp->op_code == htons(ARPOP_REQUEST)) { 693 694 /* Create an entry in the rx_hashtbl for this client as a ··· 1018 1017 rcu_read_unlock(); 1019 1018 } 1020 1019 1021 - static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) 1020 + static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], 1021 + unsigned int len) 1022 1022 { 1023 1023 struct net_device *dev = slave->dev; 1024 - struct sockaddr s_addr; 1024 + struct sockaddr_storage ss; 1025 1025 1026 1026 if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { 1027 - memcpy(dev->dev_addr, addr, dev->addr_len); 1027 + memcpy(dev->dev_addr, addr, len); 1028 1028 return 0; 1029 1029 } 1030 1030 1031 1031 /* for rlb each slave must have a unique hw mac addresses so that 1032 1032 * each slave will receive packets destined to a different mac 1033 1033 */ 1034 - memcpy(s_addr.sa_data, addr, dev->addr_len); 1035 - s_addr.sa_family = dev->type; 1036 - if (dev_set_mac_address(dev, &s_addr)) { 1034 + memcpy(ss.__data, addr, len); 1035 + ss.ss_family = dev->type; 1036 + if (dev_set_mac_address(dev, (struct sockaddr *)&ss)) { 1037 1037 netdev_err(slave->bond->dev, "dev_set_mac_address of dev %s failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", 1038 1038 dev->name); 1039 1039 return -EOPNOTSUPP; ··· 1048 1046 */ 1049 1047 static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) 1050 1048 { 1051 - u8 tmp_mac_addr[ETH_ALEN]; 1049 + u8 tmp_mac_addr[MAX_ADDR_LEN]; 1052 1050 1053 - ether_addr_copy(tmp_mac_addr, slave1->dev->dev_addr); 1054 - alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr); 1055 - alb_set_slave_mac_addr(slave2, tmp_mac_addr); 1051 + bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr, 1052 + slave1->dev->addr_len); 1053 + alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, 1054 + slave2->dev->addr_len); 1055 + alb_set_slave_mac_addr(slave2, tmp_mac_addr, 1056 + slave1->dev->addr_len); 1056 1057 1057 1058 } 1058 1059 ··· 1182 1177 /* Try setting slave mac to bond address and fall-through 1183 1178 * to code handling that situation below... 1184 1179 */ 1185 - alb_set_slave_mac_addr(slave, bond->dev->dev_addr); 1180 + alb_set_slave_mac_addr(slave, bond->dev->dev_addr, 1181 + bond->dev->addr_len); 1186 1182 } 1187 1183 1188 1184 /* The slave's address is equal to the address of the bond. ··· 1208 1202 } 1209 1203 1210 1204 if (free_mac_slave) { 1211 - alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); 1205 + alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, 1206 + free_mac_slave->dev->addr_len); 1212 1207 1213 1208 netdev_warn(bond->dev, "the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", 1214 1209 slave->dev->name, free_mac_slave->dev->name); ··· 1241 1234 { 1242 1235 struct slave *slave, *rollback_slave; 1243 1236 struct list_head *iter; 1244 - struct sockaddr sa; 1245 - char tmp_addr[ETH_ALEN]; 1237 + struct sockaddr_storage ss; 1238 + char tmp_addr[MAX_ADDR_LEN]; 1246 1239 int res; 1247 1240 1248 1241 if (bond->alb_info.rlb_enabled) ··· 1250 1243 1251 1244 bond_for_each_slave(bond, slave, iter) { 1252 1245 /* save net_device's current hw address */ 1253 - ether_addr_copy(tmp_addr, slave->dev->dev_addr); 1246 + bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr, 1247 + slave->dev->addr_len); 1254 1248 1255 1249 res = dev_set_mac_address(slave->dev, addr); 1256 1250 1257 1251 /* restore net_device's hw address */ 1258 - ether_addr_copy(slave->dev->dev_addr, tmp_addr); 1252 + bond_hw_addr_copy(slave->dev->dev_addr, tmp_addr, 1253 + slave->dev->addr_len); 1259 1254 1260 1255 if (res) 1261 1256 goto unwind; ··· 1266 1257 return 0; 1267 1258 1268 1259 unwind: 1269 - memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1270 - sa.sa_family = bond->dev->type; 1260 + memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len); 1261 + ss.ss_family = bond->dev->type; 1271 1262 1272 1263 /* unwind from head to the slave that failed */ 1273 1264 bond_for_each_slave(bond, rollback_slave, iter) { 1274 1265 if (rollback_slave == slave) 1275 1266 break; 1276 - ether_addr_copy(tmp_addr, rollback_slave->dev->dev_addr); 1277 - dev_set_mac_address(rollback_slave->dev, &sa); 1278 - ether_addr_copy(rollback_slave->dev->dev_addr, tmp_addr); 1267 + bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr, 1268 + rollback_slave->dev->addr_len); 1269 + dev_set_mac_address(rollback_slave->dev, 1270 + (struct sockaddr *)&ss); 1271 + bond_hw_addr_copy(rollback_slave->dev->dev_addr, tmp_addr, 1272 + rollback_slave->dev->addr_len); 1279 1273 } 1280 1274 1281 1275 return res; ··· 1594 1582 { 1595 1583 int res; 1596 1584 1597 - res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); 1585 + res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, 1586 + slave->dev->addr_len); 1598 1587 if (res) 1599 1588 return res; 1600 1589 ··· 1709 1696 * and thus filter bond->dev_addr's packets, so force bond's mac 1710 1697 */ 1711 1698 if (BOND_MODE(bond) == BOND_MODE_TLB) { 1712 - struct sockaddr sa; 1713 - u8 tmp_addr[ETH_ALEN]; 1699 + struct sockaddr_storage ss; 1700 + u8 tmp_addr[MAX_ADDR_LEN]; 1714 1701 1715 - ether_addr_copy(tmp_addr, new_slave->dev->dev_addr); 1702 + bond_hw_addr_copy(tmp_addr, new_slave->dev->dev_addr, 1703 + new_slave->dev->addr_len); 1716 1704 1717 - memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1718 - sa.sa_family = bond->dev->type; 1705 + bond_hw_addr_copy(ss.__data, bond->dev->dev_addr, 1706 + bond->dev->addr_len); 1707 + ss.ss_family = bond->dev->type; 1719 1708 /* we don't care if it can't change its mac, best effort */ 1720 - dev_set_mac_address(new_slave->dev, &sa); 1709 + dev_set_mac_address(new_slave->dev, (struct sockaddr *)&ss); 1721 1710 1722 - ether_addr_copy(new_slave->dev->dev_addr, tmp_addr); 1711 + bond_hw_addr_copy(new_slave->dev->dev_addr, tmp_addr, 1712 + new_slave->dev->addr_len); 1723 1713 } 1724 1714 1725 1715 /* curr_active_slave must be set before calling alb_swap_mac_addr */ ··· 1732 1716 alb_fasten_mac_swap(bond, swap_slave, new_slave); 1733 1717 } else { 1734 1718 /* set the new_slave to the bond mac address */ 1735 - alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); 1719 + alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, 1720 + bond->dev->addr_len); 1736 1721 alb_send_learning_packets(new_slave, bond->dev->dev_addr, 1737 1722 false); 1738 1723 } ··· 1743 1726 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) 1744 1727 { 1745 1728 struct bonding *bond = netdev_priv(bond_dev); 1746 - struct sockaddr *sa = addr; 1729 + struct sockaddr_storage *ss = addr; 1747 1730 struct slave *curr_active; 1748 1731 struct slave *swap_slave; 1749 1732 int res; 1750 1733 1751 - if (!is_valid_ether_addr(sa->sa_data)) 1734 + if (!is_valid_ether_addr(ss->__data)) 1752 1735 return -EADDRNOTAVAIL; 1753 1736 1754 1737 res = alb_set_mac_address(bond, addr); 1755 1738 if (res) 1756 1739 return res; 1757 1740 1758 - memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 1741 + bond_hw_addr_copy(bond_dev->dev_addr, ss->__data, bond_dev->addr_len); 1759 1742 1760 1743 /* If there is no curr_active_slave there is nothing else to do. 1761 1744 * Otherwise we'll need to pass the new address to it and handle ··· 1771 1754 alb_swap_mac_addr(swap_slave, curr_active); 1772 1755 alb_fasten_mac_swap(bond, swap_slave, curr_active); 1773 1756 } else { 1774 - alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr); 1757 + alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr, 1758 + bond_dev->addr_len); 1775 1759 1776 1760 alb_send_learning_packets(curr_active, 1777 1761 bond_dev->dev_addr, false);
+42 -31
drivers/net/bonding/bond_main.c
··· 645 645 struct slave *new_active, 646 646 struct slave *old_active) 647 647 { 648 - u8 tmp_mac[ETH_ALEN]; 649 - struct sockaddr saddr; 648 + u8 tmp_mac[MAX_ADDR_LEN]; 649 + struct sockaddr_storage ss; 650 650 int rv; 651 651 652 652 switch (bond->params.fail_over_mac) { ··· 666 666 old_active = bond_get_old_active(bond, new_active); 667 667 668 668 if (old_active) { 669 - ether_addr_copy(tmp_mac, new_active->dev->dev_addr); 670 - ether_addr_copy(saddr.sa_data, 671 - old_active->dev->dev_addr); 672 - saddr.sa_family = new_active->dev->type; 669 + bond_hw_addr_copy(tmp_mac, new_active->dev->dev_addr, 670 + new_active->dev->addr_len); 671 + bond_hw_addr_copy(ss.__data, 672 + old_active->dev->dev_addr, 673 + old_active->dev->addr_len); 674 + ss.ss_family = new_active->dev->type; 673 675 } else { 674 - ether_addr_copy(saddr.sa_data, bond->dev->dev_addr); 675 - saddr.sa_family = bond->dev->type; 676 + bond_hw_addr_copy(ss.__data, bond->dev->dev_addr, 677 + bond->dev->addr_len); 678 + ss.ss_family = bond->dev->type; 676 679 } 677 680 678 - rv = dev_set_mac_address(new_active->dev, &saddr); 681 + rv = dev_set_mac_address(new_active->dev, 682 + (struct sockaddr *)&ss); 679 683 if (rv) { 680 684 netdev_err(bond->dev, "Error %d setting MAC of slave %s\n", 681 685 -rv, new_active->dev->name); ··· 689 685 if (!old_active) 690 686 goto out; 691 687 692 - ether_addr_copy(saddr.sa_data, tmp_mac); 693 - saddr.sa_family = old_active->dev->type; 688 + bond_hw_addr_copy(ss.__data, tmp_mac, 689 + new_active->dev->addr_len); 690 + ss.ss_family = old_active->dev->type; 694 691 695 - rv = dev_set_mac_address(old_active->dev, &saddr); 692 + rv = dev_set_mac_address(old_active->dev, 693 + (struct sockaddr *)&ss); 696 694 if (rv) 697 695 netdev_err(bond->dev, "Error %d setting MAC of slave %s\n", 698 696 -rv, new_active->dev->name); ··· 1190 1184 kfree_skb(skb); 1191 1185 return RX_HANDLER_CONSUMED; 1192 1186 } 1193 - ether_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr); 1187 + bond_hw_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr, 1188 + bond->dev->addr_len); 1194 1189 } 1195 1190 1196 1191 return ret; ··· 1330 1323 struct bonding *bond = netdev_priv(bond_dev); 1331 1324 const struct net_device_ops *slave_ops = slave_dev->netdev_ops; 1332 1325 struct slave *new_slave = NULL, *prev_slave; 1333 - struct sockaddr addr; 1326 + struct sockaddr_storage ss; 1334 1327 int link_reporting; 1335 1328 int res = 0, i; 1336 1329 ··· 1481 1474 * that need it, and for restoring it upon release, and then 1482 1475 * set it to the master's address 1483 1476 */ 1484 - ether_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr); 1477 + bond_hw_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr, 1478 + slave_dev->addr_len); 1485 1479 1486 1480 if (!bond->params.fail_over_mac || 1487 1481 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 1488 1482 /* Set slave to master's mac address. The application already 1489 1483 * set the master's mac address to that of the first slave 1490 1484 */ 1491 - memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1492 - addr.sa_family = slave_dev->type; 1493 - res = dev_set_mac_address(slave_dev, &addr); 1485 + memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); 1486 + ss.ss_family = slave_dev->type; 1487 + res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss); 1494 1488 if (res) { 1495 1489 netdev_dbg(bond_dev, "Error %d calling set_mac_address\n", res); 1496 1490 goto err_restore_mtu; ··· 1775 1767 * MAC if this slave's MAC is in use by the bond, or at 1776 1768 * least print a warning. 1777 1769 */ 1778 - ether_addr_copy(addr.sa_data, new_slave->perm_hwaddr); 1779 - addr.sa_family = slave_dev->type; 1780 - dev_set_mac_address(slave_dev, &addr); 1770 + bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr, 1771 + new_slave->dev->addr_len); 1772 + ss.ss_family = slave_dev->type; 1773 + dev_set_mac_address(slave_dev, (struct sockaddr *)&ss); 1781 1774 } 1782 1775 1783 1776 err_restore_mtu: ··· 1821 1812 { 1822 1813 struct bonding *bond = netdev_priv(bond_dev); 1823 1814 struct slave *slave, *oldcurrent; 1824 - struct sockaddr addr; 1815 + struct sockaddr_storage ss; 1825 1816 int old_flags = bond_dev->flags; 1826 1817 netdev_features_t old_features = bond_dev->features; 1827 1818 ··· 1956 1947 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE || 1957 1948 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 1958 1949 /* restore original ("permanent") mac address */ 1959 - ether_addr_copy(addr.sa_data, slave->perm_hwaddr); 1960 - addr.sa_family = slave_dev->type; 1961 - dev_set_mac_address(slave_dev, &addr); 1950 + bond_hw_addr_copy(ss.__data, slave->perm_hwaddr, 1951 + slave->dev->addr_len); 1952 + ss.ss_family = slave_dev->type; 1953 + dev_set_mac_address(slave_dev, (struct sockaddr *)&ss); 1962 1954 } 1963 1955 1964 1956 dev_set_mtu(slave_dev, slave->original_mtu); ··· 3636 3626 { 3637 3627 struct bonding *bond = netdev_priv(bond_dev); 3638 3628 struct slave *slave, *rollback_slave; 3639 - struct sockaddr *sa = addr, tmp_sa; 3629 + struct sockaddr_storage *ss = addr, tmp_ss; 3640 3630 struct list_head *iter; 3641 3631 int res = 0; 3642 3632 ··· 3653 3643 BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) 3654 3644 return 0; 3655 3645 3656 - if (!is_valid_ether_addr(sa->sa_data)) 3646 + if (!is_valid_ether_addr(ss->__data)) 3657 3647 return -EADDRNOTAVAIL; 3658 3648 3659 3649 bond_for_each_slave(bond, slave, iter) { ··· 3672 3662 } 3673 3663 3674 3664 /* success */ 3675 - memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 3665 + memcpy(bond_dev->dev_addr, ss->__data, bond_dev->addr_len); 3676 3666 return 0; 3677 3667 3678 3668 unwind: 3679 - memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 3680 - tmp_sa.sa_family = bond_dev->type; 3669 + memcpy(tmp_ss.__data, bond_dev->dev_addr, bond_dev->addr_len); 3670 + tmp_ss.ss_family = bond_dev->type; 3681 3671 3682 3672 /* unwind from head to the slave that failed */ 3683 3673 bond_for_each_slave(bond, rollback_slave, iter) { ··· 3686 3676 if (rollback_slave == slave) 3687 3677 break; 3688 3678 3689 - tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa); 3679 + tmp_res = dev_set_mac_address(rollback_slave->dev, 3680 + (struct sockaddr *)&tmp_ss); 3690 3681 if (tmp_res) { 3691 3682 netdev_dbg(bond_dev, "unwind err %d dev %s\n", 3692 3683 tmp_res, rollback_slave->dev->name);
+2 -1
drivers/net/bonding/bond_procfs.c
··· 183 183 seq_printf(seq, "Link Failure Count: %u\n", 184 184 slave->link_failure_count); 185 185 186 - seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr); 186 + seq_printf(seq, "Permanent HW addr: %*phC\n", 187 + slave->dev->addr_len, slave->perm_hwaddr); 187 188 seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id); 188 189 189 190 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+11 -1
include/net/bonding.h
··· 166 166 u32 link_failure_count; 167 167 u32 speed; 168 168 u16 queue_id; 169 - u8 perm_hwaddr[ETH_ALEN]; 169 + u8 perm_hwaddr[MAX_ADDR_LEN]; 170 170 struct ad_slave_info *ad_info; 171 171 struct tlb_slave_info tlb_info; 172 172 #ifdef CONFIG_NET_POLL_CONTROLLER ··· 400 400 { 401 401 return bond_slave_is_up(slave) && slave->link == BOND_LINK_UP && 402 402 bond_is_active_slave(slave); 403 + } 404 + 405 + static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len) 406 + { 407 + if (len == ETH_ALEN) { 408 + ether_addr_copy(dst, src); 409 + return; 410 + } 411 + 412 + memcpy(dst, src, len); 403 413 } 404 414 405 415 #define BOND_PRI_RESELECT_ALWAYS 0