Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bonding: Add independent control state machine

Add support for the independent control state machine per IEEE
802.1AX-2008 5.4.15 in addition to the existing implementation of the
coupled control state machine.

Introduces two new states, AD_MUX_COLLECTING and AD_MUX_DISTRIBUTING, in
the LACP MUX state machine for separated handling of an initial
Collecting state before the Collecting and Distributing state. This
enables a port to be in a state where it can receive incoming packets
while not yet distributing. This is useful for reducing packet loss when
a port begins distributing before its partner is able to collect.

Added new functions such as bond_set_slave_tx_disabled_flags and
bond_set_slave_rx_enabled_flags to precisely manage the port's collecting
and distributing states. Previously, there was no dedicated method to
disable TX while keeping RX enabled, which this patch addresses.

Note that the regular flow process in the kernel's bonding driver remains
unaffected by this patch. The extension requires explicit opt-in by the
user (in order to ensure no disruptions for existing setups) via netlink
support using the new bonding parameter coupled_control. The default value
for coupled_control is set to 1 so as to preserve existing behaviour.

Signed-off-by: Aahil Awatramani <aahila@google.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240202175858.1573852-1-aahila@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Aahil Awatramani and committed by
Paolo Abeni
240fd405 04006731

+234 -8
+12
Documentation/networking/bonding.rst
··· 444 444 445 445 The default value is 2, and the allowable range is 1 - 255. 446 446 447 + coupled_control 448 + 449 + Specifies whether the LACP state machine's MUX in the 802.3ad mode 450 + should have separate Collecting and Distributing states. 451 + 452 + This is done by implementing the independent control state machine per 453 + IEEE 802.1AX-2008 5.4.15 in addition to the existing coupled control 454 + state machine. 455 + 456 + The default value is 1. This setting does not separate the Collecting 457 + and Distributing states, maintaining the bond in coupled control. 458 + 447 459 downdelay 448 460 449 461 Specifies the time, in milliseconds, to wait before disabling
+150 -7
drivers/net/bonding/bond_3ad.c
··· 106 106 static void ad_clear_agg(struct aggregator *aggregator); 107 107 static void ad_initialize_agg(struct aggregator *aggregator); 108 108 static void ad_initialize_port(struct port *port, int lacp_fast); 109 + static void ad_enable_collecting(struct port *port); 110 + static void ad_disable_distributing(struct port *port, 111 + bool *update_slave_arr); 109 112 static void ad_enable_collecting_distributing(struct port *port, 110 113 bool *update_slave_arr); 111 114 static void ad_disable_collecting_distributing(struct port *port, ··· 175 172 } 176 173 177 174 /** 175 + * __disable_distributing_port - disable the port's slave for distributing. 176 + * Port will still be able to collect. 177 + * @port: the port we're looking at 178 + * 179 + * This will disable only distributing on the port's slave. 180 + */ 181 + static void __disable_distributing_port(struct port *port) 182 + { 183 + bond_set_slave_tx_disabled_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); 184 + } 185 + 186 + /** 187 + * __enable_collecting_port - enable the port's slave for collecting, 188 + * if it's up 189 + * @port: the port we're looking at 190 + * 191 + * This will enable only collecting on the port's slave. 192 + */ 193 + static void __enable_collecting_port(struct port *port) 194 + { 195 + struct slave *slave = port->slave; 196 + 197 + if (slave->link == BOND_LINK_UP && bond_slave_is_up(slave)) 198 + bond_set_slave_rx_enabled_flags(slave, BOND_SLAVE_NOTIFY_LATER); 199 + } 200 + 201 + /** 178 202 * __disable_port - disable the port's slave 179 203 * @port: the port we're looking at 204 + * 205 + * This will disable both collecting and distributing on the port's slave. 180 206 */ 181 207 static inline void __disable_port(struct port *port) 182 208 { ··· 215 183 /** 216 184 * __enable_port - enable the port's slave, if it's up 217 185 * @port: the port we're looking at 186 + * 187 + * This will enable both collecting and distributing on the port's slave. 
218 188 */ 219 189 static inline void __enable_port(struct port *port) 220 190 { ··· 227 193 } 228 194 229 195 /** 230 - * __port_is_enabled - check if the port's slave is in active state 196 + * __port_move_to_attached_state - check if port should transition back to attached 197 + * state. 231 198 * @port: the port we're looking at 232 199 */ 233 - static inline int __port_is_enabled(struct port *port) 200 + static bool __port_move_to_attached_state(struct port *port) 201 + { 202 + if (!(port->sm_vars & AD_PORT_SELECTED) || 203 + (port->sm_vars & AD_PORT_STANDBY) || 204 + !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || 205 + !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) 206 + port->sm_mux_state = AD_MUX_ATTACHED; 207 + 208 + return port->sm_mux_state == AD_MUX_ATTACHED; 209 + } 210 + 211 + /** 212 + * __port_is_collecting_distributing - check if the port's slave is in the 213 + * combined collecting/distributing state 214 + * @port: the port we're looking at 215 + */ 216 + static int __port_is_collecting_distributing(struct port *port) 234 217 { 235 218 return bond_is_active_slave(port->slave); 236 219 } ··· 993 942 */ 994 943 static void ad_mux_machine(struct port *port, bool *update_slave_arr) 995 944 { 945 + struct bonding *bond = __get_bond_by_port(port); 996 946 mux_states_t last_state; 997 947 998 948 /* keep current State Machine state to compare later if it was ··· 1051 999 if ((port->sm_vars & AD_PORT_SELECTED) && 1052 1000 (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && 1053 1001 !__check_agg_selection_timer(port)) { 1054 - if (port->aggregator->is_active) 1055 - port->sm_mux_state = 1056 - AD_MUX_COLLECTING_DISTRIBUTING; 1002 + if (port->aggregator->is_active) { 1003 + int state = AD_MUX_COLLECTING_DISTRIBUTING; 1004 + 1005 + if (!bond->params.coupled_control) 1006 + state = AD_MUX_COLLECTING; 1007 + port->sm_mux_state = state; 1008 + } 1057 1009 } else if (!(port->sm_vars & AD_PORT_SELECTED) || 1058 1010 
(port->sm_vars & AD_PORT_STANDBY)) { 1059 1011 /* if UNSELECTED or STANDBY */ ··· 1075 1019 } 1076 1020 break; 1077 1021 case AD_MUX_COLLECTING_DISTRIBUTING: 1022 + if (!__port_move_to_attached_state(port)) { 1023 + /* if port state hasn't changed make 1024 + * sure that a collecting distributing 1025 + * port in an active aggregator is enabled 1026 + */ 1027 + if (port->aggregator->is_active && 1028 + !__port_is_collecting_distributing(port)) { 1029 + __enable_port(port); 1030 + *update_slave_arr = true; 1031 + } 1032 + } 1033 + break; 1034 + case AD_MUX_COLLECTING: 1035 + if (!__port_move_to_attached_state(port)) { 1036 + if ((port->sm_vars & AD_PORT_SELECTED) && 1037 + (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && 1038 + (port->partner_oper.port_state & LACP_STATE_COLLECTING)) { 1039 + port->sm_mux_state = AD_MUX_DISTRIBUTING; 1040 + } else { 1041 + /* If port state hasn't changed, make sure that a collecting 1042 + * port is enabled for an active aggregator. 1043 + */ 1044 + struct slave *slave = port->slave; 1045 + 1046 + if (port->aggregator->is_active && 1047 + bond_is_slave_rx_disabled(slave)) { 1048 + ad_enable_collecting(port); 1049 + *update_slave_arr = true; 1050 + } 1051 + } 1052 + } 1053 + break; 1054 + case AD_MUX_DISTRIBUTING: 1078 1055 if (!(port->sm_vars & AD_PORT_SELECTED) || 1079 1056 (port->sm_vars & AD_PORT_STANDBY) || 1057 + !(port->partner_oper.port_state & LACP_STATE_COLLECTING) || 1080 1058 !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || 1081 1059 !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) { 1082 - port->sm_mux_state = AD_MUX_ATTACHED; 1060 + port->sm_mux_state = AD_MUX_COLLECTING; 1083 1061 } else { 1084 1062 /* if port state hasn't changed make 1085 1063 * sure that a collecting distributing ··· 1121 1031 */ 1122 1032 if (port->aggregator && 1123 1033 port->aggregator->is_active && 1124 - !__port_is_enabled(port)) { 1034 + !__port_is_collecting_distributing(port)) { 1125 1035 
__enable_port(port); 1126 1036 *update_slave_arr = true; 1127 1037 } ··· 1171 1081 ad_enable_collecting_distributing(port, 1172 1082 update_slave_arr); 1173 1083 port->ntt = true; 1084 + break; 1085 + case AD_MUX_COLLECTING: 1086 + port->actor_oper_port_state |= LACP_STATE_COLLECTING; 1087 + port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; 1088 + port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; 1089 + ad_enable_collecting(port); 1090 + ad_disable_distributing(port, update_slave_arr); 1091 + port->ntt = true; 1092 + break; 1093 + case AD_MUX_DISTRIBUTING: 1094 + port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; 1095 + port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; 1096 + ad_enable_collecting_distributing(port, 1097 + update_slave_arr); 1174 1098 break; 1175 1099 default: 1176 1100 break; ··· 2007 1903 port->churn_partner_count = 0; 2008 1904 2009 1905 memcpy(&port->lacpdu, &lacpdu, sizeof(lacpdu)); 1906 + } 1907 + } 1908 + 1909 + /** 1910 + * ad_enable_collecting - enable a port's receive 1911 + * @port: the port we're looking at 1912 + * 1913 + * Enable @port if it's in an active aggregator 1914 + */ 1915 + static void ad_enable_collecting(struct port *port) 1916 + { 1917 + if (port->aggregator->is_active) { 1918 + struct slave *slave = port->slave; 1919 + 1920 + slave_dbg(slave->bond->dev, slave->dev, 1921 + "Enabling collecting on port %d (LAG %d)\n", 1922 + port->actor_port_number, 1923 + port->aggregator->aggregator_identifier); 1924 + __enable_collecting_port(port); 1925 + } 1926 + } 1927 + 1928 + /** 1929 + * ad_disable_distributing - disable a port's transmit 1930 + * @port: the port we're looking at 1931 + * @update_slave_arr: Does slave array need update? 
1932 + */ 1933 + static void ad_disable_distributing(struct port *port, bool *update_slave_arr) 1934 + { 1935 + if (port->aggregator && 1936 + !MAC_ADDRESS_EQUAL(&port->aggregator->partner_system, 1937 + &(null_mac_addr))) { 1938 + slave_dbg(port->slave->bond->dev, port->slave->dev, 1939 + "Disabling distributing on port %d (LAG %d)\n", 1940 + port->actor_port_number, 1941 + port->aggregator->aggregator_identifier); 1942 + __disable_distributing_port(port); 1943 + /* Slave array needs an update */ 1944 + *update_slave_arr = true; 2010 1945 } 2011 1946 } 2012 1947
+1
drivers/net/bonding/bond_main.c
··· 6306 6306 params->ad_actor_sys_prio = ad_actor_sys_prio; 6307 6307 eth_zero_addr(params->ad_actor_system); 6308 6308 params->ad_user_port_key = ad_user_port_key; 6309 + params->coupled_control = 1; 6309 6310 if (packets_per_slave > 0) { 6310 6311 params->reciprocal_packets_per_slave = 6311 6312 reciprocal_value(packets_per_slave);
+16
drivers/net/bonding/bond_netlink.c
··· 122 122 [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), 123 123 [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, 124 124 [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, 125 + [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 }, 125 126 }; 126 127 127 128 static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { ··· 550 549 return err; 551 550 } 552 551 552 + if (data[IFLA_BOND_COUPLED_CONTROL]) { 553 + int coupled_control = nla_get_u8(data[IFLA_BOND_COUPLED_CONTROL]); 554 + 555 + bond_opt_initval(&newval, coupled_control); 556 + err = __bond_opt_set(bond, BOND_OPT_COUPLED_CONTROL, &newval, 557 + data[IFLA_BOND_COUPLED_CONTROL], extack); 558 + if (err) 559 + return err; 560 + } 561 + 553 562 return 0; 554 563 } 555 564 ··· 626 615 /* IFLA_BOND_NS_IP6_TARGET */ 627 616 nla_total_size(sizeof(struct nlattr)) + 628 617 nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + 618 + nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */ 629 619 0; 630 620 } 631 621 ··· 784 772 785 773 if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX, 786 774 bond->params.missed_max)) 775 + goto nla_put_failure; 776 + 777 + if (nla_put_u8(skb, IFLA_BOND_COUPLED_CONTROL, 778 + bond->params.coupled_control)) 787 779 goto nla_put_failure; 788 780 789 781 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+27 -1
drivers/net/bonding/bond_options.c
··· 84 84 const struct bond_opt_value *newval); 85 85 static int bond_option_missed_max_set(struct bonding *bond, 86 86 const struct bond_opt_value *newval); 87 - 87 + static int bond_option_coupled_control_set(struct bonding *bond, 88 + const struct bond_opt_value *newval); 88 89 89 90 static const struct bond_opt_value bond_mode_tbl[] = { 90 91 { "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT}, ··· 231 230 { "maxval", 255, BOND_VALFLAG_MAX}, 232 231 { "default", 2, BOND_VALFLAG_DEFAULT}, 233 232 { NULL, -1, 0}, 233 + }; 234 + 235 + static const struct bond_opt_value bond_coupled_control_tbl[] = { 236 + { "on", 1, BOND_VALFLAG_DEFAULT}, 237 + { "off", 0, 0}, 238 + { NULL, -1, 0}, 234 239 }; 235 240 236 241 static const struct bond_option bond_opts[BOND_OPT_LAST] = { ··· 503 496 .desc = "Delay between each peer notification on failover event, in milliseconds", 504 497 .values = bond_peer_notif_delay_tbl, 505 498 .set = bond_option_peer_notif_delay_set 499 + }, 500 + [BOND_OPT_COUPLED_CONTROL] = { 501 + .id = BOND_OPT_COUPLED_CONTROL, 502 + .name = "coupled_control", 503 + .desc = "Opt into using coupled control MUX for LACP states", 504 + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), 505 + .flags = BOND_OPTFLAG_IFDOWN, 506 + .values = bond_coupled_control_tbl, 507 + .set = bond_option_coupled_control_set, 506 508 } 507 509 }; 508 510 ··· 1706 1690 newval->value); 1707 1691 1708 1692 bond->params.ad_user_port_key = newval->value; 1693 + return 0; 1694 + } 1695 + 1696 + static int bond_option_coupled_control_set(struct bonding *bond, 1697 + const struct bond_opt_value *newval) 1698 + { 1699 + netdev_info(bond->dev, "Setting coupled_control to %s (%llu)\n", 1700 + newval->string, newval->value); 1701 + 1702 + bond->params.coupled_control = newval->value; 1709 1703 return 0; 1710 1704 }
+2
include/net/bond_3ad.h
··· 54 54 AD_MUX_DETACHED, /* mux machine */ 55 55 AD_MUX_WAITING, /* mux machine */ 56 56 AD_MUX_ATTACHED, /* mux machine */ 57 + AD_MUX_COLLECTING, /* mux machine */ 58 + AD_MUX_DISTRIBUTING, /* mux machine */ 57 59 AD_MUX_COLLECTING_DISTRIBUTING /* mux machine */ 58 60 } mux_states_t; 59 61
+1
include/net/bond_options.h
··· 76 76 BOND_OPT_MISSED_MAX, 77 77 BOND_OPT_NS_TARGETS, 78 78 BOND_OPT_PRIO, 79 + BOND_OPT_COUPLED_CONTROL, 79 80 BOND_OPT_LAST 80 81 }; 81 82
+23
include/net/bonding.h
··· 148 148 #if IS_ENABLED(CONFIG_IPV6) 149 149 struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; 150 150 #endif 151 + int coupled_control; 151 152 152 153 /* 2 bytes of padding : see ether_addr_equal_64bits() */ 153 154 u8 ad_actor_system[ETH_ALEN + 2]; ··· 168 167 u8 backup:1, /* indicates backup slave. Value corresponds with 169 168 BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ 170 169 inactive:1, /* indicates inactive slave */ 170 + rx_disabled:1, /* indicates whether slave's Rx is disabled */ 171 171 should_notify:1, /* indicates whether the state changed */ 172 172 should_notify_link:1; /* indicates whether the link changed */ 173 173 u8 duplex; ··· 570 568 bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); 571 569 if (!slave->bond->params.all_slaves_active) 572 570 slave->inactive = 1; 571 + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) 572 + slave->rx_disabled = 1; 573 + } 574 + 575 + static inline void bond_set_slave_tx_disabled_flags(struct slave *slave, 576 + bool notify) 577 + { 578 + bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); 573 579 } 574 580 575 581 static inline void bond_set_slave_active_flags(struct slave *slave, ··· 585 575 { 586 576 bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); 587 577 slave->inactive = 0; 578 + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) 579 + slave->rx_disabled = 0; 580 + } 581 + 582 + static inline void bond_set_slave_rx_enabled_flags(struct slave *slave, 583 + bool notify) 584 + { 585 + slave->rx_disabled = 0; 588 586 } 589 587 590 588 static inline bool bond_is_slave_inactive(struct slave *slave) 591 589 { 592 590 return slave->inactive; 591 + } 592 + 593 + static inline bool bond_is_slave_rx_disabled(struct slave *slave) 594 + { 595 + return slave->rx_disabled; 593 596 } 594 597 595 598 static inline void bond_propose_link_state(struct slave *slave, int state)
+1
include/uapi/linux/if_link.h
··· 1505 1505 IFLA_BOND_AD_LACP_ACTIVE, 1506 1506 IFLA_BOND_MISSED_MAX, 1507 1507 IFLA_BOND_NS_IP6_TARGET, 1508 + IFLA_BOND_COUPLED_CONTROL, 1508 1509 __IFLA_BOND_MAX, 1509 1510 }; 1510 1511
+1
tools/include/uapi/linux/if_link.h
··· 974 974 IFLA_BOND_AD_LACP_ACTIVE, 975 975 IFLA_BOND_MISSED_MAX, 976 976 IFLA_BOND_NS_IP6_TARGET, 977 + IFLA_BOND_COUPLED_CONTROL, 977 978 __IFLA_BOND_MAX, 978 979 }; 979 980