Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: ipv6: Make address flushing on ifdown optional

Currently, all ipv6 addresses are flushed when the interface is configured
down, including global, static addresses:

$ ip -6 addr show dev eth1
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
inet6 2100:1::2/120 scope global
valid_lft forever preferred_lft forever
inet6 fe80::e0:f9ff:fe79:34bd/64 scope link
valid_lft forever preferred_lft forever
$ ip link set dev eth1 down
$ ip -6 addr show dev eth1
<< nothing; all addresses have been flushed>>

Add a new sysctl to make this behavior optional. The new setting defaults to
flushing all addresses to maintain backwards compatibility. When it is set,
global addresses with no expire times are not flushed on an admin down. The
sysctl can be set per-interface or system-wide for all interfaces:

$ sysctl -w net.ipv6.conf.eth1.keep_addr_on_down=1
or
$ sysctl -w net.ipv6.conf.all.keep_addr_on_down=1

Either setting will keep the addresses on eth1 on an admin down:

$ ip -6 addr show dev eth1
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 state UP qlen 1000
inet6 2100:1::2/120 scope global
valid_lft forever preferred_lft forever
inet6 fe80::e0:f9ff:fe79:34bd/64 scope link
valid_lft forever preferred_lft forever
$ ip link set dev eth1 down
$ ip -6 addr show dev eth1
3: eth1: <BROADCAST,MULTICAST> mtu 1500 state DOWN qlen 1000
inet6 2100:1::2/120 scope global tentative
valid_lft forever preferred_lft forever
inet6 fe80::e0:f9ff:fe79:34bd/64 scope link tentative
valid_lft forever preferred_lft forever

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

David Ahern and committed by
David S. Miller
f1705ec1 619b1745

+132 -15
+9
Documentation/networking/ip-sysctl.txt
··· 1563 1563 Preferred lifetime (in seconds) for temporary addresses. 1564 1564 Default: 86400 (1 day) 1565 1565 1566 + keep_addr_on_down - INTEGER 1567 + Keep all IPv6 addresses on an interface down event. If set static 1568 + global addresses with no expiration time are not flushed. 1569 + >0 : enabled 1570 + 0 : system default 1571 + <0 : disabled 1572 + 1573 + Default: 0 (addresses are removed) 1574 + 1566 1575 max_desync_factor - INTEGER 1567 1576 Maximum value for DESYNC_FACTOR, which is a random value 1568 1577 that ensures that clients don't synchronize with each
+1
include/linux/ipv6.h
··· 62 62 struct in6_addr secret; 63 63 } stable_secret; 64 64 __s32 use_oif_addrs_only; 65 + __s32 keep_addr_on_down; 65 66 void *sysctl; 66 67 }; 67 68
+1
include/uapi/linux/ipv6.h
··· 176 176 DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, 177 177 DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, 178 178 DEVCONF_DROP_UNSOLICITED_NA, 179 + DEVCONF_KEEP_ADDR_ON_DOWN, 179 180 DEVCONF_MAX 180 181 }; 181 182
+121 -15
net/ipv6/addrconf.c
··· 216 216 }, 217 217 .use_oif_addrs_only = 0, 218 218 .ignore_routes_with_linkdown = 0, 219 + .keep_addr_on_down = 0, 219 220 }; 220 221 221 222 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { ··· 261 260 }, 262 261 .use_oif_addrs_only = 0, 263 262 .ignore_routes_with_linkdown = 0, 263 + .keep_addr_on_down = 0, 264 264 }; 265 265 266 266 /* Check if a valid qdisc is available */ ··· 3170 3168 } 3171 3169 #endif 3172 3170 3171 + static int fixup_permanent_addr(struct inet6_dev *idev, 3172 + struct inet6_ifaddr *ifp) 3173 + { 3174 + if (!ifp->rt) { 3175 + struct rt6_info *rt; 3176 + 3177 + rt = addrconf_dst_alloc(idev, &ifp->addr, false); 3178 + if (unlikely(IS_ERR(rt))) 3179 + return PTR_ERR(rt); 3180 + 3181 + ifp->rt = rt; 3182 + } 3183 + 3184 + if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { 3185 + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, 3186 + idev->dev, 0, 0); 3187 + } 3188 + 3189 + addrconf_dad_start(ifp); 3190 + 3191 + return 0; 3192 + } 3193 + 3194 + static void addrconf_permanent_addr(struct net_device *dev) 3195 + { 3196 + struct inet6_ifaddr *ifp, *tmp; 3197 + struct inet6_dev *idev; 3198 + 3199 + idev = __in6_dev_get(dev); 3200 + if (!idev) 3201 + return; 3202 + 3203 + write_lock_bh(&idev->lock); 3204 + 3205 + list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { 3206 + if ((ifp->flags & IFA_F_PERMANENT) && 3207 + fixup_permanent_addr(idev, ifp) < 0) { 3208 + write_unlock_bh(&idev->lock); 3209 + ipv6_del_addr(ifp); 3210 + write_lock_bh(&idev->lock); 3211 + 3212 + net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", 3213 + idev->dev->name, &ifp->addr); 3214 + } 3215 + } 3216 + 3217 + write_unlock_bh(&idev->lock); 3218 + } 3219 + 3173 3220 static int addrconf_notify(struct notifier_block *this, unsigned long event, 3174 3221 void *ptr) 3175 3222 { ··· 3303 3252 3304 3253 run_pending = 1; 3305 3254 } 3255 + 3256 + /* restore routes for permanent addresses */ 3257 + addrconf_permanent_addr(dev); 
3306 3258 3307 3259 switch (dev->type) { 3308 3260 #if IS_ENABLED(CONFIG_IPV6_SIT) ··· 3410 3356 { 3411 3357 struct net *net = dev_net(dev); 3412 3358 struct inet6_dev *idev; 3413 - struct inet6_ifaddr *ifa; 3359 + struct inet6_ifaddr *ifa, *tmp; 3360 + struct list_head del_list; 3361 + int _keep_addr; 3362 + bool keep_addr; 3414 3363 int state, i; 3415 3364 3416 3365 ASSERT_RTNL(); ··· 3440 3383 3441 3384 } 3442 3385 3386 + /* aggregate the system setting and interface setting */ 3387 + _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; 3388 + if (!_keep_addr) 3389 + _keep_addr = idev->cnf.keep_addr_on_down; 3390 + 3391 + /* combine the user config with event to determine if permanent 3392 + * addresses are to be removed from address hash table 3393 + */ 3394 + keep_addr = !(how || _keep_addr <= 0); 3395 + 3443 3396 /* Step 2: clear hash table */ 3444 3397 for (i = 0; i < IN6_ADDR_HSIZE; i++) { 3445 3398 struct hlist_head *h = &inet6_addr_lst[i]; ··· 3458 3391 restart: 3459 3392 hlist_for_each_entry_rcu(ifa, h, addr_lst) { 3460 3393 if (ifa->idev == idev) { 3461 - hlist_del_init_rcu(&ifa->addr_lst); 3462 3394 addrconf_del_dad_work(ifa); 3463 - goto restart; 3395 + /* combined flag + permanent flag decide if 3396 + * address is retained on a down event 3397 + */ 3398 + if (!keep_addr || 3399 + !(ifa->flags & IFA_F_PERMANENT)) { 3400 + hlist_del_init_rcu(&ifa->addr_lst); 3401 + goto restart; 3402 + } 3464 3403 } 3465 3404 } 3466 3405 spin_unlock_bh(&addrconf_hash_lock); ··· 3500 3427 write_lock_bh(&idev->lock); 3501 3428 } 3502 3429 3503 - while (!list_empty(&idev->addr_list)) { 3504 - ifa = list_first_entry(&idev->addr_list, 3505 - struct inet6_ifaddr, if_list); 3430 + /* re-combine the user config with event to determine if permanent 3431 + * addresses are to be removed from the interface list 3432 + */ 3433 + keep_addr = (!how && _keep_addr > 0); 3434 + 3435 + INIT_LIST_HEAD(&del_list); 3436 + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { 
3506 3437 addrconf_del_dad_work(ifa); 3507 3438 3508 - list_del(&ifa->if_list); 3509 - 3510 3439 write_unlock_bh(&idev->lock); 3511 - 3512 3440 spin_lock_bh(&ifa->lock); 3513 - state = ifa->state; 3514 - ifa->state = INET6_IFADDR_STATE_DEAD; 3441 + 3442 + if (keep_addr && (ifa->flags & IFA_F_PERMANENT)) { 3443 + /* set state to skip the notifier below */ 3444 + state = INET6_IFADDR_STATE_DEAD; 3445 + ifa->state = 0; 3446 + if (!(ifa->flags & IFA_F_NODAD)) 3447 + ifa->flags |= IFA_F_TENTATIVE; 3448 + } else { 3449 + state = ifa->state; 3450 + ifa->state = INET6_IFADDR_STATE_DEAD; 3451 + 3452 + list_del(&ifa->if_list); 3453 + list_add(&ifa->if_list, &del_list); 3454 + } 3455 + 3515 3456 spin_unlock_bh(&ifa->lock); 3516 3457 3517 3458 if (state != INET6_IFADDR_STATE_DEAD) { 3518 3459 __ipv6_ifa_notify(RTM_DELADDR, ifa); 3519 3460 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); 3520 3461 } 3521 - in6_ifa_put(ifa); 3522 3462 3523 3463 write_lock_bh(&idev->lock); 3524 3464 } 3525 3465 3526 3466 write_unlock_bh(&idev->lock); 3467 + 3468 + /* now clean up addresses to be removed */ 3469 + while (!list_empty(&del_list)) { 3470 + ifa = list_first_entry(&del_list, 3471 + struct inet6_ifaddr, if_list); 3472 + list_del(&ifa->if_list); 3473 + 3474 + in6_ifa_put(ifa); 3475 + } 3527 3476 3528 3477 /* Step 5: Discard anycast and multicast list */ 3529 3478 if (how) { ··· 4811 4716 array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; 4812 4717 array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; 4813 4718 array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; 4719 + array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; 4814 4720 } 4815 4721 4816 4722 static inline size_t inet6_ifla6_size(void) ··· 5293 5197 if (rt) 5294 5198 ip6_del_rt(rt); 5295 5199 } 5296 - dst_hold(&ifp->rt->dst); 5200 + if (ifp->rt) { 5201 + dst_hold(&ifp->rt->dst); 5297 5202 5298 - ip6_del_rt(ifp->rt); 5299 - 5203 + ip6_del_rt(ifp->rt); 5204 + ifp->rt = NULL; 
5205 + } 5300 5206 rt_genid_bump_ipv6(net); 5301 5207 break; 5302 5208 } ··· 5900 5802 .maxlen = sizeof(int), 5901 5803 .mode = 0644, 5902 5804 .proc_handler = proc_dointvec, 5805 + }, 5806 + { 5807 + .procname = "keep_addr_on_down", 5808 + .data = &ipv6_devconf.keep_addr_on_down, 5809 + .maxlen = sizeof(int), 5810 + .mode = 0644, 5811 + .proc_handler = proc_dointvec, 5812 + 5903 5813 }, 5904 5814 { 5905 5815 /* sentinel */