Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6 netns: Address labels per namespace

This patch makes IPv6 address labels per network namespace.
It keeps the global label table, ip6addrlbl_table, but
adds a 'net' member to each ip6addrlbl_entry.
This new member is taken into account when matching labels.

Changelog
=========
* v1: Initial version
* v2:
* Minimize the penalty when network namespaces are not configured:
* the 'net' member is added only if CONFIG_NET_NS is
defined. This saves space when network namespaces are not
configured.
* 'net' value is retrieved with the inlined function
ip6addrlbl_net() that always returns &init_net when
CONFIG_NET_NS is not defined.
* 'net' member in ip6addrlbl_entry renamed to the less generic
'lbl_net' name (helps code search).

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>

authored by

Benjamin Thery and committed by
YOSHIFUJI Hideaki
3de23255 2b5ead46

+84 -37
+2 -1
include/net/addrconf.h
··· 121 121 */ 122 122 extern int ipv6_addr_label_init(void); 123 123 extern void ipv6_addr_label_rtnl_register(void); 124 - extern u32 ipv6_addr_label(const struct in6_addr *addr, 124 + extern u32 ipv6_addr_label(struct net *net, 125 + const struct in6_addr *addr, 125 126 int type, int ifindex); 126 127 127 128 /*
+7 -5
net/ipv6/addrconf.c
··· 964 964 return 0; 965 965 } 966 966 967 - static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, 967 + static int ipv6_get_saddr_eval(struct net *net, 968 + struct ipv6_saddr_score *score, 968 969 struct ipv6_saddr_dst *dst, 969 970 int i) 970 971 { ··· 1044 1043 break; 1045 1044 case IPV6_SADDR_RULE_LABEL: 1046 1045 /* Rule 6: Prefer matching label */ 1047 - ret = ipv6_addr_label(&score->ifa->addr, score->addr_type, 1046 + ret = ipv6_addr_label(net, 1047 + &score->ifa->addr, score->addr_type, 1048 1048 score->ifa->idev->dev->ifindex) == dst->label; 1049 1049 break; 1050 1050 #ifdef CONFIG_IPV6_PRIVACY ··· 1099 1097 dst.addr = daddr; 1100 1098 dst.ifindex = dst_dev ? dst_dev->ifindex : 0; 1101 1099 dst.scope = __ipv6_addr_src_scope(dst_type); 1102 - dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex); 1100 + dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex); 1103 1101 dst.prefs = prefs; 1104 1102 1105 1103 hiscore->rule = -1; ··· 1167 1165 for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { 1168 1166 int minihiscore, miniscore; 1169 1167 1170 - minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i); 1171 - miniscore = ipv6_get_saddr_eval(score, &dst, i); 1168 + minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i); 1169 + miniscore = ipv6_get_saddr_eval(net, score, &dst, i); 1172 1170 1173 1171 if (minihiscore > miniscore) { 1174 1172 if (i == IPV6_SADDR_RULE_SCOPE &&
+75 -31
net/ipv6/addrlabel.c
··· 29 29 */ 30 30 struct ip6addrlbl_entry 31 31 { 32 + #ifdef CONFIG_NET_NS 33 + struct net *lbl_net; 34 + #endif 32 35 struct in6_addr prefix; 33 36 int prefixlen; 34 37 int ifindex; ··· 48 45 spinlock_t lock; 49 46 u32 seq; 50 47 } ip6addrlbl_table; 48 + 49 + static inline 50 + struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 51 + { 52 + #ifdef CONFIG_NET_NS 53 + return lbl->lbl_net; 54 + #else 55 + return &init_net; 56 + #endif 57 + } 51 58 52 59 /* 53 60 * Default policy table (RFC3484 + extensions) ··· 78 65 79 66 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 80 67 81 - static const __initdata struct ip6addrlbl_init_table 68 + static const __net_initdata struct ip6addrlbl_init_table 82 69 { 83 70 const struct in6_addr *prefix; 84 71 int prefixlen; ··· 121 108 /* Object management */ 122 109 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 123 110 { 111 + #ifdef CONFIG_NET_NS 112 + release_net(p->lbl_net); 113 + #endif 124 114 kfree(p); 125 115 } 126 116 ··· 144 128 } 145 129 146 130 /* Find label */ 147 - static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, 131 + static int __ip6addrlbl_match(struct net *net, 132 + struct ip6addrlbl_entry *p, 148 133 const struct in6_addr *addr, 149 134 int addrtype, int ifindex) 150 135 { 136 + if (!net_eq(ip6addrlbl_net(p), net)) 137 + return 0; 151 138 if (p->ifindex && p->ifindex != ifindex) 152 139 return 0; 153 140 if (p->addrtype && p->addrtype != addrtype) ··· 160 141 return 1; 161 142 } 162 143 163 - static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, 144 + static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 145 + const struct in6_addr *addr, 164 146 int type, int ifindex) 165 147 { 166 148 struct hlist_node *pos; 167 149 struct ip6addrlbl_entry *p; 168 150 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 169 - if (__ip6addrlbl_match(p, addr, type, ifindex)) 151 + if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 170 152 
return p; 171 153 } 172 154 return NULL; 173 155 } 174 156 175 - u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) 157 + u32 ipv6_addr_label(struct net *net, 158 + const struct in6_addr *addr, int type, int ifindex) 176 159 { 177 160 u32 label; 178 161 struct ip6addrlbl_entry *p; ··· 182 161 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 183 162 184 163 rcu_read_lock(); 185 - p = __ipv6_addr_label(addr, type, ifindex); 164 + p = __ipv6_addr_label(net, addr, type, ifindex); 186 165 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 187 166 rcu_read_unlock(); 188 167 ··· 195 174 } 196 175 197 176 /* allocate one entry */ 198 - static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, 177 + static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 178 + const struct in6_addr *prefix, 199 179 int prefixlen, int ifindex, 200 180 u32 label) 201 181 { ··· 238 216 newp->addrtype = addrtype; 239 217 newp->label = label; 240 218 INIT_HLIST_NODE(&newp->list); 219 + #ifdef CONFIG_NET_NS 220 + newp->lbl_net = hold_net(net); 221 + #endif 241 222 atomic_set(&newp->refcnt, 1); 242 223 return newp; 243 224 } ··· 262 237 hlist_for_each_entry_safe(p, pos, n, 263 238 &ip6addrlbl_table.head, list) { 264 239 if (p->prefixlen == newp->prefixlen && 240 + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 265 241 p->ifindex == newp->ifindex && 266 242 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 267 243 if (!replace) { ··· 287 261 } 288 262 289 263 /* add a label */ 290 - static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, 264 + static int ip6addrlbl_add(struct net *net, 265 + const struct in6_addr *prefix, int prefixlen, 291 266 int ifindex, u32 label, int replace) 292 267 { 293 268 struct ip6addrlbl_entry *newp; ··· 301 274 (unsigned int)label, 302 275 replace); 303 276 304 - newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); 277 + newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, 
label); 305 278 if (IS_ERR(newp)) 306 279 return PTR_ERR(newp); 307 280 spin_lock(&ip6addrlbl_table.lock); ··· 313 286 } 314 287 315 288 /* remove a label */ 316 - static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, 289 + static int __ip6addrlbl_del(struct net *net, 290 + const struct in6_addr *prefix, int prefixlen, 317 291 int ifindex) 318 292 { 319 293 struct ip6addrlbl_entry *p = NULL; ··· 328 300 329 301 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 330 302 if (p->prefixlen == prefixlen && 303 + net_eq(ip6addrlbl_net(p), net) && 331 304 p->ifindex == ifindex && 332 305 ipv6_addr_equal(&p->prefix, prefix)) { 333 306 hlist_del_rcu(&p->list); ··· 340 311 return ret; 341 312 } 342 313 343 - static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, 314 + static int ip6addrlbl_del(struct net *net, 315 + const struct in6_addr *prefix, int prefixlen, 344 316 int ifindex) 345 317 { 346 318 struct in6_addr prefix_buf; ··· 354 324 355 325 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 356 326 spin_lock(&ip6addrlbl_table.lock); 357 - ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); 327 + ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 358 328 spin_unlock(&ip6addrlbl_table.lock); 359 329 return ret; 360 330 } 361 331 362 332 /* add default label */ 363 - static __init int ip6addrlbl_init(void) 333 + static int __net_init ip6addrlbl_net_init(struct net *net) 364 334 { 365 335 int err = 0; 366 336 int i; ··· 368 338 ADDRLABEL(KERN_DEBUG "%s()\n", __func__); 369 339 370 340 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 371 - int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, 341 + int ret = ip6addrlbl_add(net, 342 + ip6addrlbl_init_table[i].prefix, 372 343 ip6addrlbl_init_table[i].prefixlen, 373 344 0, 374 345 ip6addrlbl_init_table[i].label, 0); ··· 380 349 return err; 381 350 } 382 351 352 + static void __net_exit ip6addrlbl_net_exit(struct net *net) 353 + { 354 + struct 
ip6addrlbl_entry *p = NULL; 355 + struct hlist_node *pos, *n; 356 + 357 + /* Remove all labels belonging to the exiting net */ 358 + spin_lock(&ip6addrlbl_table.lock); 359 + hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 360 + if (net_eq(ip6addrlbl_net(p), net)) { 361 + hlist_del_rcu(&p->list); 362 + ip6addrlbl_put(p); 363 + } 364 + } 365 + spin_unlock(&ip6addrlbl_table.lock); 366 + } 367 + 368 + static struct pernet_operations ipv6_addr_label_ops = { 369 + .init = ip6addrlbl_net_init, 370 + .exit = ip6addrlbl_net_exit, 371 + }; 372 + 383 373 int __init ipv6_addr_label_init(void) 384 374 { 385 375 spin_lock_init(&ip6addrlbl_table.lock); 386 376 387 - return ip6addrlbl_init(); 377 + return register_pernet_subsys(&ipv6_addr_label_ops); 388 378 } 389 379 390 380 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { ··· 423 371 u32 label; 424 372 int err = 0; 425 373 426 - if (net != &init_net) 427 - return 0; 428 - 429 374 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 430 375 if (err < 0) 431 376 return err; ··· 434 385 return -EINVAL; 435 386 436 387 if (ifal->ifal_index && 437 - !__dev_get_by_index(&init_net, ifal->ifal_index)) 388 + !__dev_get_by_index(net, ifal->ifal_index)) 438 389 return -EINVAL; 439 390 440 391 if (!tb[IFAL_ADDRESS]) ··· 452 403 453 404 switch(nlh->nlmsg_type) { 454 405 case RTM_NEWADDRLABEL: 455 - err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen, 406 + err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 456 407 ifal->ifal_index, label, 457 408 nlh->nlmsg_flags & NLM_F_REPLACE); 458 409 break; 459 410 case RTM_DELADDRLABEL: 460 - err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, 411 + err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 461 412 ifal->ifal_index); 462 413 break; 463 414 default: ··· 507 458 int idx = 0, s_idx = cb->args[0]; 508 459 int err; 509 460 510 - if (net != &init_net) 511 - return 0; 512 - 513 461 rcu_read_lock(); 514 462 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, 
list) { 515 - if (idx >= s_idx) { 463 + if (idx >= s_idx && 464 + net_eq(ip6addrlbl_net(p), net)) { 516 465 if ((err = ip6addrlbl_fill(skb, p, 517 466 ip6addrlbl_table.seq, 518 467 NETLINK_CB(cb->skb).pid, ··· 546 499 struct ip6addrlbl_entry *p; 547 500 struct sk_buff *skb; 548 501 549 - if (net != &init_net) 550 - return 0; 551 - 552 502 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 553 503 if (err < 0) 554 504 return err; ··· 557 513 return -EINVAL; 558 514 559 515 if (ifal->ifal_index && 560 - !__dev_get_by_index(&init_net, ifal->ifal_index)) 516 + !__dev_get_by_index(net, ifal->ifal_index)) 561 517 return -EINVAL; 562 518 563 519 if (!tb[IFAL_ADDRESS]) ··· 568 524 return -EINVAL; 569 525 570 526 rcu_read_lock(); 571 - p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); 527 + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 572 528 if (p && ip6addrlbl_hold(p)) 573 529 p = NULL; 574 530 lseq = ip6addrlbl_table.seq; ··· 596 552 goto out; 597 553 } 598 554 599 - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 555 + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 600 556 out: 601 557 return err; 602 558 }