Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

flowcache: Make flow cache name space aware

Inserting an entry into the flowcache, or flushing the flowcache, should be
done on a per-net scope. The reason is that in the original implementation,
a flush operation triggered from a fat netns crammed with flow entries would
also wipe out the entries of a slim netns holding only a few flow cache
entries.

Since the flowcache is tightly coupled with IPsec, it is easier to put the
flow cache global parameters into the xfrm part of the network namespace. The
last remaining task is to bump the flow cache genid and flush the flow cache
in a per-net style as well.

Signed-off-by: Fan Du <fan.du@windriver.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

authored by

Fan Du and committed by
Steffen Klassert
ca925cf1 01714109

+103 -77
+3 -2
include/net/flow.h
··· 218 218 const struct flowi *key, u16 family, 219 219 u8 dir, flow_resolve_t resolver, 220 220 void *ctx); 221 + int flow_cache_init(struct net *net); 221 222 222 - void flow_cache_flush(void); 223 - void flow_cache_flush_deferred(void); 223 + void flow_cache_flush(struct net *net); 224 + void flow_cache_flush_deferred(struct net *net); 224 225 extern atomic_t flow_cache_genid; 225 226 226 227 #endif
+25
include/net/flowcache.h
··· 1 + #ifndef _NET_FLOWCACHE_H 2 + #define _NET_FLOWCACHE_H 3 + 4 + #include <linux/interrupt.h> 5 + #include <linux/types.h> 6 + #include <linux/timer.h> 7 + #include <linux/notifier.h> 8 + 9 + struct flow_cache_percpu { 10 + struct hlist_head *hash_table; 11 + int hash_count; 12 + u32 hash_rnd; 13 + int hash_rnd_recalc; 14 + struct tasklet_struct flush_tasklet; 15 + }; 16 + 17 + struct flow_cache { 18 + u32 hash_shift; 19 + struct flow_cache_percpu __percpu *percpu; 20 + struct notifier_block hotcpu_notifier; 21 + int low_watermark; 22 + int high_watermark; 23 + struct timer_list rnd_timer; 24 + }; 25 + #endif /* _NET_FLOWCACHE_H */
+11
include/net/netns/xfrm.h
··· 6 6 #include <linux/workqueue.h> 7 7 #include <linux/xfrm.h> 8 8 #include <net/dst_ops.h> 9 + #include <net/flowcache.h> 9 10 10 11 struct ctl_table_header; 11 12 ··· 62 61 spinlock_t xfrm_policy_sk_bundle_lock; 63 62 rwlock_t xfrm_policy_lock; 64 63 struct mutex xfrm_cfg_mutex; 64 + 65 + /* flow cache part */ 66 + struct flow_cache flow_cache_global; 67 + struct kmem_cache *flow_cachep; 68 + atomic_t flow_cache_genid; 69 + struct list_head flow_cache_gc_list; 70 + spinlock_t flow_cache_gc_lock; 71 + struct work_struct flow_cache_gc_work; 72 + struct work_struct flow_cache_flush_work; 73 + struct mutex flow_flush_sem; 65 74 }; 66 75 67 76 #endif
+57 -70
net/core/flow.c
··· 24 24 #include <net/flow.h> 25 25 #include <linux/atomic.h> 26 26 #include <linux/security.h> 27 + #include <net/net_namespace.h> 27 28 28 29 struct flow_cache_entry { 29 30 union { ··· 39 38 struct flow_cache_object *object; 40 39 }; 41 40 42 - struct flow_cache_percpu { 43 - struct hlist_head *hash_table; 44 - int hash_count; 45 - u32 hash_rnd; 46 - int hash_rnd_recalc; 47 - struct tasklet_struct flush_tasklet; 48 - }; 49 - 50 41 struct flow_flush_info { 51 42 struct flow_cache *cache; 52 43 atomic_t cpuleft; 53 44 struct completion completion; 54 45 }; 55 - 56 - struct flow_cache { 57 - u32 hash_shift; 58 - struct flow_cache_percpu __percpu *percpu; 59 - struct notifier_block hotcpu_notifier; 60 - int low_watermark; 61 - int high_watermark; 62 - struct timer_list rnd_timer; 63 - }; 64 - 65 - atomic_t flow_cache_genid = ATOMIC_INIT(0); 66 - EXPORT_SYMBOL(flow_cache_genid); 67 - static struct flow_cache flow_cache_global; 68 - static struct kmem_cache *flow_cachep __read_mostly; 69 - 70 - static DEFINE_SPINLOCK(flow_cache_gc_lock); 71 - static LIST_HEAD(flow_cache_gc_list); 72 46 73 47 #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) 74 48 #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) ··· 60 84 add_timer(&fc->rnd_timer); 61 85 } 62 86 63 - static int flow_entry_valid(struct flow_cache_entry *fle) 87 + static int flow_entry_valid(struct flow_cache_entry *fle, 88 + struct netns_xfrm *xfrm) 64 89 { 65 - if (atomic_read(&flow_cache_genid) != fle->genid) 90 + if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) 66 91 return 0; 67 92 if (fle->object && !fle->object->ops->check(fle->object)) 68 93 return 0; 69 94 return 1; 70 95 } 71 96 72 - static void flow_entry_kill(struct flow_cache_entry *fle) 97 + static void flow_entry_kill(struct flow_cache_entry *fle, 98 + struct netns_xfrm *xfrm) 73 99 { 74 100 if (fle->object) 75 101 fle->object->ops->delete(fle->object); 76 - kmem_cache_free(flow_cachep, fle); 102 + kmem_cache_free(xfrm->flow_cachep, fle); 
77 103 } 78 104 79 105 static void flow_cache_gc_task(struct work_struct *work) 80 106 { 81 107 struct list_head gc_list; 82 108 struct flow_cache_entry *fce, *n; 109 + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, 110 + flow_cache_gc_work); 83 111 84 112 INIT_LIST_HEAD(&gc_list); 85 - spin_lock_bh(&flow_cache_gc_lock); 86 - list_splice_tail_init(&flow_cache_gc_list, &gc_list); 87 - spin_unlock_bh(&flow_cache_gc_lock); 113 + spin_lock_bh(&xfrm->flow_cache_gc_lock); 114 + list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); 115 + spin_unlock_bh(&xfrm->flow_cache_gc_lock); 88 116 89 117 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) 90 - flow_entry_kill(fce); 118 + flow_entry_kill(fce, xfrm); 91 119 } 92 - static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); 93 120 94 121 static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, 95 - int deleted, struct list_head *gc_list) 122 + int deleted, struct list_head *gc_list, 123 + struct netns_xfrm *xfrm) 96 124 { 97 125 if (deleted) { 98 126 fcp->hash_count -= deleted; 99 - spin_lock_bh(&flow_cache_gc_lock); 100 - list_splice_tail(gc_list, &flow_cache_gc_list); 101 - spin_unlock_bh(&flow_cache_gc_lock); 102 - schedule_work(&flow_cache_gc_work); 127 + spin_lock_bh(&xfrm->flow_cache_gc_lock); 128 + list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); 129 + spin_unlock_bh(&xfrm->flow_cache_gc_lock); 130 + schedule_work(&xfrm->flow_cache_gc_work); 103 131 } 104 132 } 105 133 ··· 115 135 struct hlist_node *tmp; 116 136 LIST_HEAD(gc_list); 117 137 int i, deleted = 0; 138 + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, 139 + flow_cache_global); 118 140 119 141 for (i = 0; i < flow_cache_hash_size(fc); i++) { 120 142 int saved = 0; ··· 124 142 hlist_for_each_entry_safe(fle, tmp, 125 143 &fcp->hash_table[i], u.hlist) { 126 144 if (saved < shrink_to && 127 - flow_entry_valid(fle)) { 145 + flow_entry_valid(fle, xfrm)) { 128 146 saved++; 129 147 } else { 130 148 
deleted++; ··· 134 152 } 135 153 } 136 154 137 - flow_cache_queue_garbage(fcp, deleted, &gc_list); 155 + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); 138 156 } 139 157 140 158 static void flow_cache_shrink(struct flow_cache *fc, ··· 190 208 flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, 191 209 flow_resolve_t resolver, void *ctx) 192 210 { 193 - struct flow_cache *fc = &flow_cache_global; 211 + struct flow_cache *fc = &net->xfrm.flow_cache_global; 194 212 struct flow_cache_percpu *fcp; 195 213 struct flow_cache_entry *fle, *tfle; 196 214 struct flow_cache_object *flo; ··· 230 248 if (fcp->hash_count > fc->high_watermark) 231 249 flow_cache_shrink(fc, fcp); 232 250 233 - fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); 251 + fle = kmem_cache_alloc(net->xfrm.flow_cachep, GFP_ATOMIC); 234 252 if (fle) { 235 253 fle->net = net; 236 254 fle->family = family; ··· 240 258 hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); 241 259 fcp->hash_count++; 242 260 } 243 - } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { 261 + } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { 244 262 flo = fle->object; 245 263 if (!flo) 246 264 goto ret_object; ··· 261 279 } 262 280 flo = resolver(net, key, family, dir, flo, ctx); 263 281 if (fle) { 264 - fle->genid = atomic_read(&flow_cache_genid); 282 + fle->genid = atomic_read(&net->xfrm.flow_cache_genid); 265 283 if (!IS_ERR(flo)) 266 284 fle->object = flo; 267 285 else ··· 285 303 struct hlist_node *tmp; 286 304 LIST_HEAD(gc_list); 287 305 int i, deleted = 0; 306 + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, 307 + flow_cache_global); 288 308 289 309 fcp = this_cpu_ptr(fc->percpu); 290 310 for (i = 0; i < flow_cache_hash_size(fc); i++) { 291 311 hlist_for_each_entry_safe(fle, tmp, 292 312 &fcp->hash_table[i], u.hlist) { 293 - if (flow_entry_valid(fle)) 313 + if (flow_entry_valid(fle, xfrm)) 294 314 continue; 295 315 296 316 deleted++; 
··· 301 317 } 302 318 } 303 319 304 - flow_cache_queue_garbage(fcp, deleted, &gc_list); 320 + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); 305 321 306 322 if (atomic_dec_and_test(&info->cpuleft)) 307 323 complete(&info->completion); ··· 335 351 tasklet_schedule(tasklet); 336 352 } 337 353 338 - void flow_cache_flush(void) 354 + void flow_cache_flush(struct net *net) 339 355 { 340 356 struct flow_flush_info info; 341 - static DEFINE_MUTEX(flow_flush_sem); 342 357 cpumask_var_t mask; 343 358 int i, self; 344 359 ··· 348 365 349 366 /* Don't want cpus going down or up during this. */ 350 367 get_online_cpus(); 351 - mutex_lock(&flow_flush_sem); 352 - info.cache = &flow_cache_global; 368 + mutex_lock(&net->xfrm.flow_flush_sem); 369 + info.cache = &net->xfrm.flow_cache_global; 353 370 for_each_online_cpu(i) 354 371 if (!flow_cache_percpu_empty(info.cache, i)) 355 372 cpumask_set_cpu(i, mask); ··· 369 386 wait_for_completion(&info.completion); 370 387 371 388 done: 372 - mutex_unlock(&flow_flush_sem); 389 + mutex_unlock(&net->xfrm.flow_flush_sem); 373 390 put_online_cpus(); 374 391 free_cpumask_var(mask); 375 392 } 376 393 377 394 static void flow_cache_flush_task(struct work_struct *work) 378 395 { 379 - flow_cache_flush(); 396 + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, 397 + flow_cache_gc_work); 398 + struct net *net = container_of(xfrm, struct net, xfrm); 399 + 400 + flow_cache_flush(net); 380 401 } 381 402 382 - static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); 383 - 384 - void flow_cache_flush_deferred(void) 403 + void flow_cache_flush_deferred(struct net *net) 385 404 { 386 - schedule_work(&flow_cache_flush_work); 405 + schedule_work(&net->xfrm.flow_cache_flush_work); 387 406 } 388 407 389 408 static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) ··· 410 425 unsigned long action, 411 426 void *hcpu) 412 427 { 413 - struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); 428 + 
struct flow_cache *fc = container_of(nfb, struct flow_cache, 429 + hotcpu_notifier); 414 430 int res, cpu = (unsigned long) hcpu; 415 431 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); 416 432 ··· 430 444 return NOTIFY_OK; 431 445 } 432 446 433 - static int __init flow_cache_init(struct flow_cache *fc) 447 + int flow_cache_init(struct net *net) 434 448 { 435 449 int i; 450 + struct flow_cache *fc = &net->xfrm.flow_cache_global; 451 + 452 + /* Initialize per-net flow cache global variables here */ 453 + net->xfrm.flow_cachep = kmem_cache_create("flow_cache", 454 + sizeof(struct flow_cache_entry), 455 + 0, SLAB_PANIC, NULL); 456 + spin_lock_init(&net->xfrm.flow_cache_gc_lock); 457 + INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); 458 + INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); 459 + INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); 460 + mutex_init(&net->xfrm.flow_flush_sem); 436 461 437 462 fc->hash_shift = 10; 438 463 fc->low_watermark = 2 * flow_cache_hash_size(fc); ··· 481 484 482 485 return -ENOMEM; 483 486 } 484 - 485 - static int __init flow_cache_init_global(void) 486 - { 487 - flow_cachep = kmem_cache_create("flow_cache", 488 - sizeof(struct flow_cache_entry), 489 - 0, SLAB_PANIC, NULL); 490 - 491 - return flow_cache_init(&flow_cache_global); 492 - } 493 - 494 - module_init(flow_cache_init_global); 487 + EXPORT_SYMBOL(flow_cache_init);
+4 -3
net/xfrm/xfrm_policy.c
··· 661 661 hlist_add_head(&policy->bydst, chain); 662 662 xfrm_pol_hold(policy); 663 663 net->xfrm.policy_count[dir]++; 664 - atomic_inc(&flow_cache_genid); 664 + atomic_inc(&net->xfrm.flow_cache_genid); 665 665 666 666 /* After previous checking, family can either be AF_INET or AF_INET6 */ 667 667 if (policy->family == AF_INET) ··· 2567 2567 2568 2568 void xfrm_garbage_collect(struct net *net) 2569 2569 { 2570 - flow_cache_flush(); 2570 + flow_cache_flush(net); 2571 2571 __xfrm_garbage_collect(net); 2572 2572 } 2573 2573 EXPORT_SYMBOL(xfrm_garbage_collect); 2574 2574 2575 2575 static void xfrm_garbage_collect_deferred(struct net *net) 2576 2576 { 2577 - flow_cache_flush_deferred(); 2577 + flow_cache_flush_deferred(net); 2578 2578 __xfrm_garbage_collect(net); 2579 2579 } 2580 2580 ··· 2947 2947 spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock); 2948 2948 mutex_init(&net->xfrm.xfrm_cfg_mutex); 2949 2949 2950 + flow_cache_init(net); 2950 2951 return 0; 2951 2952 2952 2953 out_sysctl:
+3 -2
security/selinux/include/xfrm.h
··· 45 45 { 46 46 struct net *net; 47 47 48 - atomic_inc(&flow_cache_genid); 49 48 rtnl_lock(); 50 - for_each_net(net) 49 + for_each_net(net) { 50 + atomic_inc(&net->xfrm.flow_cache_genid); 51 51 rt_genid_bump_all(net); 52 + } 52 53 rtnl_unlock(); 53 54 } 54 55 #else