Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: make lru_add_drain_all() selective

make lru_add_drain_all() only selectively interrupt the cpus that have
per-cpu free pages that can be drained.

This is important in nohz mode, where calling mlockall(), for example,
would otherwise interrupt every core unnecessarily.

This is important on workloads where nohz cores are handling 10 Gb traffic
in userspace. Those CPUs do not enter the kernel and place pages into LRU
pagevecs and they really, really don't want to be interrupted, or they
drop packets on the floor.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Chris Metcalf; committed by Linus Torvalds.
5fbc4616 9cb2dc1c

+40 -6
+1 -1
include/linux/swap.h
··· 280 280 extern void mark_page_accessed(struct page *); 281 281 extern void lru_add_drain(void); 282 282 extern void lru_add_drain_cpu(int cpu); 283 - extern int lru_add_drain_all(void); 283 + extern void lru_add_drain_all(void); 284 284 extern void rotate_reclaimable_page(struct page *page); 285 285 extern void deactivate_page(struct page *page); 286 286 extern void swap_setup(void);
+39 -5
mm/swap.c
··· 432 432 pagevec_lru_move_fn(pvec, __activate_page, NULL); 433 433 } 434 434 435 + static bool need_activate_page_drain(int cpu) 436 + { 437 + return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0; 438 + } 439 + 435 440 void activate_page(struct page *page) 436 441 { 437 442 if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { ··· 452 447 #else 453 448 static inline void activate_page_drain(int cpu) 454 449 { 450 + } 451 + 452 + static bool need_activate_page_drain(int cpu) 453 + { 454 + return false; 455 455 } 456 456 457 457 void activate_page(struct page *page) ··· 711 701 lru_add_drain(); 712 702 } 713 703 714 - /* 715 - * Returns 0 for success 716 - */ 717 - int lru_add_drain_all(void) 704 + static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); 705 + 706 + void lru_add_drain_all(void) 718 707 { 719 - return schedule_on_each_cpu(lru_add_drain_per_cpu); 708 + static DEFINE_MUTEX(lock); 709 + static struct cpumask has_work; 710 + int cpu; 711 + 712 + mutex_lock(&lock); 713 + get_online_cpus(); 714 + cpumask_clear(&has_work); 715 + 716 + for_each_online_cpu(cpu) { 717 + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); 718 + 719 + if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || 720 + pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || 721 + pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || 722 + need_activate_page_drain(cpu)) { 723 + INIT_WORK(work, lru_add_drain_per_cpu); 724 + schedule_work_on(cpu, work); 725 + cpumask_set_cpu(cpu, &has_work); 726 + } 727 + } 728 + 729 + for_each_cpu(cpu, &has_work) 730 + flush_work(&per_cpu(lru_add_drain_work, cpu)); 731 + 732 + put_online_cpus(); 733 + mutex_unlock(&lock); 720 734 } 721 735 722 736 /*