mm: batch activate_page() to reduce lock contention

The zone->lru_lock is heavily contended in workloads where activate_page()
is called frequently. We can batch the activate_page() work to reduce the
lock contention: pages are collected in a per-CPU pool and only added to the
zone's active list when the pool is full or when page reclaim drains them.
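
In outline, the fast path looks like the fragment below, condensed from the
full mm/swap.c hunk further down (the complete change also handles draining
and the !CONFIG_SMP case):

static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);

void activate_page(struct page *page)
{
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);

		page_cache_get(page);
		/* flush the whole batch once the per-CPU pagevec is full */
		if (!pagevec_add(pvec, page))
			pagevec_lru_move_fn(pvec, __activate_page, NULL);
		put_cpu_var(activate_page_pvecs);
	}
}

pagevec_lru_move_fn() takes zone->lru_lock once per zone per batch and applies
__activate_page() to every page in the pagevec, so in the common case the lock
is taken once per PAGEVEC_SIZE pages rather than once per page.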

For example, on a 4-socket, 64-CPU system, create a sparse file and 64
processes that map it with a shared mapping. Each process reads the whole
file and then exits. Process exit runs unmap_vmas(), which causes a lot of
activate_page() calls. In this workload we saw about a 58% reduction in
total time with the patch below. Other workloads with a lot of
activate_page() calls benefit as well.
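
The workload can be approximated with a small test program along the lines of
the sketch below; the file name, file size and process count are illustrative,
not the exact original test setup:

/*
 * Illustrative reproducer (not the original test program): NPROC
 * processes share-map one sparse file, read it end to end, then exit.
 * Process exit runs unmap_vmas(), which activates the mapped page-cache
 * pages and, without the patch, hammers zone->lru_lock.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define NPROC		64
#define FILESIZE	(1UL << 30)	/* 1GB sparse file, size is arbitrary */
#define PAGESIZE	4096UL

int main(void)
{
	int fd = open("sparse.dat", O_RDWR | O_CREAT | O_TRUNC, 0600);
	unsigned long i, off;
	volatile char sum = 0;

	if (fd < 0 || ftruncate(fd, FILESIZE) < 0)
		return 1;

	for (i = 0; i < NPROC; i++) {
		if (fork() == 0) {
			char *p = mmap(NULL, FILESIZE, PROT_READ,
				       MAP_SHARED, fd, 0);

			if (p == MAP_FAILED)
				_exit(1);
			/* touch every page so it ends up mapped */
			for (off = 0; off < FILESIZE; off += PAGESIZE)
				sum += p[off];
			_exit(0);	/* exit -> unmap_vmas() -> activate_page() */
		}
	}
	while (wait(NULL) > 0)
		;
	return 0;
}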

I tested some microbenchmarks:
case-anon-cow-rand-mt 0.58%
case-anon-cow-rand -3.30%
case-anon-cow-seq-mt -0.51%
case-anon-cow-seq -5.68%
case-anon-r-rand-mt 0.23%
case-anon-r-rand 0.81%
case-anon-r-seq-mt -0.71%
case-anon-r-seq -1.99%
case-anon-rx-rand-mt 2.11%
case-anon-rx-seq-mt 3.46%
case-anon-w-rand-mt -0.03%
case-anon-w-rand -0.50%
case-anon-w-seq-mt -1.08%
case-anon-w-seq -0.12%
case-anon-wx-rand-mt -5.02%
case-anon-wx-seq-mt -1.43%
case-fork 1.65%
case-fork-sleep -0.07%
case-fork-withmem 1.39%
case-hugetlb -0.59%
case-lru-file-mmap-read-mt -0.54%
case-lru-file-mmap-read 0.61%
case-lru-file-mmap-read-rand -2.24%
case-lru-file-readonce -0.64%
case-lru-file-readtwice -11.69%
case-lru-memcg -1.35%
case-mmap-pread-rand-mt 1.88%
case-mmap-pread-rand -15.26%
case-mmap-pread-seq-mt 0.89%
case-mmap-pread-seq -69.72%
case-mmap-xread-rand-mt 0.71%
case-mmap-xread-seq-mt 0.38%

The most significant are:
case-lru-file-readtwice -11.69%
case-mmap-pread-rand -15.26%
case-mmap-pread-seq -69.72%

which use activate_page() a lot. The others are basically noise, since each
run differs slightly.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

3 files changed, +94 -15 overall.

mm/internal.h: +9
···
 
 extern unsigned long highest_memmap_pfn;
 
+#ifdef CONFIG_SMP
+extern int putback_active_lru_page(struct zone *zone, struct page *page);
+#else
+static inline int putback_active_lru_page(struct zone *zone, struct page *page)
+{
+	return 0;
+}
+#endif
+
 /*
  * in mm/vmscan.c:
  */

mm/swap.c: +81 -13
···
 }
 
 /*
- * FIXME: speed this up?
+ * A page will go to active list either by activate_page or putback_lru_page.
+ * In the activate_page case, the page hasn't active bit set. The page might
+ * not in LRU list because it's isolated before it gets a chance to be moved to
+ * active list. The window is small because pagevec just stores several pages.
+ * For such case, we do nothing for such page.
+ * In the putback_lru_page case, the page isn't in lru list but has active
+ * bit set
  */
+static void __activate_page(struct page *page, void *arg)
+{
+	struct zone *zone = page_zone(page);
+	int file = page_is_file_cache(page);
+	int lru = page_lru_base_type(page);
+	bool putback = !PageLRU(page);
+
+	/* The page is isolated before it's moved to active list */
+	if (!PageLRU(page) && !PageActive(page))
+		return;
+	if ((PageLRU(page) && PageActive(page)) || PageUnevictable(page))
+		return;
+
+	if (!putback)
+		del_page_from_lru_list(zone, page, lru);
+	else
+		SetPageLRU(page);
+
+	SetPageActive(page);
+	lru += LRU_ACTIVE;
+	add_page_to_lru_list(zone, page, lru);
+
+	if (putback)
+		return;
+	__count_vm_event(PGACTIVATE);
+	update_page_reclaim_stat(zone, page, file, 1);
+}
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+
+static void activate_page_drain(int cpu)
+{
+	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+
+	if (pagevec_count(pvec))
+		pagevec_lru_move_fn(pvec, __activate_page, NULL);
+}
+
+void activate_page(struct page *page)
+{
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+		page_cache_get(page);
+		if (!pagevec_add(pvec, page))
+			pagevec_lru_move_fn(pvec, __activate_page, NULL);
+		put_cpu_var(activate_page_pvecs);
+	}
+}
+
+/* Caller should hold zone->lru_lock */
+int putback_active_lru_page(struct zone *zone, struct page *page)
+{
+	struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+	if (!pagevec_add(pvec, page)) {
+		spin_unlock_irq(&zone->lru_lock);
+		pagevec_lru_move_fn(pvec, __activate_page, NULL);
+		spin_lock_irq(&zone->lru_lock);
+	}
+	put_cpu_var(activate_page_pvecs);
+	return 1;
+}
+
+#else
+static inline void activate_page_drain(int cpu)
+{
+}
+
 void activate_page(struct page *page)
 {
 	struct zone *zone = page_zone(page);
 
 	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		int file = page_is_file_cache(page);
-		int lru = page_lru_base_type(page);
-		del_page_from_lru_list(zone, page, lru);
-
-		SetPageActive(page);
-		lru += LRU_ACTIVE;
-		add_page_to_lru_list(zone, page, lru);
-		__count_vm_event(PGACTIVATE);
-
-		update_page_reclaim_stat(zone, page, file, 1);
-	}
+	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page))
+		__activate_page(page, NULL);
 	spin_unlock_irq(&zone->lru_lock);
 }
+#endif
 
 /*
  * Mark a page as having seen activity.
···
 		pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
+	activate_page_drain(cpu);
 }
 
 void lru_add_drain(void)

mm/vmscan.c: +4 -2
···
 			spin_lock_irq(&zone->lru_lock);
 			continue;
 		}
-		SetPageLRU(page);
 		lru = page_lru(page);
-		add_page_to_lru_list(zone, page, lru);
 		if (is_active_lru(lru)) {
 			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
+			if (putback_active_lru_page(zone, page))
+				continue;
 		}
+		SetPageLRU(page);
+		add_page_to_lru_list(zone, page, lru);
 		if (!pagevec_add(&pvec, page)) {
 			spin_unlock_irq(&zone->lru_lock);
 			__pagevec_release(&pvec);