Linux kernel mirror: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

mm: factor out common parts between MADV_COLD and MADV_PAGEOUT

There are many common parts between MADV_COLD and MADV_PAGEOUT.
This patch factors them out to avoid code duplication.
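
A minimal userspace sketch of the pattern the patch applies: one walker body, parameterized by a pageout flag, branches only at the tail instead of duplicating the whole walk. The names walk_private, handle_range, deactivate, and reclaim_list are hypothetical stand-ins for the kernel's madvise_walk_private, madvise_cold_or_pageout_pte_range(), deactivate_page(), and reclaim_pages().

/*
 * Sketch of the refactor: a single range handler parameterized by a
 * pageout flag replaces two near-identical handlers. All names here
 * are hypothetical userspace stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>

struct walk_private {
	bool pageout;	/* true: MADV_PAGEOUT semantics, false: MADV_COLD */
};

static void deactivate(int page)
{
	printf("deactivate page %d\n", page);	/* stands in for deactivate_page() */
}

static void reclaim_list(int nr)
{
	printf("reclaim %d isolated pages\n", nr);	/* stands in for reclaim_pages() */
}

/* One body serves both advices; only the per-page tail action differs. */
static void handle_range(int start, int end, struct walk_private *priv)
{
	int isolated = 0;

	for (int page = start; page < end; page++) {
		/* common part (clearing young/referenced state) elided */
		if (priv->pageout)
			isolated++;		/* queue page for reclaim */
		else
			deactivate(page);	/* just age the page */
	}
	if (priv->pageout)
		reclaim_list(isolated);
}

int main(void)
{
	struct walk_private cold = { .pageout = false };
	struct walk_private pageout = { .pageout = true };

	handle_range(0, 3, &cold);	/* MADV_COLD path */
	handle_range(0, 3, &pageout);	/* MADV_PAGEOUT path */
	return 0;
}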

Link: http://lkml.kernel.org/r/20190726023435.214162-6-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Daniel Colascione <dancol@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Oleksandr Natalenko <oleksandr@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tim Murray <timmurray@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Minchan Kim, committed by Linus Torvalds
d616d512 1a4e58cc

+45 -147
mm/madvise.c
···
 
 #include "internal.h"
 
+struct madvise_walk_private {
+	struct mmu_gather *tlb;
+	bool pageout;
+};
+
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
  * take mmap_sem for writing. Others, which simply traverse vmas, need
···
 	return 0;
 }
 
-static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,
-				unsigned long end, struct mm_walk *walk)
+static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
+				unsigned long addr, unsigned long end,
+				struct mm_walk *walk)
 {
-	struct mmu_gather *tlb = walk->private;
+	struct madvise_walk_private *private = walk->private;
+	struct mmu_gather *tlb = private->tlb;
+	bool pageout = private->pageout;
 	struct mm_struct *mm = tlb->mm;
 	struct vm_area_struct *vma = walk->vma;
 	pte_t *orig_pte, *pte, ptent;
 	spinlock_t *ptl;
-	struct page *page;
+	struct page *page = NULL;
+	LIST_HEAD(page_list);
+
+	if (fatal_signal_pending(current))
+		return -EINTR;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (pmd_trans_huge(*pmd)) {
···
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 	}
 
+	ClearPageReferenced(page);
 	test_and_clear_page_young(page);
-	deactivate_page(page);
+	if (pageout) {
+		if (!isolate_lru_page(page))
+			list_add(&page->lru, &page_list);
+	} else
+		deactivate_page(page);
 huge_unlock:
 	spin_unlock(ptl);
+	if (pageout)
+		reclaim_pages(&page_list);
 	return 0;
 }
···
 	 * As a side effect, it makes confuse idle-page tracking
 	 * because they will miss recent referenced history.
 	 */
+	ClearPageReferenced(page);
 	test_and_clear_page_young(page);
-	deactivate_page(page);
+	if (pageout) {
+		if (!isolate_lru_page(page))
+			list_add(&page->lru, &page_list);
+	} else
+		deactivate_page(page);
 	}
 
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(orig_pte, ptl);
+	if (pageout)
+		reclaim_pages(&page_list);
 	cond_resched();
 
 	return 0;
 }
 
 static const struct mm_walk_ops cold_walk_ops = {
-	.pmd_entry = madvise_cold_pte_range,
+	.pmd_entry = madvise_cold_or_pageout_pte_range,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end)
 {
+	struct madvise_walk_private walk_private = {
+		.pageout = false,
+		.tlb = tlb,
+	};
+
 	tlb_start_vma(tlb, vma);
-	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, NULL);
+	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
 	tlb_end_vma(tlb, vma);
 }
···
 	return 0;
 }
 
-static int madvise_pageout_pte_range(pmd_t *pmd, unsigned long addr,
-				unsigned long end, struct mm_walk *walk)
-{
-	struct mmu_gather *tlb = walk->private;
-	struct mm_struct *mm = tlb->mm;
-	struct vm_area_struct *vma = walk->vma;
-	pte_t *orig_pte, *pte, ptent;
-	spinlock_t *ptl;
-	LIST_HEAD(page_list);
-	struct page *page;
-
-	if (fatal_signal_pending(current))
-		return -EINTR;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (pmd_trans_huge(*pmd)) {
-		pmd_t orig_pmd;
-		unsigned long next = pmd_addr_end(addr, end);
-
-		tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
-		ptl = pmd_trans_huge_lock(pmd, vma);
-		if (!ptl)
-			return 0;
-
-		orig_pmd = *pmd;
-		if (is_huge_zero_pmd(orig_pmd))
-			goto huge_unlock;
-
-		if (unlikely(!pmd_present(orig_pmd))) {
-			VM_BUG_ON(thp_migration_supported() &&
-					!is_pmd_migration_entry(orig_pmd));
-			goto huge_unlock;
-		}
-
-		page = pmd_page(orig_pmd);
-		if (next - addr != HPAGE_PMD_SIZE) {
-			int err;
-
-			if (page_mapcount(page) != 1)
-				goto huge_unlock;
-			get_page(page);
-			spin_unlock(ptl);
-			lock_page(page);
-			err = split_huge_page(page);
-			unlock_page(page);
-			put_page(page);
-			if (!err)
-				goto regular_page;
-			return 0;
-		}
-
-		if (pmd_young(orig_pmd)) {
-			pmdp_invalidate(vma, addr, pmd);
-			orig_pmd = pmd_mkold(orig_pmd);
-
-			set_pmd_at(mm, addr, pmd, orig_pmd);
-			tlb_remove_tlb_entry(tlb, pmd, addr);
-		}
-
-		ClearPageReferenced(page);
-		test_and_clear_page_young(page);
-
-		if (!isolate_lru_page(page))
-			list_add(&page->lru, &page_list);
-huge_unlock:
-		spin_unlock(ptl);
-		reclaim_pages(&page_list);
-		return 0;
-	}
-
-	if (pmd_trans_unstable(pmd))
-		return 0;
-regular_page:
-#endif
-	tlb_change_page_size(tlb, PAGE_SIZE);
-	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	flush_tlb_batched_pending(mm);
-	arch_enter_lazy_mmu_mode();
-	for (; addr < end; pte++, addr += PAGE_SIZE) {
-		ptent = *pte;
-		if (!pte_present(ptent))
-			continue;
-
-		page = vm_normal_page(vma, addr, ptent);
-		if (!page)
-			continue;
-
-		/*
-		 * creating a THP page is expensive so split it only if we
-		 * are sure it's worth. Split it if we are only owner.
-		 */
-		if (PageTransCompound(page)) {
-			if (page_mapcount(page) != 1)
-				break;
-			get_page(page);
-			if (!trylock_page(page)) {
-				put_page(page);
-				break;
-			}
-			pte_unmap_unlock(orig_pte, ptl);
-			if (split_huge_page(page)) {
-				unlock_page(page);
-				put_page(page);
-				pte_offset_map_lock(mm, pmd, addr, &ptl);
-				break;
-			}
-			unlock_page(page);
-			put_page(page);
-			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
-			pte--;
-			addr -= PAGE_SIZE;
-			continue;
-		}
-
-		VM_BUG_ON_PAGE(PageTransCompound(page), page);
-
-		if (pte_young(ptent)) {
-			ptent = ptep_get_and_clear_full(mm, addr, pte,
-							tlb->fullmm);
-			ptent = pte_mkold(ptent);
-			set_pte_at(mm, addr, pte, ptent);
-			tlb_remove_tlb_entry(tlb, pte, addr);
-		}
-		ClearPageReferenced(page);
-		test_and_clear_page_young(page);
-
-		if (!isolate_lru_page(page))
-			list_add(&page->lru, &page_list);
-	}
-
-	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(orig_pte, ptl);
-	reclaim_pages(&page_list);
-	cond_resched();
-
-	return 0;
-}
-
 static void madvise_pageout_page_range(struct mmu_gather *tlb,
 			      struct vm_area_struct *vma,
 			      unsigned long addr, unsigned long end)
 {
+	struct madvise_walk_private walk_private = {
+		.pageout = true,
+		.tlb = tlb,
+	};
+
 	tlb_start_vma(tlb, vma);
-	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, NULL);
+	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
 	tlb_end_vma(tlb, vma);
 }
 
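
For completeness, a usage sketch under the assumption of Linux 5.4 or later, where MADV_COLD and MADV_PAGEOUT were first exposed: this is how userspace reaches the walker above through madvise(2). The fallback #defines carry the uapi values from <asm-generic/mman-common.h> for older headers.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MADV_COLD
#define MADV_COLD	20	/* uapi value, <asm-generic/mman-common.h> */
#endif
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT	21
#endif

int main(void)
{
	size_t len = 4UL << 20;	/* 4 MiB */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(buf, 0xa5, len);		/* fault the pages in */

	/* Ages the range: the shared walker runs with pageout == false. */
	if (madvise(buf, len, MADV_COLD))
		perror("madvise(MADV_COLD)");

	/* Reclaims the range: the shared walker runs with pageout == true. */
	if (madvise(buf, len, MADV_PAGEOUT))
		perror("madvise(MADV_PAGEOUT)");

	munmap(buf, len);
	return 0;
}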