Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

thp: KSM on THP

This makes KSM fully operational with THP pages. Subpages are scanned
while the hugepage is still in place and delivering max cpu performance,
and only if there's a match and we're going to deduplicate memory is the
single hugepage containing the matching subpage split.

There will be no false sharing between ksmd and khugepaged. khugepaged
won't collapse 2m virtual regions with KSM pages inside. ksmd also should
only split pages when the checksum matches and we're likely to split a
hugepage for some long-lived ksm page (usual ksm heuristic to avoid
sharing pages that get de-cowed).

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Andrea Arcangeli and committed by
Linus Torvalds
29ad768c 60ab3244

+58 -9
+58 -9
mm/ksm.c
··· 412 412 up_read(&mm->mmap_sem); 413 413 } 414 414 415 + static struct page *page_trans_compound_anon(struct page *page) 416 + { 417 + if (PageTransCompound(page)) { 418 + struct page *head; 419 + head = compound_head(page); 420 + /* 421 + * head may be a dangling pointer. 422 + * __split_huge_page_refcount clears PageTail 423 + * before overwriting first_page, so if 424 + * PageTail is still there it means the head 425 + * pointer isn't dangling. 426 + */ 427 + if (head != page) { 428 + smp_rmb(); 429 + if (!PageTransCompound(page)) 430 + return NULL; 431 + } 432 + if (PageAnon(head)) 433 + return head; 434 + } 435 + return NULL; 436 + } 437 + 415 438 static struct page *get_mergeable_page(struct rmap_item *rmap_item) 416 439 { 417 440 struct mm_struct *mm = rmap_item->mm; ··· 454 431 page = follow_page(vma, addr, FOLL_GET); 455 432 if (IS_ERR_OR_NULL(page)) 456 433 goto out; 457 - if (PageAnon(page) && !PageTransCompound(page)) { 434 + if (PageAnon(page) || page_trans_compound_anon(page)) { 458 435 flush_anon_page(vma, page, addr); 459 436 flush_dcache_page(page); 460 437 } else { ··· 732 709 if (addr == -EFAULT) 733 710 goto out; 734 711 712 + BUG_ON(PageTransCompound(page)); 735 713 ptep = page_check_address(page, mm, addr, &ptl, 0); 736 714 if (!ptep) 737 715 goto out; ··· 808 784 goto out; 809 785 810 786 pmd = pmd_offset(pud, addr); 787 + BUG_ON(pmd_trans_huge(*pmd)); 811 788 if (!pmd_present(*pmd)) 812 789 goto out; 813 790 ··· 836 811 return err; 837 812 } 838 813 814 + static int page_trans_compound_anon_split(struct page *page) 815 + { 816 + int ret = 0; 817 + struct page *transhuge_head = page_trans_compound_anon(page); 818 + if (transhuge_head) { 819 + /* Get the reference on the head to split it. */ 820 + if (get_page_unless_zero(transhuge_head)) { 821 + /* 822 + * Recheck we got the reference while the head 823 + * was still anonymous. 
824 + */ 825 + if (PageAnon(transhuge_head)) 826 + ret = split_huge_page(transhuge_head); 827 + else 828 + /* 829 + * Retry later if split_huge_page run 830 + * from under us. 831 + */ 832 + ret = 1; 833 + put_page(transhuge_head); 834 + } else 835 + /* Retry later if split_huge_page run from under us. */ 836 + ret = 1; 837 + } 838 + return ret; 839 + } 840 + 839 841 /* 840 842 * try_to_merge_one_page - take two pages and merge them into one 841 843 * @vma: the vma that holds the pte pointing to page ··· 883 831 884 832 if (!(vma->vm_flags & VM_MERGEABLE)) 885 833 goto out; 834 + if (PageTransCompound(page) && page_trans_compound_anon_split(page)) 835 + goto out; 836 + BUG_ON(PageTransCompound(page)); 886 837 if (!PageAnon(page)) 887 838 goto out; 888 839 ··· 1340 1285 cond_resched(); 1341 1286 continue; 1342 1287 } 1343 - if (PageTransCompound(*page)) { 1344 - put_page(*page); 1345 - ksm_scan.address &= HPAGE_PMD_MASK; 1346 - ksm_scan.address += HPAGE_PMD_SIZE; 1347 - cond_resched(); 1348 - continue; 1349 - } 1350 - if (PageAnon(*page)) { 1288 + if (PageAnon(*page) || 1289 + page_trans_compound_anon(*page)) { 1351 1290 flush_anon_page(vma, *page, ksm_scan.address); 1352 1291 flush_dcache_page(*page); 1353 1292 rmap_item = get_next_rmap_item(slot,