Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm/rmap: use rmap_walk() in page_referenced()

Now we have an infrastructure in rmap_walk() to handle the differences
among the variants of the rmap traversing functions.

So, just use it in page_referenced().

In this patch, I change the following things.

1. remove some variants of rmap traversing functions.
cf> page_referenced_ksm, page_referenced_anon,
page_referenced_file

2. introduce a new struct page_referenced_arg and pass it to
page_referenced_one(), the main function of rmap_walk(), in order to count
references, to store vm_flags and to check the finish condition.

3. mechanical change to use rmap_walk() in page_referenced().

[liwanp@linux.vnet.ibm.com: fix BUG at rmap_walk]
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Hillf Danton <dhillf@gmail.com>
Signed-off-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Joonsoo Kim and committed by
Linus Torvalds
9f32624b e8351ac9

+80 -194
-2
include/linux/ksm.h
··· 73 73 struct page *ksm_might_need_to_copy(struct page *page, 74 74 struct vm_area_struct *vma, unsigned long address); 75 75 76 - int page_referenced_ksm(struct page *page, 77 - struct mem_cgroup *memcg, unsigned long *vm_flags); 78 76 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); 79 77 void ksm_migrate_page(struct page *newpage, struct page *oldpage); 80 78
+1 -1
include/linux/rmap.h
··· 184 184 int page_referenced(struct page *, int is_locked, 185 185 struct mem_cgroup *memcg, unsigned long *vm_flags); 186 186 int page_referenced_one(struct page *, struct vm_area_struct *, 187 - unsigned long address, unsigned int *mapcount, unsigned long *vm_flags); 187 + unsigned long address, void *arg); 188 188 189 189 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) 190 190
+5 -55
mm/ksm.c
··· 1891 1891 return new_page; 1892 1892 } 1893 1893 1894 - int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, 1895 - unsigned long *vm_flags) 1896 - { 1897 - struct stable_node *stable_node; 1898 - struct rmap_item *rmap_item; 1899 - unsigned int mapcount = page_mapcount(page); 1900 - int referenced = 0; 1901 - int search_new_forks = 0; 1902 - 1903 - VM_BUG_ON(!PageKsm(page)); 1904 - VM_BUG_ON(!PageLocked(page)); 1905 - 1906 - stable_node = page_stable_node(page); 1907 - if (!stable_node) 1908 - return 0; 1909 - again: 1910 - hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { 1911 - struct anon_vma *anon_vma = rmap_item->anon_vma; 1912 - struct anon_vma_chain *vmac; 1913 - struct vm_area_struct *vma; 1914 - 1915 - anon_vma_lock_read(anon_vma); 1916 - anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 1917 - 0, ULONG_MAX) { 1918 - vma = vmac->vma; 1919 - if (rmap_item->address < vma->vm_start || 1920 - rmap_item->address >= vma->vm_end) 1921 - continue; 1922 - /* 1923 - * Initially we examine only the vma which covers this 1924 - * rmap_item; but later, if there is still work to do, 1925 - * we examine covering vmas in other mms: in case they 1926 - * were forked from the original since ksmd passed. 
1927 - */ 1928 - if ((rmap_item->mm == vma->vm_mm) == search_new_forks) 1929 - continue; 1930 - 1931 - if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) 1932 - continue; 1933 - 1934 - referenced += page_referenced_one(page, vma, 1935 - rmap_item->address, &mapcount, vm_flags); 1936 - if (!search_new_forks || !mapcount) 1937 - break; 1938 - } 1939 - anon_vma_unlock_read(anon_vma); 1940 - if (!mapcount) 1941 - goto out; 1942 - } 1943 - if (!search_new_forks++) 1944 - goto again; 1945 - out: 1946 - return referenced; 1947 - } 1948 - 1949 1894 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) 1950 1895 { 1951 1896 struct stable_node *stable_node; ··· 1899 1954 int search_new_forks = 0; 1900 1955 1901 1956 VM_BUG_ON(!PageKsm(page)); 1957 + 1958 + /* 1959 + * Rely on the page lock to protect against concurrent modifications 1960 + * to that page's node of the stable tree. 1961 + */ 1902 1962 VM_BUG_ON(!PageLocked(page)); 1903 1963 1904 1964 stable_node = page_stable_node(page);
+74 -136
mm/rmap.c
··· 660 660 return 1; 661 661 } 662 662 663 + struct page_referenced_arg { 664 + int mapcount; 665 + int referenced; 666 + unsigned long vm_flags; 667 + struct mem_cgroup *memcg; 668 + }; 663 669 /* 664 - * Subfunctions of page_referenced: page_referenced_one called 665 - * repeatedly from either page_referenced_anon or page_referenced_file. 670 + * arg: page_referenced_arg will be passed 666 671 */ 667 672 int page_referenced_one(struct page *page, struct vm_area_struct *vma, 668 - unsigned long address, unsigned int *mapcount, 669 - unsigned long *vm_flags) 673 + unsigned long address, void *arg) 670 674 { 671 675 struct mm_struct *mm = vma->vm_mm; 672 676 spinlock_t *ptl; 673 677 int referenced = 0; 678 + struct page_referenced_arg *pra = arg; 674 679 675 680 if (unlikely(PageTransHuge(page))) { 676 681 pmd_t *pmd; ··· 687 682 pmd = page_check_address_pmd(page, mm, address, 688 683 PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); 689 684 if (!pmd) 690 - goto out; 685 + return SWAP_AGAIN; 691 686 692 687 if (vma->vm_flags & VM_LOCKED) { 693 688 spin_unlock(ptl); 694 - *mapcount = 0; /* break early from loop */ 695 - *vm_flags |= VM_LOCKED; 696 - goto out; 689 + pra->vm_flags |= VM_LOCKED; 690 + return SWAP_FAIL; /* To break the loop */ 697 691 } 698 692 699 693 /* go ahead even if the pmd is pmd_trans_splitting() */ ··· 708 704 */ 709 705 pte = page_check_address(page, mm, address, &ptl, 0); 710 706 if (!pte) 711 - goto out; 707 + return SWAP_AGAIN; 712 708 713 709 if (vma->vm_flags & VM_LOCKED) { 714 710 pte_unmap_unlock(pte, ptl); 715 - *mapcount = 0; /* break early from loop */ 716 - *vm_flags |= VM_LOCKED; 717 - goto out; 711 + pra->vm_flags |= VM_LOCKED; 712 + return SWAP_FAIL; /* To break the loop */ 718 713 } 719 714 720 715 if (ptep_clear_flush_young_notify(vma, address, pte)) { ··· 730 727 pte_unmap_unlock(pte, ptl); 731 728 } 732 729 733 - (*mapcount)--; 734 - 735 - if (referenced) 736 - *vm_flags |= vma->vm_flags; 737 - out: 738 - return referenced; 739 - } 740 - 
741 - static int page_referenced_anon(struct page *page, 742 - struct mem_cgroup *memcg, 743 - unsigned long *vm_flags) 744 - { 745 - unsigned int mapcount; 746 - struct anon_vma *anon_vma; 747 - pgoff_t pgoff; 748 - struct anon_vma_chain *avc; 749 - int referenced = 0; 750 - 751 - anon_vma = page_lock_anon_vma_read(page); 752 - if (!anon_vma) 753 - return referenced; 754 - 755 - mapcount = page_mapcount(page); 756 - pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 757 - anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { 758 - struct vm_area_struct *vma = avc->vma; 759 - unsigned long address = vma_address(page, vma); 760 - /* 761 - * If we are reclaiming on behalf of a cgroup, skip 762 - * counting on behalf of references from different 763 - * cgroups 764 - */ 765 - if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) 766 - continue; 767 - referenced += page_referenced_one(page, vma, address, 768 - &mapcount, vm_flags); 769 - if (!mapcount) 770 - break; 730 + if (referenced) { 731 + pra->referenced++; 732 + pra->vm_flags |= vma->vm_flags; 771 733 } 772 734 773 - page_unlock_anon_vma_read(anon_vma); 774 - return referenced; 735 + pra->mapcount--; 736 + if (!pra->mapcount) 737 + return SWAP_SUCCESS; /* To break the loop */ 738 + 739 + return SWAP_AGAIN; 775 740 } 776 741 777 - /** 778 - * page_referenced_file - referenced check for object-based rmap 779 - * @page: the page we're checking references on. 780 - * @memcg: target memory control group 781 - * @vm_flags: collect encountered vma->vm_flags who actually referenced the page 782 - * 783 - * For an object-based mapped page, find all the places it is mapped and 784 - * check/clear the referenced flag. This is done by following the page->mapping 785 - * pointer, then walking the chain of vmas it holds. It returns the number 786 - * of references it found. 787 - * 788 - * This function is only called from page_referenced for object-based pages. 
789 - */ 790 - static int page_referenced_file(struct page *page, 791 - struct mem_cgroup *memcg, 792 - unsigned long *vm_flags) 742 + static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) 793 743 { 794 - unsigned int mapcount; 795 - struct address_space *mapping = page->mapping; 796 - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 797 - struct vm_area_struct *vma; 798 - int referenced = 0; 744 + struct page_referenced_arg *pra = arg; 745 + struct mem_cgroup *memcg = pra->memcg; 799 746 800 - /* 801 - * The caller's checks on page->mapping and !PageAnon have made 802 - * sure that this is a file page: the check for page->mapping 803 - * excludes the case just before it gets set on an anon page. 804 - */ 805 - BUG_ON(PageAnon(page)); 747 + if (!mm_match_cgroup(vma->vm_mm, memcg)) 748 + return true; 806 749 807 - /* 808 - * The page lock not only makes sure that page->mapping cannot 809 - * suddenly be NULLified by truncation, it makes sure that the 810 - * structure at mapping cannot be freed and reused yet, 811 - * so we can safely take mapping->i_mmap_mutex. 812 - */ 813 - BUG_ON(!PageLocked(page)); 814 - 815 - mutex_lock(&mapping->i_mmap_mutex); 816 - 817 - /* 818 - * i_mmap_mutex does not stabilize mapcount at all, but mapcount 819 - * is more likely to be accurate if we note it after spinning. 
820 - */ 821 - mapcount = page_mapcount(page); 822 - 823 - vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { 824 - unsigned long address = vma_address(page, vma); 825 - /* 826 - * If we are reclaiming on behalf of a cgroup, skip 827 - * counting on behalf of references from different 828 - * cgroups 829 - */ 830 - if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) 831 - continue; 832 - referenced += page_referenced_one(page, vma, address, 833 - &mapcount, vm_flags); 834 - if (!mapcount) 835 - break; 836 - } 837 - 838 - mutex_unlock(&mapping->i_mmap_mutex); 839 - return referenced; 750 + return false; 840 751 } 841 752 842 753 /** ··· 768 851 struct mem_cgroup *memcg, 769 852 unsigned long *vm_flags) 770 853 { 771 - int referenced = 0; 854 + int ret; 772 855 int we_locked = 0; 856 + struct page_referenced_arg pra = { 857 + .mapcount = page_mapcount(page), 858 + .memcg = memcg, 859 + }; 860 + struct rmap_walk_control rwc = { 861 + .rmap_one = page_referenced_one, 862 + .arg = (void *)&pra, 863 + .anon_lock = page_lock_anon_vma_read, 864 + }; 773 865 774 866 *vm_flags = 0; 775 - if (page_mapped(page) && page_rmapping(page)) { 776 - if (!is_locked && (!PageAnon(page) || PageKsm(page))) { 777 - we_locked = trylock_page(page); 778 - if (!we_locked) { 779 - referenced++; 780 - goto out; 781 - } 782 - } 783 - if (unlikely(PageKsm(page))) 784 - referenced += page_referenced_ksm(page, memcg, 785 - vm_flags); 786 - else if (PageAnon(page)) 787 - referenced += page_referenced_anon(page, memcg, 788 - vm_flags); 789 - else if (page->mapping) 790 - referenced += page_referenced_file(page, memcg, 791 - vm_flags); 792 - if (we_locked) 793 - unlock_page(page); 867 + if (!page_mapped(page)) 868 + return 0; 869 + 870 + if (!page_rmapping(page)) 871 + return 0; 872 + 873 + if (!is_locked && (!PageAnon(page) || PageKsm(page))) { 874 + we_locked = trylock_page(page); 875 + if (!we_locked) 876 + return 1; 794 877 } 795 - out: 796 - return referenced; 878 + 879 + /* 880 + * 
If we are reclaiming on behalf of a cgroup, skip 881 + * counting on behalf of references from different 882 + * cgroups 883 + */ 884 + if (memcg) { 885 + rwc.invalid_vma = invalid_page_referenced_vma; 886 + } 887 + 888 + ret = rmap_walk(page, &rwc); 889 + *vm_flags = pra.vm_flags; 890 + 891 + if (we_locked) 892 + unlock_page(page); 893 + 894 + return pra.referenced; 797 895 } 798 896 799 897 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, ··· 1632 1700 struct vm_area_struct *vma; 1633 1701 int ret = SWAP_AGAIN; 1634 1702 1703 + /* 1704 + * The page lock not only makes sure that page->mapping cannot 1705 + * suddenly be NULLified by truncation, it makes sure that the 1706 + * structure at mapping cannot be freed and reused yet, 1707 + * so we can safely take mapping->i_mmap_mutex. 1708 + */ 1709 + VM_BUG_ON(!PageLocked(page)); 1710 + 1635 1711 if (!mapping) 1636 1712 return ret; 1637 1713 mutex_lock(&mapping->i_mmap_mutex); ··· 1671 1731 1672 1732 int rmap_walk(struct page *page, struct rmap_walk_control *rwc) 1673 1733 { 1674 - VM_BUG_ON(!PageLocked(page)); 1675 - 1676 1734 if (unlikely(PageKsm(page))) 1677 1735 return rmap_walk_ksm(page, rwc); 1678 1736 else if (PageAnon(page))