Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: page cache: store only head pages in i_pages

Transparent Huge Pages are currently stored in i_pages as pointers to
consecutive subpages. This patch changes that to storing consecutive
pointers to the head page in preparation for storing huge pages more
efficiently in i_pages.

Large parts of this are "inspired" by Kirill's patch
https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/

[willy@infradead.org: fix swapcache pages]
Link: http://lkml.kernel.org/r/20190324155441.GF10344@bombadil.infradead.org
[kirill@shutemov.name: hugetlb stores pages in page cache differently]
Link: http://lkml.kernel.org/r/20190404134553.vuvhgmghlkiw2hgl@kshutemo-mobl1
Link: http://lkml.kernel.org/r/20190307153051.18815-1-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Kirill Shutemov <kirill@shutemov.name>
Reviewed-and-tested-by: Song Liu <songliubraving@fb.com>
Tested-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Tested-by: Qian Cai <cai@lca.pw>
Cc: Hugh Dickins <hughd@google.com>
Cc: Song Liu <liu.song.a23@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Matthew Wilcox; committed by Linus Torvalds.
5fd4ca2d cefdca0a

+87 -104
+13
include/linux/pagemap.h
··· 333 333 mapping_gfp_mask(mapping)); 334 334 } 335 335 336 + static inline struct page *find_subpage(struct page *page, pgoff_t offset) 337 + { 338 + unsigned long mask; 339 + 340 + if (PageHuge(page)) 341 + return page; 342 + 343 + VM_BUG_ON_PAGE(PageTail(page), page); 344 + 345 + mask = (1UL << compound_order(page)) - 1; 346 + return page + (offset & mask); 347 + } 348 + 336 349 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); 337 350 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); 338 351 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+63 -98
mm/filemap.c
··· 279 279 * @pvec: pagevec with pages to delete 280 280 * 281 281 * The function walks over mapping->i_pages and removes pages passed in @pvec 282 - * from the mapping. The function expects @pvec to be sorted by page index. 282 + * from the mapping. The function expects @pvec to be sorted by page index 283 + * and is optimised for it to be dense. 283 284 * It tolerates holes in @pvec (mapping entries at those indices are not 284 285 * modified). The function expects only THP head pages to be present in the 285 - * @pvec and takes care to delete all corresponding tail pages from the 286 - * mapping as well. 286 + * @pvec. 287 287 * 288 288 * The function expects the i_pages lock to be held. 289 289 */ ··· 292 292 { 293 293 XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); 294 294 int total_pages = 0; 295 - int i = 0, tail_pages = 0; 295 + int i = 0; 296 296 struct page *page; 297 297 298 298 mapping_set_update(&xas, mapping); 299 299 xas_for_each(&xas, page, ULONG_MAX) { 300 - if (i >= pagevec_count(pvec) && !tail_pages) 300 + if (i >= pagevec_count(pvec)) 301 301 break; 302 + 303 + /* A swap/dax/shadow entry got inserted? Skip it. */ 302 304 if (xa_is_value(page)) 303 305 continue; 304 - if (!tail_pages) { 305 - /* 306 - * Some page got inserted in our range? Skip it. We 307 - * have our pages locked so they are protected from 308 - * being removed. 309 - */ 310 - if (page != pvec->pages[i]) { 311 - VM_BUG_ON_PAGE(page->index > 312 - pvec->pages[i]->index, page); 313 - continue; 314 - } 315 - WARN_ON_ONCE(!PageLocked(page)); 316 - if (PageTransHuge(page) && !PageHuge(page)) 317 - tail_pages = HPAGE_PMD_NR - 1; 318 - page->mapping = NULL; 319 - /* 320 - * Leave page->index set: truncation lookup relies 321 - * upon it 322 - */ 323 - i++; 324 - } else { 325 - VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages 326 - != pvec->pages[i]->index, page); 327 - tail_pages--; 306 + /* 307 + * A page got inserted in our range? Skip it. 
We have our 308 + * pages locked so they are protected from being removed. 309 + * If we see a page whose index is higher than ours, it 310 + * means our page has been removed, which shouldn't be 311 + * possible because we're holding the PageLock. 312 + */ 313 + if (page != pvec->pages[i]) { 314 + VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, 315 + page); 316 + continue; 328 317 } 318 + 319 + WARN_ON_ONCE(!PageLocked(page)); 320 + 321 + if (page->index == xas.xa_index) 322 + page->mapping = NULL; 323 + /* Leave page->index set: truncation lookup relies on it */ 324 + 325 + /* 326 + * Move to the next page in the vector if this is a regular 327 + * page or the index is of the last sub-page of this compound 328 + * page. 329 + */ 330 + if (page->index + (1UL << compound_order(page)) - 1 == 331 + xas.xa_index) 332 + i++; 329 333 xas_store(&xas, NULL); 330 334 total_pages++; 331 335 } ··· 1495 1491 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) 1496 1492 { 1497 1493 XA_STATE(xas, &mapping->i_pages, offset); 1498 - struct page *head, *page; 1494 + struct page *page; 1499 1495 1500 1496 rcu_read_lock(); 1501 1497 repeat: ··· 1510 1506 if (!page || xa_is_value(page)) 1511 1507 goto out; 1512 1508 1513 - head = compound_head(page); 1514 - if (!page_cache_get_speculative(head)) 1509 + if (!page_cache_get_speculative(page)) 1515 1510 goto repeat; 1516 - 1517 - /* The page was split under us? */ 1518 - if (compound_head(page) != head) { 1519 - put_page(head); 1520 - goto repeat; 1521 - } 1522 1511 1523 1512 /* 1524 - * Has the page moved? 1513 + * Has the page moved or been split? 1525 1514 * This is part of the lockless pagecache protocol. See 1526 1515 * include/linux/pagemap.h for details. 
1527 1516 */ 1528 1517 if (unlikely(page != xas_reload(&xas))) { 1529 - put_page(head); 1518 + put_page(page); 1530 1519 goto repeat; 1531 1520 } 1521 + page = find_subpage(page, offset); 1532 1522 out: 1533 1523 rcu_read_unlock(); 1534 1524 ··· 1704 1706 1705 1707 rcu_read_lock(); 1706 1708 xas_for_each(&xas, page, ULONG_MAX) { 1707 - struct page *head; 1708 1709 if (xas_retry(&xas, page)) 1709 1710 continue; 1710 1711 /* ··· 1714 1717 if (xa_is_value(page)) 1715 1718 goto export; 1716 1719 1717 - head = compound_head(page); 1718 - if (!page_cache_get_speculative(head)) 1720 + if (!page_cache_get_speculative(page)) 1719 1721 goto retry; 1720 1722 1721 - /* The page was split under us? */ 1722 - if (compound_head(page) != head) 1723 - goto put_page; 1724 - 1725 - /* Has the page moved? */ 1723 + /* Has the page moved or been split? */ 1726 1724 if (unlikely(page != xas_reload(&xas))) 1727 1725 goto put_page; 1726 + page = find_subpage(page, xas.xa_index); 1728 1727 1729 1728 export: 1730 1729 indices[ret] = xas.xa_index; ··· 1729 1736 break; 1730 1737 continue; 1731 1738 put_page: 1732 - put_page(head); 1739 + put_page(page); 1733 1740 retry: 1734 1741 xas_reset(&xas); 1735 1742 } ··· 1771 1778 1772 1779 rcu_read_lock(); 1773 1780 xas_for_each(&xas, page, end) { 1774 - struct page *head; 1775 1781 if (xas_retry(&xas, page)) 1776 1782 continue; 1777 1783 /* Skip over shadow, swap and DAX entries */ 1778 1784 if (xa_is_value(page)) 1779 1785 continue; 1780 1786 1781 - head = compound_head(page); 1782 - if (!page_cache_get_speculative(head)) 1787 + if (!page_cache_get_speculative(page)) 1783 1788 goto retry; 1784 1789 1785 - /* The page was split under us? */ 1786 - if (compound_head(page) != head) 1787 - goto put_page; 1788 - 1789 - /* Has the page moved? */ 1790 + /* Has the page moved or been split? 
*/ 1790 1791 if (unlikely(page != xas_reload(&xas))) 1791 1792 goto put_page; 1792 1793 1793 - pages[ret] = page; 1794 + pages[ret] = find_subpage(page, xas.xa_index); 1794 1795 if (++ret == nr_pages) { 1795 1796 *start = xas.xa_index + 1; 1796 1797 goto out; 1797 1798 } 1798 1799 continue; 1799 1800 put_page: 1800 - put_page(head); 1801 + put_page(page); 1801 1802 retry: 1802 1803 xas_reset(&xas); 1803 1804 } ··· 1836 1849 1837 1850 rcu_read_lock(); 1838 1851 for (page = xas_load(&xas); page; page = xas_next(&xas)) { 1839 - struct page *head; 1840 1852 if (xas_retry(&xas, page)) 1841 1853 continue; 1842 1854 /* ··· 1845 1859 if (xa_is_value(page)) 1846 1860 break; 1847 1861 1848 - head = compound_head(page); 1849 - if (!page_cache_get_speculative(head)) 1862 + if (!page_cache_get_speculative(page)) 1850 1863 goto retry; 1851 1864 1852 - /* The page was split under us? */ 1853 - if (compound_head(page) != head) 1854 - goto put_page; 1855 - 1856 - /* Has the page moved? */ 1865 + /* Has the page moved or been split? */ 1857 1866 if (unlikely(page != xas_reload(&xas))) 1858 1867 goto put_page; 1859 1868 1860 - pages[ret] = page; 1869 + pages[ret] = find_subpage(page, xas.xa_index); 1861 1870 if (++ret == nr_pages) 1862 1871 break; 1863 1872 continue; 1864 1873 put_page: 1865 - put_page(head); 1874 + put_page(page); 1866 1875 retry: 1867 1876 xas_reset(&xas); 1868 1877 } ··· 1893 1912 1894 1913 rcu_read_lock(); 1895 1914 xas_for_each_marked(&xas, page, end, tag) { 1896 - struct page *head; 1897 1915 if (xas_retry(&xas, page)) 1898 1916 continue; 1899 1917 /* ··· 1903 1923 if (xa_is_value(page)) 1904 1924 continue; 1905 1925 1906 - head = compound_head(page); 1907 - if (!page_cache_get_speculative(head)) 1926 + if (!page_cache_get_speculative(page)) 1908 1927 goto retry; 1909 1928 1910 - /* The page was split under us? */ 1911 - if (compound_head(page) != head) 1912 - goto put_page; 1913 - 1914 - /* Has the page moved? */ 1929 + /* Has the page moved or been split? 
*/ 1915 1930 if (unlikely(page != xas_reload(&xas))) 1916 1931 goto put_page; 1917 1932 1918 - pages[ret] = page; 1933 + pages[ret] = find_subpage(page, xas.xa_index); 1919 1934 if (++ret == nr_pages) { 1920 1935 *index = xas.xa_index + 1; 1921 1936 goto out; 1922 1937 } 1923 1938 continue; 1924 1939 put_page: 1925 - put_page(head); 1940 + put_page(page); 1926 1941 retry: 1927 1942 xas_reset(&xas); 1928 1943 } ··· 1966 1991 1967 1992 rcu_read_lock(); 1968 1993 xas_for_each_marked(&xas, page, ULONG_MAX, tag) { 1969 - struct page *head; 1970 1994 if (xas_retry(&xas, page)) 1971 1995 continue; 1972 1996 /* ··· 1976 2002 if (xa_is_value(page)) 1977 2003 goto export; 1978 2004 1979 - head = compound_head(page); 1980 - if (!page_cache_get_speculative(head)) 2005 + if (!page_cache_get_speculative(page)) 1981 2006 goto retry; 1982 2007 1983 - /* The page was split under us? */ 1984 - if (compound_head(page) != head) 1985 - goto put_page; 1986 - 1987 - /* Has the page moved? */ 2008 + /* Has the page moved or been split? */ 1988 2009 if (unlikely(page != xas_reload(&xas))) 1989 2010 goto put_page; 2011 + page = find_subpage(page, xas.xa_index); 1990 2012 1991 2013 export: 1992 2014 indices[ret] = xas.xa_index; ··· 1991 2021 break; 1992 2022 continue; 1993 2023 put_page: 1994 - put_page(head); 2024 + put_page(page); 1995 2025 retry: 1996 2026 xas_reset(&xas); 1997 2027 } ··· 2661 2691 pgoff_t last_pgoff = start_pgoff; 2662 2692 unsigned long max_idx; 2663 2693 XA_STATE(xas, &mapping->i_pages, start_pgoff); 2664 - struct page *head, *page; 2694 + struct page *page; 2665 2695 2666 2696 rcu_read_lock(); 2667 2697 xas_for_each(&xas, page, end_pgoff) { ··· 2670 2700 if (xa_is_value(page)) 2671 2701 goto next; 2672 2702 2673 - head = compound_head(page); 2674 - 2675 2703 /* 2676 2704 * Check for a locked page first, as a speculative 2677 2705 * reference may adversely influence page migration. 
2678 2706 */ 2679 - if (PageLocked(head)) 2707 + if (PageLocked(page)) 2680 2708 goto next; 2681 - if (!page_cache_get_speculative(head)) 2709 + if (!page_cache_get_speculative(page)) 2682 2710 goto next; 2683 2711 2684 - /* The page was split under us? */ 2685 - if (compound_head(page) != head) 2686 - goto skip; 2687 - 2688 - /* Has the page moved? */ 2712 + /* Has the page moved or been split? */ 2689 2713 if (unlikely(page != xas_reload(&xas))) 2690 2714 goto skip; 2715 + page = find_subpage(page, xas.xa_index); 2691 2716 2692 2717 if (!PageUptodate(page) || 2693 2718 PageReadahead(page) ||
+3
mm/huge_memory.c
··· 2496 2496 if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) 2497 2497 shmem_uncharge(head->mapping->host, 1); 2498 2498 put_page(head + i); 2499 + } else if (!PageAnon(page)) { 2500 + __xa_store(&head->mapping->i_pages, head[i].index, 2501 + head + i, 0); 2499 2502 } 2500 2503 } 2501 2504
+2 -2
mm/khugepaged.c
··· 1374 1374 result = SCAN_FAIL; 1375 1375 goto xa_locked; 1376 1376 } 1377 - xas_store(&xas, new_page + (index % HPAGE_PMD_NR)); 1377 + xas_store(&xas, new_page); 1378 1378 nr_none++; 1379 1379 continue; 1380 1380 } ··· 1450 1450 list_add_tail(&page->lru, &pagelist); 1451 1451 1452 1452 /* Finally, replace with the new page. */ 1453 - xas_store(&xas, new_page + (index % HPAGE_PMD_NR)); 1453 + xas_store(&xas, new_page); 1454 1454 continue; 1455 1455 out_unlock: 1456 1456 unlock_page(page);
+2
mm/memfd.c
··· 39 39 xas_for_each(xas, page, ULONG_MAX) { 40 40 if (xa_is_value(page)) 41 41 continue; 42 + page = find_subpage(page, xas->xa_index); 42 43 if (page_count(page) - page_mapcount(page) > 1) 43 44 xas_set_mark(xas, MEMFD_TAG_PINNED); 44 45 ··· 89 88 bool clear = true; 90 89 if (xa_is_value(page)) 91 90 continue; 91 + page = find_subpage(page, xas.xa_index); 92 92 if (page_count(page) - page_mapcount(page) != 1) { 93 93 /* 94 94 * On the last scan, we clean up all those tags
+1 -1
mm/migrate.c
··· 463 463 464 464 for (i = 1; i < HPAGE_PMD_NR; i++) { 465 465 xas_next(&xas); 466 - xas_store(&xas, newpage + i); 466 + xas_store(&xas, newpage); 467 467 } 468 468 } 469 469
+1 -1
mm/shmem.c
··· 614 614 if (xas_error(&xas)) 615 615 goto unlock; 616 616 next: 617 - xas_store(&xas, page + i); 617 + xas_store(&xas, page); 618 618 if (++i < nr) { 619 619 xas_next(&xas); 620 620 goto next;
+2 -2
mm/swap_state.c
··· 132 132 for (i = 0; i < nr; i++) { 133 133 VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); 134 134 set_page_private(page + i, entry.val + i); 135 - xas_store(&xas, page + i); 135 + xas_store(&xas, page); 136 136 xas_next(&xas); 137 137 } 138 138 address_space->nrpages += nr; ··· 167 167 168 168 for (i = 0; i < nr; i++) { 169 169 void *entry = xas_store(&xas, NULL); 170 - VM_BUG_ON_PAGE(entry != page + i, entry); 170 + VM_BUG_ON_PAGE(entry != page, entry); 171 171 set_page_private(page + i, 0); 172 172 xas_next(&xas); 173 173 }