Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

zsmalloc: remove zspage isolation for migration

zspage isolation for migration introduced additional corner cases to
handle, since the zspage was removed from its class list. The reason I
isolated the zspage from the class list was to prevent a race between
obj_malloc and page migration, by forbidding further object allocation
from the zspage. However, that could not prevent objects from being
freed from the zspage, so extra corner-case handling was still needed.

This patch removes the whole mess. Now we are fine, since class->lock
and zspage->lock together prevent the race.

Link: https://lkml.kernel.org/r/20211115185909.3949505-7-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Minchan Kim; committed by Linus Torvalds
c4549b87 a41ec880

+8 -149
mm/zsmalloc.c
··· 254 254 #ifdef CONFIG_COMPACTION 255 255 struct inode *inode; 256 256 struct work_struct free_work; 257 - /* A wait queue for when migration races with async_free_zspage() */ 258 - struct wait_queue_head migration_wait; 259 - atomic_long_t isolated_pages; 260 - bool destroying; 261 257 #endif 262 258 }; 263 259 ··· 449 453 450 454 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 451 455 static DEFINE_PER_CPU(struct mapping_area, zs_map_area); 452 - 453 - static bool is_zspage_isolated(struct zspage *zspage) 454 - { 455 - return zspage->isolated; 456 - } 457 456 458 457 static __maybe_unused int is_first_page(struct page *page) 459 458 { ··· 735 744 enum fullness_group fullness) 736 745 { 737 746 VM_BUG_ON(list_empty(&class->fullness_list[fullness])); 738 - VM_BUG_ON(is_zspage_isolated(zspage)); 739 747 740 748 list_del_init(&zspage->list); 741 749 class_stat_dec(class, fullness, 1); ··· 760 770 if (newfg == currfg) 761 771 goto out; 762 772 763 - if (!is_zspage_isolated(zspage)) { 764 - remove_zspage(class, zspage, currfg); 765 - insert_zspage(class, zspage, newfg); 766 - } 767 - 773 + remove_zspage(class, zspage, currfg); 774 + insert_zspage(class, zspage, newfg); 768 775 set_zspage_mapping(zspage, class_idx, newfg); 769 - 770 776 out: 771 777 return newfg; 772 778 } ··· 1497 1511 unsigned long obj; 1498 1512 struct size_class *class; 1499 1513 enum fullness_group fullness; 1500 - bool isolated; 1501 1514 1502 1515 if (unlikely(!handle)) 1503 1516 return; ··· 1518 1533 goto out; 1519 1534 } 1520 1535 1521 - isolated = is_zspage_isolated(zspage); 1522 1536 migrate_read_unlock(zspage); 1523 1537 /* If zspage is isolated, zs_page_putback will free the zspage */ 1524 - if (likely(!isolated)) 1525 - free_zspage(pool, class, zspage); 1538 + free_zspage(pool, class, zspage); 1526 1539 out: 1527 1540 1528 1541 spin_unlock(&class->lock); ··· 1701 1718 zspage = list_first_entry_or_null(&class->fullness_list[fg[i]], 1702 1719 struct zspage, 
list); 1703 1720 if (zspage) { 1704 - VM_BUG_ON(is_zspage_isolated(zspage)); 1705 1721 remove_zspage(class, zspage, fg[i]); 1706 1722 return zspage; 1707 1723 } ··· 1720 1738 struct zspage *zspage) 1721 1739 { 1722 1740 enum fullness_group fullness; 1723 - 1724 - VM_BUG_ON(is_zspage_isolated(zspage)); 1725 1741 1726 1742 fullness = get_fullness_group(class, zspage); 1727 1743 insert_zspage(class, zspage, fullness); ··· 1802 1822 1803 1823 static void dec_zspage_isolation(struct zspage *zspage) 1804 1824 { 1825 + VM_BUG_ON(zspage->isolated == 0); 1805 1826 zspage->isolated--; 1806 - } 1807 - 1808 - static void putback_zspage_deferred(struct zs_pool *pool, 1809 - struct size_class *class, 1810 - struct zspage *zspage) 1811 - { 1812 - enum fullness_group fg; 1813 - 1814 - fg = putback_zspage(class, zspage); 1815 - if (fg == ZS_EMPTY) 1816 - schedule_work(&pool->free_work); 1817 - 1818 - } 1819 - 1820 - static inline void zs_pool_dec_isolated(struct zs_pool *pool) 1821 - { 1822 - VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); 1823 - atomic_long_dec(&pool->isolated_pages); 1824 - /* 1825 - * Checking pool->destroying must happen after atomic_long_dec() 1826 - * for pool->isolated_pages above. Paired with the smp_mb() in 1827 - * zs_unregister_migration(). 1828 - */ 1829 - smp_mb__after_atomic(); 1830 - if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) 1831 - wake_up_all(&pool->migration_wait); 1832 1827 } 1833 1828 1834 1829 static void replace_sub_page(struct size_class *class, struct zspage *zspage, ··· 1831 1876 1832 1877 static bool zs_page_isolate(struct page *page, isolate_mode_t mode) 1833 1878 { 1834 - struct zs_pool *pool; 1835 - struct size_class *class; 1836 1879 struct zspage *zspage; 1837 - struct address_space *mapping; 1838 1880 1839 1881 /* 1840 1882 * Page is locked so zspage couldn't be destroyed. 
For detail, look at ··· 1841 1889 VM_BUG_ON_PAGE(PageIsolated(page), page); 1842 1890 1843 1891 zspage = get_zspage(page); 1844 - 1845 - mapping = page_mapping(page); 1846 - pool = mapping->private_data; 1847 - 1848 - class = zspage_class(pool, zspage); 1849 - 1850 - spin_lock(&class->lock); 1851 - if (get_zspage_inuse(zspage) == 0) { 1852 - spin_unlock(&class->lock); 1853 - return false; 1854 - } 1855 - 1856 - /* zspage is isolated for object migration */ 1857 - if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { 1858 - spin_unlock(&class->lock); 1859 - return false; 1860 - } 1861 - 1862 - /* 1863 - * If this is first time isolation for the zspage, isolate zspage from 1864 - * size_class to prevent further object allocation from the zspage. 1865 - */ 1866 - if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { 1867 - enum fullness_group fullness; 1868 - unsigned int class_idx; 1869 - 1870 - get_zspage_mapping(zspage, &class_idx, &fullness); 1871 - atomic_long_inc(&pool->isolated_pages); 1872 - remove_zspage(class, zspage, fullness); 1873 - } 1874 - 1892 + migrate_write_lock(zspage); 1875 1893 inc_zspage_isolation(zspage); 1876 - spin_unlock(&class->lock); 1894 + migrate_write_unlock(zspage); 1877 1895 1878 1896 return true; 1879 1897 } ··· 1926 2004 1927 2005 dec_zspage_isolation(zspage); 1928 2006 1929 - /* 1930 - * Page migration is done so let's putback isolated zspage to 1931 - * the list if @page is final isolated subpage in the zspage. 1932 - */ 1933 - if (!is_zspage_isolated(zspage)) { 1934 - /* 1935 - * We cannot race with zs_destroy_pool() here because we wait 1936 - * for isolation to hit zero before we start destroying. 1937 - * Also, we ensure that everyone can see pool->destroying before 1938 - * we start waiting. 
1939 - */ 1940 - putback_zspage_deferred(pool, class, zspage); 1941 - zs_pool_dec_isolated(pool); 1942 - } 1943 - 1944 2007 if (page_zone(newpage) != page_zone(page)) { 1945 2008 dec_zone_page_state(page, NR_ZSPAGES); 1946 2009 inc_zone_page_state(newpage, NR_ZSPAGES); ··· 1953 2046 1954 2047 static void zs_page_putback(struct page *page) 1955 2048 { 1956 - struct zs_pool *pool; 1957 - struct size_class *class; 1958 - struct address_space *mapping; 1959 2049 struct zspage *zspage; 1960 2050 1961 2051 VM_BUG_ON_PAGE(!PageMovable(page), page); 1962 2052 VM_BUG_ON_PAGE(!PageIsolated(page), page); 1963 2053 1964 2054 zspage = get_zspage(page); 1965 - mapping = page_mapping(page); 1966 - pool = mapping->private_data; 1967 - class = zspage_class(pool, zspage); 1968 - 1969 - spin_lock(&class->lock); 2055 + migrate_write_lock(zspage); 1970 2056 dec_zspage_isolation(zspage); 1971 - if (!is_zspage_isolated(zspage)) { 1972 - /* 1973 - * Due to page_lock, we cannot free zspage immediately 1974 - * so let's defer. 1975 - */ 1976 - putback_zspage_deferred(pool, class, zspage); 1977 - zs_pool_dec_isolated(pool); 1978 - } 1979 - spin_unlock(&class->lock); 2057 + migrate_write_unlock(zspage); 1980 2058 } 1981 2059 1982 2060 static const struct address_space_operations zsmalloc_aops = { ··· 1983 2091 return 0; 1984 2092 } 1985 2093 1986 - static bool pool_isolated_are_drained(struct zs_pool *pool) 1987 - { 1988 - return atomic_long_read(&pool->isolated_pages) == 0; 1989 - } 1990 - 1991 - /* Function for resolving migration */ 1992 - static void wait_for_isolated_drain(struct zs_pool *pool) 1993 - { 1994 - 1995 - /* 1996 - * We're in the process of destroying the pool, so there are no 1997 - * active allocations. zs_page_isolate() fails for completely free 1998 - * zspages, so we need only wait for the zs_pool's isolated 1999 - * count to hit zero. 
2000 - */ 2001 - wait_event(pool->migration_wait, 2002 - pool_isolated_are_drained(pool)); 2003 - } 2004 - 2005 2094 static void zs_unregister_migration(struct zs_pool *pool) 2006 2095 { 2007 - pool->destroying = true; 2008 - /* 2009 - * We need a memory barrier here to ensure global visibility of 2010 - * pool->destroying. Thus pool->isolated pages will either be 0 in which 2011 - * case we don't care, or it will be > 0 and pool->destroying will 2012 - * ensure that we wake up once isolation hits 0. 2013 - */ 2014 - smp_mb(); 2015 - wait_for_isolated_drain(pool); /* This can block */ 2016 2096 flush_work(&pool->free_work); 2017 2097 iput(pool->inode); 2018 2098 } ··· 2013 2149 list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages); 2014 2150 spin_unlock(&class->lock); 2015 2151 } 2016 - 2017 2152 2018 2153 list_for_each_entry_safe(zspage, tmp, &free_pages, list) { 2019 2154 list_del(&zspage->list); ··· 2225 2362 pool->name = kstrdup(name, GFP_KERNEL); 2226 2363 if (!pool->name) 2227 2364 goto err; 2228 - 2229 - #ifdef CONFIG_COMPACTION 2230 - init_waitqueue_head(&pool->migration_wait); 2231 - #endif 2232 2365 2233 2366 if (create_cache(pool)) 2234 2367 goto err;