Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm,hwpoison: refactor soft_offline_huge_page and __soft_offline_page

Merging soft_offline_huge_page and __soft_offline_page lets us get rid of
quite some duplicated code, and makes the code much easier to follow.

Now, __soft_offline_page will handle both normal and hugetlb pages.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Aristeu Rozanski <aris@ruivo.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dmitry Yakunin <zeil@yandex-team.ru>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Oscar Salvador <osalvador@suse.com>
Cc: Qian Cai <cai@lca.pw>
Cc: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200922135650.1634-11-osalvador@suse.de
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Oscar Salvador and committed by
Linus Torvalds
6b9a217e 79f5f8fa

+83 -101
+83 -101
mm/memory-failure.c
··· 65 65 66 66 atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); 67 67 68 - static void page_handle_poison(struct page *page, bool release) 68 + static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release) 69 69 { 70 + if (hugepage_or_freepage) { 71 + /* 72 + * Doing this check for free pages is also fine since dissolve_free_huge_page 73 + * returns 0 for non-hugetlb pages as well. 74 + */ 75 + if (dissolve_free_huge_page(page) || !take_page_off_buddy(page)) 76 + /* 77 + * We could fail to take off the target page from buddy 78 + * for example due to racy page allocaiton, but that's 79 + * acceptable because soft-offlined page is not broken 80 + * and if someone really want to use it, they should 81 + * take it. 82 + */ 83 + return false; 84 + } 85 + 70 86 SetPageHWPoison(page); 71 87 if (release) 72 88 put_page(page); 73 89 page_ref_inc(page); 74 90 num_poisoned_pages_inc(); 91 + 92 + return true; 75 93 } 76 94 77 95 #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE) ··· 1743 1725 return ret; 1744 1726 } 1745 1727 1746 - static int soft_offline_huge_page(struct page *page, int flags) 1728 + static bool isolate_page(struct page *page, struct list_head *pagelist) 1747 1729 { 1748 - int ret; 1749 - unsigned long pfn = page_to_pfn(page); 1750 - struct page *hpage = compound_head(page); 1751 - LIST_HEAD(pagelist); 1730 + bool isolated = false; 1731 + bool lru = PageLRU(page); 1752 1732 1753 - /* 1754 - * This double-check of PageHWPoison is to avoid the race with 1755 - * memory_failure(). See also comment in __soft_offline_page(). 
1756 - */ 1757 - lock_page(hpage); 1758 - if (PageHWPoison(hpage)) { 1759 - unlock_page(hpage); 1760 - put_page(hpage); 1761 - pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); 1762 - return -EBUSY; 1763 - } 1764 - unlock_page(hpage); 1765 - 1766 - ret = isolate_huge_page(hpage, &pagelist); 1767 - /* 1768 - * get_any_page() and isolate_huge_page() takes a refcount each, 1769 - * so need to drop one here. 1770 - */ 1771 - put_page(hpage); 1772 - if (!ret) { 1773 - pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn); 1774 - return -EBUSY; 1775 - } 1776 - 1777 - ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, 1778 - MIGRATE_SYNC, MR_MEMORY_FAILURE); 1779 - if (ret) { 1780 - pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n", 1781 - pfn, ret, page->flags, &page->flags); 1782 - if (!list_empty(&pagelist)) 1783 - putback_movable_pages(&pagelist); 1784 - if (ret > 0) 1785 - ret = -EIO; 1733 + if (PageHuge(page)) { 1734 + isolated = isolate_huge_page(page, pagelist); 1786 1735 } else { 1787 - /* 1788 - * We set PG_hwpoison only when we were able to take the page 1789 - * off the buddy. 1790 - */ 1791 - if (!dissolve_free_huge_page(page) && take_page_off_buddy(page)) 1792 - page_handle_poison(page, false); 1736 + if (lru) 1737 + isolated = !isolate_lru_page(page); 1793 1738 else 1794 - ret = -EBUSY; 1739 + isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE); 1740 + 1741 + if (isolated) 1742 + list_add(&page->lru, pagelist); 1795 1743 } 1796 - return ret; 1744 + 1745 + if (isolated && lru) 1746 + inc_node_page_state(page, NR_ISOLATED_ANON + 1747 + page_is_file_lru(page)); 1748 + 1749 + /* 1750 + * If we succeed to isolate the page, we grabbed another refcount on 1751 + * the page, so we can safely drop the one we got from get_any_pages(). 
1752 + * If we failed to isolate the page, it means that we cannot go further 1753 + * and we will return an error, so drop the reference we got from 1754 + * get_any_pages() as well. 1755 + */ 1756 + put_page(page); 1757 + return isolated; 1797 1758 } 1798 1759 1799 - static int __soft_offline_page(struct page *page, int flags) 1760 + /* 1761 + * __soft_offline_page handles hugetlb-pages and non-hugetlb pages. 1762 + * If the page is a non-dirty unmapped page-cache page, it simply invalidates. 1763 + * If the page is mapped, it migrates the contents over. 1764 + */ 1765 + static int __soft_offline_page(struct page *page) 1800 1766 { 1801 - int ret; 1767 + int ret = 0; 1802 1768 unsigned long pfn = page_to_pfn(page); 1769 + struct page *hpage = compound_head(page); 1770 + char const *msg_page[] = {"page", "hugepage"}; 1771 + bool huge = PageHuge(page); 1772 + LIST_HEAD(pagelist); 1803 1773 1804 1774 /* 1805 1775 * Check PageHWPoison again inside page lock because PageHWPoison ··· 1796 1790 * so there's no race between soft_offline_page() and memory_failure(). 1797 1791 */ 1798 1792 lock_page(page); 1799 - wait_on_page_writeback(page); 1793 + if (!PageHuge(page)) 1794 + wait_on_page_writeback(page); 1800 1795 if (PageHWPoison(page)) { 1801 1796 unlock_page(page); 1802 1797 put_page(page); 1803 1798 pr_info("soft offline: %#lx page already poisoned\n", pfn); 1804 1799 return -EBUSY; 1805 1800 } 1806 - /* 1807 - * Try to invalidate first. This should work for 1808 - * non dirty unmapped page cache pages. 1809 - */ 1810 - ret = invalidate_inode_page(page); 1801 + 1802 + if (!PageHuge(page)) 1803 + /* 1804 + * Try to invalidate first. This should work for 1805 + * non dirty unmapped page cache pages. 1806 + */ 1807 + ret = invalidate_inode_page(page); 1811 1808 unlock_page(page); 1809 + 1812 1810 /* 1813 1811 * RED-PEN would be better to keep it isolated here, but we 1814 1812 * would need to fix isolation locking first. 
1815 1813 */ 1816 - if (ret == 1) { 1814 + if (ret) { 1817 1815 pr_info("soft_offline: %#lx: invalidated\n", pfn); 1818 - page_handle_poison(page, true); 1816 + page_handle_poison(page, false, true); 1819 1817 return 0; 1820 1818 } 1821 1819 1822 - /* 1823 - * Simple invalidation didn't work. 1824 - * Try to migrate to a new page instead. migrate.c 1825 - * handles a large number of cases for us. 1826 - */ 1827 - if (PageLRU(page)) 1828 - ret = isolate_lru_page(page); 1829 - else 1830 - ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE); 1831 - /* 1832 - * Drop page reference which is came from get_any_page() 1833 - * successful isolate_lru_page() already took another one. 1834 - */ 1835 - put_page(page); 1836 - if (!ret) { 1837 - LIST_HEAD(pagelist); 1838 - /* 1839 - * After isolated lru page, the PageLRU will be cleared, 1840 - * so use !__PageMovable instead for LRU page's mapping 1841 - * cannot have PAGE_MAPPING_MOVABLE. 1842 - */ 1843 - if (!__PageMovable(page)) 1844 - inc_node_page_state(page, NR_ISOLATED_ANON + 1845 - page_is_file_lru(page)); 1846 - list_add(&page->lru, &pagelist); 1820 + if (isolate_page(hpage, &pagelist)) { 1847 1821 ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, 1848 1822 MIGRATE_SYNC, MR_MEMORY_FAILURE); 1849 1823 if (!ret) { 1850 - page_handle_poison(page, true); 1824 + bool release = !huge; 1825 + 1826 + if (!page_handle_poison(page, huge, release)) 1827 + ret = -EBUSY; 1851 1828 } else { 1852 1829 if (!list_empty(&pagelist)) 1853 1830 putback_movable_pages(&pagelist); 1854 1831 1855 - pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n", 1856 - pfn, ret, page->flags, &page->flags); 1832 + pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n", 1833 + pfn, msg_page[huge], ret, page->flags, &page->flags); 1857 1834 if (ret > 0) 1858 1835 ret = -EIO; 1859 1836 } 1860 1837 } else { 1861 - pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n", 1862 - pfn, 
ret, page_count(page), page->flags, &page->flags); 1838 + pr_info("soft offline: %#lx: %s isolation failed: %d, page count %d, type %lx (%pGp)\n", 1839 + pfn, msg_page[huge], ret, page_count(page), page->flags, &page->flags); 1840 + ret = -EBUSY; 1863 1841 } 1864 1842 return ret; 1865 1843 } 1866 1844 1867 - static int soft_offline_in_use_page(struct page *page, int flags) 1845 + static int soft_offline_in_use_page(struct page *page) 1868 1846 { 1869 - int ret; 1870 1847 struct page *hpage = compound_head(page); 1871 1848 1872 1849 if (!PageHuge(page) && PageTransHuge(hpage)) 1873 1850 if (try_to_split_thp_page(page, "soft offline") < 0) 1874 1851 return -EBUSY; 1875 - 1876 - if (PageHuge(page)) 1877 - ret = soft_offline_huge_page(page, flags); 1878 - else 1879 - ret = __soft_offline_page(page, flags); 1880 - return ret; 1852 + return __soft_offline_page(page); 1881 1853 } 1882 1854 1883 1855 static int soft_offline_free_page(struct page *page) 1884 1856 { 1885 - int rc = -EBUSY; 1857 + int rc = 0; 1886 1858 1887 - if (!dissolve_free_huge_page(page) && take_page_off_buddy(page)) { 1888 - page_handle_poison(page, false); 1889 - rc = 0; 1890 - } 1859 + if (!page_handle_poison(page, true, false)) 1860 + rc = -EBUSY; 1891 1861 1892 1862 return rc; 1893 1863 } ··· 1914 1932 put_online_mems(); 1915 1933 1916 1934 if (ret > 0) 1917 - ret = soft_offline_in_use_page(page, flags); 1935 + ret = soft_offline_in_use_page(page); 1918 1936 else if (ret == 0) 1919 1937 ret = soft_offline_free_page(page); 1920 1938