Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

base/memory, hotplug: fix a kernel oops in show_valid_zones()

Reading a sysfs "memoryN/valid_zones" file leads to the following oops
when the first page of a range is not backed by struct page.
show_valid_zones() assumes that 'start_pfn' is always valid for
page_zone().

BUG: unable to handle kernel paging request at ffffea017a000000
IP: show_valid_zones+0x6f/0x160

This issue may happen on x86-64 systems with 64GiB or more memory since
their memory block size is bumped up to 2GiB. [1] An example of such a
system is described below. 0x3240000000 is only aligned to 1GiB, so
this memory block starts from 0x3200000000, which is not backed by
struct page.

BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable

Since test_pages_in_a_zone() already checks holes, fix this issue by
extending this function to return 'valid_start' and 'valid_end' for a
given range. show_valid_zones() then proceeds with the valid range.

[1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on
large-memory x86-64 systems")'

Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org> [4.4+]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Toshi Kani and committed by
Linus Torvalds
a96dfddb deb88a2a

+23 -12
+6 -6
drivers/base/memory.c
··· 389 389 { 390 390 struct memory_block *mem = to_memory_block(dev); 391 391 unsigned long start_pfn, end_pfn; 392 + unsigned long valid_start, valid_end, valid_pages; 392 393 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 393 - struct page *first_page; 394 394 struct zone *zone; 395 395 int zone_shift = 0; 396 396 397 397 start_pfn = section_nr_to_pfn(mem->start_section_nr); 398 398 end_pfn = start_pfn + nr_pages; 399 - first_page = pfn_to_page(start_pfn); 400 399 401 400 /* The block contains more than one zone can not be offlined. */ 402 - if (!test_pages_in_a_zone(start_pfn, end_pfn)) 401 + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) 403 402 return sprintf(buf, "none\n"); 404 403 405 - zone = page_zone(first_page); 404 + zone = page_zone(pfn_to_page(valid_start)); 405 + valid_pages = valid_end - valid_start; 406 406 407 407 /* MMOP_ONLINE_KEEP */ 408 408 sprintf(buf, "%s", zone->name); 409 409 410 410 /* MMOP_ONLINE_KERNEL */ 411 - zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift); 411 + zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift); 412 412 if (zone_shift) { 413 413 strcat(buf, " "); 414 414 strcat(buf, (zone + zone_shift)->name); 415 415 } 416 416 417 417 /* MMOP_ONLINE_MOVABLE */ 418 - zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift); 418 + zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift); 419 419 if (zone_shift) { 420 420 strcat(buf, " "); 421 421 strcat(buf, (zone + zone_shift)->name);
+2 -1
include/linux/memory_hotplug.h
··· 85 85 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); 86 86 /* VM interface that may be used by firmware interface */ 87 87 extern int online_pages(unsigned long, unsigned long, int); 88 - extern int test_pages_in_a_zone(unsigned long, unsigned long); 88 + extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, 89 + unsigned long *valid_start, unsigned long *valid_end); 89 90 extern void __offline_isolated_pages(unsigned long, unsigned long); 90 91 91 92 typedef void (*online_page_callback_t)(struct page *page);
+15 -5
mm/memory_hotplug.c
··· 1484 1484 1485 1485 /* 1486 1486 * Confirm all pages in a range [start, end) belong to the same zone. 1487 + * When true, return its valid [start, end). 1487 1488 */ 1488 - int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) 1489 + int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, 1490 + unsigned long *valid_start, unsigned long *valid_end) 1489 1491 { 1490 1492 unsigned long pfn, sec_end_pfn; 1493 + unsigned long start, end; 1491 1494 struct zone *zone = NULL; 1492 1495 struct page *page; 1493 1496 int i; ··· 1512 1509 page = pfn_to_page(pfn + i); 1513 1510 if (zone && page_zone(page) != zone) 1514 1511 return 0; 1512 + if (!zone) 1513 + start = pfn + i; 1515 1514 zone = page_zone(page); 1515 + end = pfn + MAX_ORDER_NR_PAGES; 1516 1516 } 1517 1517 } 1518 1518 1519 - if (zone) 1519 + if (zone) { 1520 + *valid_start = start; 1521 + *valid_end = end; 1520 1522 return 1; 1521 - else 1523 + } else { 1522 1524 return 0; 1525 + } 1523 1526 } 1524 1527 1525 1528 /* ··· 1852 1843 long offlined_pages; 1853 1844 int ret, drain, retry_max, node; 1854 1845 unsigned long flags; 1846 + unsigned long valid_start, valid_end; 1855 1847 struct zone *zone; 1856 1848 struct memory_notify arg; 1857 1849 ··· 1863 1853 return -EINVAL; 1864 1854 /* This makes hotplug much easier...and readable. 1865 1855 we assume this for now. .*/ 1866 - if (!test_pages_in_a_zone(start_pfn, end_pfn)) 1856 + if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end)) 1867 1857 return -EINVAL; 1868 1858 1869 - zone = page_zone(pfn_to_page(start_pfn)); 1859 + zone = page_zone(pfn_to_page(valid_start)); 1870 1860 node = zone_to_nid(zone); 1871 1861 nr_pages = end_pfn - start_pfn; 1872 1862