Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: fix GFP_THISNODE callers and clarify

GFP_THISNODE is for callers that implement their own clever fallback to
remote nodes. It restricts the allocation to the specified node and
does not invoke reclaim, assuming that the caller will take care of it
when the fallback fails, e.g. through a subsequent allocation request
without GFP_THISNODE set.

However, many current GFP_THISNODE users only want the node exclusive
aspect of the flag, without actually implementing their own fallback or
triggering reclaim if necessary. This results in things like page
migration failing prematurely even when there is easily reclaimable
memory available, unless kswapd happens to be running already or a
concurrent allocation attempt triggers the necessary reclaim.

Convert all callsites that don't implement their own fallback strategy
to __GFP_THISNODE. This restricts the allocation to a single node too, but
at the same time allows the allocator to enter the slowpath, wake
kswapd, and invoke direct reclaim if necessary, to make the allocation
happen when memory is full.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Jan Stancek <jstancek@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Johannes Weiner and committed by
Linus Torvalds
e97ca8e5 fa389e22

+19 -13
+1 -1
arch/ia64/kernel/uncached.c
··· 98 98 /* attempt to allocate a granule's worth of cached memory pages */ 99 99 100 100 page = alloc_pages_exact_node(nid, 101 - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 101 + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 102 102 IA64_GRANULE_SHIFT-PAGE_SHIFT); 103 103 if (!page) { 104 104 mutex_unlock(&uc_pool->add_chunk_mutex);
+2 -1
arch/powerpc/platforms/cell/ras.c
··· 123 123 124 124 area->nid = nid; 125 125 area->order = order; 126 - area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE, 126 + area->pages = alloc_pages_exact_node(area->nid, 127 + GFP_KERNEL|__GFP_THISNODE, 127 128 area->order); 128 129 129 130 if (!area->pages) {
+1 -1
drivers/misc/sgi-xp/xpc_uv.c
··· 240 240 241 241 nid = cpu_to_node(cpu); 242 242 page = alloc_pages_exact_node(nid, 243 - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 243 + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 244 244 pg_order); 245 245 if (page == NULL) { 246 246 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
+4
include/linux/gfp.h
··· 123 123 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ 124 124 __GFP_NO_KSWAPD) 125 125 126 + /* 127 + * GFP_THISNODE does not perform any reclaim, you most likely want to 128 + * use __GFP_THISNODE to allocate from a given node without fallback! 129 + */ 126 130 #ifdef CONFIG_NUMA 127 131 #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) 128 132 #else
+2 -2
include/linux/mmzone.h
··· 590 590 591 591 /* 592 592 * The NUMA zonelists are doubled because we need zonelists that restrict the 593 - * allocations to a single node for GFP_THISNODE. 593 + * allocations to a single node for __GFP_THISNODE. 594 594 * 595 595 * [0] : Zonelist with fallback 596 - * [1] : No fallback (GFP_THISNODE) 596 + * [1] : No fallback (__GFP_THISNODE) 597 597 */ 598 598 #define MAX_ZONELISTS 2 599 599
+1 -1
include/linux/slab.h
··· 410 410 * 411 411 * %GFP_NOWAIT - Allocation will not sleep. 412 412 * 413 - * %GFP_THISNODE - Allocate node-local memory only. 413 + * %__GFP_THISNODE - Allocate node-local memory only. 414 414 * 415 415 * %GFP_DMA - Allocation suitable for DMA. 416 416 * Should only be used for kmalloc() caches. Otherwise, use a
+2 -2
kernel/profile.c
··· 549 549 struct page *page; 550 550 551 551 page = alloc_pages_exact_node(node, 552 - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 552 + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 553 553 0); 554 554 if (!page) 555 555 goto out_cleanup; 556 556 per_cpu(cpu_profile_hits, cpu)[1] 557 557 = (struct profile_hit *)page_address(page); 558 558 page = alloc_pages_exact_node(node, 559 - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 559 + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 560 560 0); 561 561 if (!page) 562 562 goto out_cleanup;
+6 -5
mm/migrate.c
··· 1158 1158 pm->node); 1159 1159 else 1160 1160 return alloc_pages_exact_node(pm->node, 1161 - GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); 1161 + GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); 1162 1162 } 1163 1163 1164 1164 /* ··· 1544 1544 struct page *newpage; 1545 1545 1546 1546 newpage = alloc_pages_exact_node(nid, 1547 - (GFP_HIGHUSER_MOVABLE | GFP_THISNODE | 1548 - __GFP_NOMEMALLOC | __GFP_NORETRY | 1549 - __GFP_NOWARN) & 1547 + (GFP_HIGHUSER_MOVABLE | 1548 + __GFP_THISNODE | __GFP_NOMEMALLOC | 1549 + __GFP_NORETRY | __GFP_NOWARN) & 1550 1550 ~GFP_IOFS, 0); 1551 1551 1552 1552 return newpage; ··· 1747 1747 goto out_dropref; 1748 1748 1749 1749 new_page = alloc_pages_node(node, 1750 - (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); 1750 + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, 1751 + HPAGE_PMD_ORDER); 1751 1752 if (!new_page) 1752 1753 goto out_fail; 1753 1754