Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] mempool: NOMEMALLOC and NORETRY

Mempools have 2 problems.

The first is that mempool_alloc can get stuck in __alloc_pages when it
should instead fail the allocation and take an element from its reserved pool.

The second is that it will happily eat emergency PF_MEMALLOC reserves
instead of going to its own reserved pool.

Fix the first by passing __GFP_NORETRY in the allocation calls in
mempool_alloc. Fix the second by introducing a __GFP_NOMEMALLOC flag which
directs the page allocator not to allocate from the emergency reserves.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Nick Piggin and committed by
Linus Torvalds
b84a35be 8e30f272

+23 -12
+4 -2
include/linux/gfp.h
··· 38 38 #define __GFP_NO_GROW 0x2000u /* Slab internal usage */ 39 39 #define __GFP_COMP 0x4000u /* Add compound page metadata */ 40 40 #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ 41 + #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ 41 42 42 - #define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */ 43 + #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ 43 44 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) 44 45 45 46 /* if you forget to add the bitmask here kernel will crash, period */ 46 47 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ 47 48 __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ 48 - __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP) 49 + __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ 50 + __GFP_NOMEMALLOC) 49 51 50 52 #define GFP_ATOMIC (__GFP_HIGH) 51 53 #define GFP_NOIO (__GFP_WAIT)
+7 -2
mm/mempool.c
··· 198 198 void *element; 199 199 unsigned long flags; 200 200 DEFINE_WAIT(wait); 201 - int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); 201 + int gfp_nowait; 202 + 203 + gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ 204 + gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ 205 + gfp_mask |= __GFP_NOWARN; /* failures are OK */ 206 + gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); 202 207 203 208 might_sleep_if(gfp_mask & __GFP_WAIT); 204 209 repeat_alloc: 205 - element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data); 210 + element = pool->alloc(gfp_nowait, pool->pool_data); 206 211 if (likely(element != NULL)) 207 212 return element; 208 213
+12 -8
mm/page_alloc.c
··· 799 799 } 800 800 801 801 /* This allocation should allow future memory freeing. */ 802 - if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) { 803 - /* go through the zonelist yet again, ignoring mins */ 804 - for (i = 0; (z = zones[i]) != NULL; i++) { 805 - if (!cpuset_zone_allowed(z)) 806 - continue; 807 - page = buffered_rmqueue(z, order, gfp_mask); 808 - if (page) 809 - goto got_pg; 802 + 803 + if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) 804 + && !in_interrupt()) { 805 + if (!(gfp_mask & __GFP_NOMEMALLOC)) { 806 + /* go through the zonelist yet again, ignoring mins */ 807 + for (i = 0; (z = zones[i]) != NULL; i++) { 808 + if (!cpuset_zone_allowed(z)) 809 + continue; 810 + page = buffered_rmqueue(z, order, gfp_mask); 811 + if (page) 812 + goto got_pg; 813 + } 810 814 } 811 815 goto nopage; 812 816 }