Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

memory unplug: page isolation

Implement generic chunk-of-pages isolation method by using page grouping ops.

This patch adds MIGRATE_ISOLATE to MIGRATE_TYPES. As a result:
- MIGRATE_TYPES increases.
- bitmap for migratetype is enlarged.

Pages of the MIGRATE_ISOLATE migratetype will not be allocated even if they are free.
By this, you can isolate *freed* pages from users. How to free pages is not
a purpose of this patch. You may use the reclaim and migrate code to free pages.

If start_isolate_page_range(start,end) is called,
- the migratetype of the range becomes MIGRATE_ISOLATE if
its current type is MIGRATE_MOVABLE. (*) This check can be updated as other
memory-reclaiming work makes progress.
- MIGRATE_ISOLATE is not on migratetype fallback list.
- All free pages and will-be-freed pages are isolated.
To check whether all pages in the range are isolated, use test_pages_isolated().
To cancel isolation, use undo_isolate_page_range().

Changes V6 -> V7
- removed unnecessary #ifdef

There are HOLES_IN_ZONE handling codes...I'm glad if we can remove them..

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

KAMEZAWA Hiroyuki and committed by
Linus Torvalds
a5d76b54 75884fb1

+223 -3
+2 -1
include/linux/mmzone.h
··· 37 37 #define MIGRATE_RECLAIMABLE 1 38 38 #define MIGRATE_MOVABLE 2 39 39 #define MIGRATE_RESERVE 3 40 - #define MIGRATE_TYPES 4 40 + #define MIGRATE_ISOLATE 4 /* can't allocate from here */ 41 + #define MIGRATE_TYPES 5 41 42 42 43 #define for_each_migratetype_order(order, type) \ 43 44 for (order = 0; order < MAX_ORDER; order++) \
+37
include/linux/page-isolation.h
··· 1 + #ifndef __LINUX_PAGEISOLATION_H 2 + #define __LINUX_PAGEISOLATION_H 3 + 4 + /* 5 + * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. 6 + * If specified range includes migrate types other than MOVABLE, 7 + * this will fail with -EBUSY. 8 + * 9 + * For isolating all pages in the range finally, the caller has to 10 + * free all pages in the range. test_pages_isolated() can be used to 11 + * test it. 12 + */ 13 + extern int 14 + start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); 15 + 16 + /* 17 + * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. 18 + * target range is [start_pfn, end_pfn) 19 + */ 20 + extern int 21 + undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); 22 + 23 + /* 24 + * Test whether all pages in [start_pfn, end_pfn) are isolated. 25 + */ 26 + extern int 27 + test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); 28 + 29 + /* 30 + * Internal functions. Changes a pageblock's migrate type. 31 + * Please use start_isolate_page_range()/undo_isolate_page_range(). 32 + */ 33 + extern int set_migratetype_isolate(struct page *page); 34 + extern void unset_migratetype_isolate(struct page *page); 35 + 36 + 37 + #endif
+1 -1
include/linux/pageblock-flags.h
··· 31 31 32 32 /* Bit indices that affect a whole block of pages */ 33 33 enum pageblock_bits { 34 - PB_range(PB_migrate, 2), /* 2 bits required for migrate types */ 34 + PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ 35 35 NR_PAGEBLOCK_BITS 36 36 }; 37 37
+1 -1
mm/Makefile
··· 11 11 page_alloc.o page-writeback.o pdflush.o \ 12 12 readahead.o swap.o truncate.o vmscan.o \ 13 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 14 - $(mmu-y) 14 + page_isolation.o $(mmu-y) 15 15 16 16 obj-$(CONFIG_BOUNCE) += bounce.o 17 17 obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
+44
mm/page_alloc.c
··· 41 41 #include <linux/pfn.h> 42 42 #include <linux/backing-dev.h> 43 43 #include <linux/fault-inject.h> 44 + #include <linux/page-isolation.h> 44 45 45 46 #include <asm/tlbflush.h> 46 47 #include <asm/div64.h> ··· 4433 4432 __set_bit(bitidx + start_bitidx, bitmap); 4434 4433 else 4435 4434 __clear_bit(bitidx + start_bitidx, bitmap); 4435 + } 4436 + 4437 + /* 4438 + * This is designed as a sub-function... please see page_isolation.c also. 4439 + * Sets/clears a pageblock's type to be ISOLATE. 4440 + * The page allocator never allocates memory from an ISOLATE block. 4441 + */ 4442 + 4443 + int set_migratetype_isolate(struct page *page) 4444 + { 4445 + struct zone *zone; 4446 + unsigned long flags; 4447 + int ret = -EBUSY; 4448 + 4449 + zone = page_zone(page); 4450 + spin_lock_irqsave(&zone->lock, flags); 4451 + /* 4452 + * In the future, more migrate types will be able to be isolation targets. 4453 + */ 4454 + if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) 4455 + goto out; 4456 + set_pageblock_migratetype(page, MIGRATE_ISOLATE); 4457 + move_freepages_block(zone, page, MIGRATE_ISOLATE); 4458 + ret = 0; 4459 + out: 4460 + spin_unlock_irqrestore(&zone->lock, flags); 4461 + if (!ret) 4462 + drain_all_local_pages(); 4463 + return ret; 4464 + } 4465 + 4466 + void unset_migratetype_isolate(struct page *page) 4467 + { 4468 + struct zone *zone; 4469 + unsigned long flags; 4470 + zone = page_zone(page); 4471 + spin_lock_irqsave(&zone->lock, flags); 4472 + if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 4473 + goto out; 4474 + set_pageblock_migratetype(page, MIGRATE_MOVABLE); 4475 + move_freepages_block(zone, page, MIGRATE_MOVABLE); 4476 + out: 4477 + spin_unlock_irqrestore(&zone->lock, flags); 4436 4478 }
+138
mm/page_isolation.c
··· 1 + /* 2 + * linux/mm/page_isolation.c 3 + */ 4 + 5 + #include <stddef.h> 6 + #include <linux/mm.h> 7 + #include <linux/page-isolation.h> 8 + #include <linux/pageblock-flags.h> 9 + #include "internal.h" 10 + 11 + static inline struct page * 12 + __first_valid_page(unsigned long pfn, unsigned long nr_pages) 13 + { 14 + int i; 15 + for (i = 0; i < nr_pages; i++) 16 + if (pfn_valid_within(pfn + i)) 17 + break; 18 + if (unlikely(i == nr_pages)) 19 + return NULL; 20 + return pfn_to_page(pfn + i); 21 + } 22 + 23 + /* 24 + * start_isolate_page_range() -- make page-allocation-type of range of pages 25 + * to be MIGRATE_ISOLATE. 26 + * @start_pfn: The lower PFN of the range to be isolated. 27 + * @end_pfn: The upper PFN of the range to be isolated. 28 + * 29 + * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in 30 + * the range will never be allocated. Any free pages and pages freed in the 31 + * future will not be allocated again. 32 + * 33 + * start_pfn/end_pfn must be aligned to pageblock_order. 34 + * Returns 0 on success and -EBUSY if any part of range cannot be isolated. 35 + */ 36 + int 37 + start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) 38 + { 39 + unsigned long pfn; 40 + unsigned long undo_pfn; 41 + struct page *page; 42 + 43 + BUG_ON((start_pfn) & (pageblock_nr_pages - 1)); 44 + BUG_ON((end_pfn) & (pageblock_nr_pages - 1)); 45 + 46 + for (pfn = start_pfn; 47 + pfn < end_pfn; 48 + pfn += pageblock_nr_pages) { 49 + page = __first_valid_page(pfn, pageblock_nr_pages); 50 + if (page && set_migratetype_isolate(page)) { 51 + undo_pfn = pfn; 52 + goto undo; 53 + } 54 + } 55 + return 0; 56 + undo: 57 + for (pfn = start_pfn; 58 + pfn <= undo_pfn; 59 + pfn += pageblock_nr_pages) 60 + unset_migratetype_isolate(pfn_to_page(pfn)); 61 + 62 + return -EBUSY; 63 + } 64 + 65 + /* 66 + * Make isolated pages available again. 
67 + */ 68 + int 69 + undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) 70 + { 71 + unsigned long pfn; 72 + struct page *page; 73 + BUG_ON((start_pfn) & (pageblock_nr_pages - 1)); 74 + BUG_ON((end_pfn) & (pageblock_nr_pages - 1)); 75 + for (pfn = start_pfn; 76 + pfn < end_pfn; 77 + pfn += pageblock_nr_pages) { 78 + page = __first_valid_page(pfn, pageblock_nr_pages); 79 + if (!page || get_pageblock_flags(page) != MIGRATE_ISOLATE) 80 + continue; 81 + unset_migratetype_isolate(page); 82 + } 83 + return 0; 84 + } 85 + /* 86 + * Test whether all pages in the range are free (i.e. isolated). 87 + * All pages in [start_pfn...end_pfn) must be in the same zone. 88 + * zone->lock must be held before calling this. 89 + * 90 + * Returns 1 if all pages in the range are isolated. 91 + */ 92 + static int 93 + __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) 94 + { 95 + struct page *page; 96 + 97 + while (pfn < end_pfn) { 98 + if (!pfn_valid_within(pfn)) { 99 + pfn++; 100 + continue; 101 + } 102 + page = pfn_to_page(pfn); 103 + if (PageBuddy(page)) 104 + pfn += 1 << page_order(page); 105 + else if (page_count(page) == 0 && 106 + page_private(page) == MIGRATE_ISOLATE) 107 + pfn += 1; 108 + else 109 + break; 110 + } 111 + if (pfn < end_pfn) 112 + return 0; 113 + return 1; 114 + } 115 + 116 + int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) 117 + { 118 + unsigned long pfn; 119 + struct page *page; 120 + 121 + pfn = start_pfn; 122 + /* 123 + * Note: pageblock_nr_pages != MAX_ORDER. Hence, chunks of free pages 124 + * are not necessarily aligned to pageblock_nr_pages. 125 + * So we just check the pagetype first. 
126 + */ 127 + for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 128 + page = __first_valid_page(pfn, pageblock_nr_pages); 129 + if (page && get_pageblock_flags(page) != MIGRATE_ISOLATE) 130 + break; 131 + } 132 + if (pfn < end_pfn) 133 + return -EBUSY; 134 + /* Check all pages are free or Marked as ISOLATED */ 135 + if (__test_page_isolated_in_pageblock(start_pfn, end_pfn)) 136 + return 0; 137 + return -EBUSY; 138 + }