Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

memcg: fix page_cgroup allocation

page_cgroup_init() is called from mem_cgroup_init(). But at this
point, we cannot call alloc_bootmem()
(and this caused a panic at boot).

This patch moves page_cgroup_init() to init/main.c.

The boot-time ordering is as follows:
==
parse_args(). # we can trust mem_cgroup_subsys.disabled bit after this.
....
cgroup_init_early() # "early" init of cgroup.
....
setup_arch() # memmap is allocated.
...
page_cgroup_init();
mem_init(); # we cannot call alloc_bootmem after this.
....
cgroup_init() # mem_cgroup is initialized.
==

Before page_cgroup_init(), mem_map must be initialized. So,
I added page_cgroup_init() to init/main.c directly.

(*) Maybe this is not very clean, but:
- cgroup_init_early() is too early
- in cgroup_init(), we would have to use vmalloc() instead of alloc_bootmem().
Vmalloc address space on x86-32 is scarce, and we should avoid very large
vmalloc() allocations there. So we want to use alloc_bootmem(), and therefore
page_cgroup_init() is added directly to init/main.c.

[akpm@linux-foundation.org: remove unneeded/bad mem_cgroup_subsys declaration]
[akpm@linux-foundation.org: fix build]
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

KAMEZAWA Hiroyuki and committed by
Linus Torvalds
94b6da5a be07c4ed

+32 -8
+5
include/linux/page_cgroup.h
··· 99 99 { 100 100 return NULL; 101 101 } 102 + 103 + static inline void page_cgroup_init(void) 104 + { 105 + } 106 + 102 107 #endif 103 108 #endif
+2
init/main.c
··· 52 52 #include <linux/key.h> 53 53 #include <linux/unwind.h> 54 54 #include <linux/buffer_head.h> 55 + #include <linux/page_cgroup.h> 55 56 #include <linux/debug_locks.h> 56 57 #include <linux/debugobjects.h> 57 58 #include <linux/lockdep.h> ··· 648 647 vmalloc_init(); 649 648 vfs_caches_init_early(); 650 649 cpuset_init_early(); 650 + page_cgroup_init(); 651 651 mem_init(); 652 652 enable_debug_pagealloc(); 653 653 cpu_hotplug_init();
-1
mm/memcontrol.c
··· 1088 1088 int node; 1089 1089 1090 1090 if (unlikely((cont->parent) == NULL)) { 1091 - page_cgroup_init(); 1092 1091 mem = &init_mem_cgroup; 1093 1092 } else { 1094 1093 mem = mem_cgroup_alloc();
+25 -7
mm/page_cgroup.c
··· 4 4 #include <linux/bit_spinlock.h> 5 5 #include <linux/page_cgroup.h> 6 6 #include <linux/hash.h> 7 + #include <linux/slab.h> 7 8 #include <linux/memory.h> 8 9 #include <linux/vmalloc.h> 10 + #include <linux/cgroup.h> 9 11 10 12 static void __meminit 11 13 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) ··· 69 67 70 68 int nid, fail; 71 69 70 + if (mem_cgroup_subsys.disabled) 71 + return; 72 + 72 73 for_each_online_node(nid) { 73 74 fail = alloc_node_page_cgroup(nid); 74 75 if (fail) ··· 112 107 nid = page_to_nid(pfn_to_page(pfn)); 113 108 114 109 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 115 - base = kmalloc_node(table_size, GFP_KERNEL, nid); 116 - if (!base) 117 - base = vmalloc_node(table_size, nid); 110 + if (slab_is_available()) { 111 + base = kmalloc_node(table_size, GFP_KERNEL, nid); 112 + if (!base) 113 + base = vmalloc_node(table_size, nid); 114 + } else { 115 + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size, 116 + PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 117 + } 118 118 119 119 if (!base) { 120 120 printk(KERN_ERR "page cgroup allocation failure\n"); ··· 146 136 if (!ms || !ms->page_cgroup) 147 137 return; 148 138 base = ms->page_cgroup + pfn; 149 - ms->page_cgroup = NULL; 150 - if (is_vmalloc_addr(base)) 139 + if (is_vmalloc_addr(base)) { 151 140 vfree(base); 152 - else 153 - kfree(base); 141 + ms->page_cgroup = NULL; 142 + } else { 143 + struct page *page = virt_to_page(base); 144 + if (!PageReserved(page)) { /* Is bootmem ? */ 145 + kfree(base); 146 + ms->page_cgroup = NULL; 147 + } 148 + } 154 149 } 155 150 156 151 int online_page_cgroup(unsigned long start_pfn, ··· 228 213 { 229 214 unsigned long pfn; 230 215 int fail = 0; 216 + 217 + if (mem_cgroup_subsys.disabled) 218 + return; 231 219 232 220 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { 233 221 if (!pfn_present(pfn))