Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: embed the memcg pointer directly into struct page

Memory cgroups used to have 5 per-page pointers. To allow users to
disable that amount of overhead during runtime, those pointers were
allocated in a separate array, with a translation layer between them and
struct page.

There is now only one page pointer remaining: the memcg pointer, that
indicates which cgroup the page is associated with when charged. The
complexity of runtime allocation and the runtime translation overhead is
no longer justified to save that *potential* 0.19% of memory. With
CONFIG_SLUB, page->mem_cgroup actually sits in the doubleword padding
after the page->private member and doesn't even increase struct page,
and then this patch actually saves space. Remaining users that care can
still compile their kernels without CONFIG_MEMCG.

text data bss dec hex filename
8828345 1725264 983040 11536649 b00909 vmlinux.old
8827425 1725264 966656 11519345 afc571 vmlinux.new

[mhocko@suse.cz: update Documentation/cgroups/memory.txt]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Johannes Weiner and committed by Linus Torvalds.
1306a85a 22811c6b

+46 -487
+5
Documentation/cgroups/memory.txt
··· 1 1 Memory Resource Controller 2 2 3 + NOTE: This document is hopelessly outdated and it asks for a complete 4 + rewrite. It still contains a useful information so we are keeping it 5 + here but make sure to check the current code if you need a deeper 6 + understanding. 7 + 3 8 NOTE: The Memory Resource Controller has generically been referred to as the 4 9 memory controller in this document. Do not confuse memory controller 5 10 used here with the memory controller that is used in hardware.
+1 -5
include/linux/memcontrol.h
··· 25 25 #include <linux/jump_label.h> 26 26 27 27 struct mem_cgroup; 28 - struct page_cgroup; 29 28 struct page; 30 29 struct mm_struct; 31 30 struct kmem_cache; ··· 465 466 * memcg_kmem_uncharge_pages: uncharge pages from memcg 466 467 * @page: pointer to struct page being freed 467 468 * @order: allocation order. 468 - * 469 - * there is no need to specify memcg here, since it is embedded in page_cgroup 470 469 */ 471 470 static inline void 472 471 memcg_kmem_uncharge_pages(struct page *page, int order) ··· 481 484 * 482 485 * Needs to be called after memcg_kmem_newpage_charge, regardless of success or 483 486 * failure of the allocation. if @page is NULL, this function will revert the 484 - * charges. Otherwise, it will commit the memcg given by @memcg to the 485 - * corresponding page_cgroup. 487 + * charges. Otherwise, it will commit @page to @memcg. 486 488 */ 487 489 static inline void 488 490 memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+5
include/linux/mm_types.h
··· 22 22 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) 23 23 24 24 struct address_space; 25 + struct mem_cgroup; 25 26 26 27 #define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) 27 28 #define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ ··· 167 166 struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ 168 167 struct page *first_page; /* Compound tail pages */ 169 168 }; 169 + 170 + #ifdef CONFIG_MEMCG 171 + struct mem_cgroup *mem_cgroup; 172 + #endif 170 173 171 174 /* 172 175 * On machines where all RAM is mapped into kernel address space,
-12
include/linux/mmzone.h
··· 722 722 int nr_zones; 723 723 #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ 724 724 struct page *node_mem_map; 725 - #ifdef CONFIG_MEMCG 726 - struct page_cgroup *node_page_cgroup; 727 - #endif 728 725 #endif 729 726 #ifndef CONFIG_NO_BOOTMEM 730 727 struct bootmem_data *bdata; ··· 1075 1078 #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) 1076 1079 1077 1080 struct page; 1078 - struct page_cgroup; 1079 1081 struct mem_section { 1080 1082 /* 1081 1083 * This is, logically, a pointer to an array of struct ··· 1092 1096 1093 1097 /* See declaration of similar field in struct zone */ 1094 1098 unsigned long *pageblock_flags; 1095 - #ifdef CONFIG_MEMCG 1096 - /* 1097 - * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use 1098 - * section. (see memcontrol.h/page_cgroup.h about this.) 1099 - */ 1100 - struct page_cgroup *page_cgroup; 1101 - unsigned long pad; 1102 - #endif 1103 1099 /* 1104 1100 * WARNING: mem_section must be a power-of-2 in size for the 1105 1101 * calculation and use of SECTION_ROOT_MASK to make sense.
-53
include/linux/page_cgroup.h
··· 1 1 #ifndef __LINUX_PAGE_CGROUP_H 2 2 #define __LINUX_PAGE_CGROUP_H 3 3 4 - struct pglist_data; 5 - 6 - #ifdef CONFIG_MEMCG 7 - struct mem_cgroup; 8 - 9 - /* 10 - * Page Cgroup can be considered as an extended mem_map. 11 - * A page_cgroup page is associated with every page descriptor. The 12 - * page_cgroup helps us identify information about the cgroup 13 - * All page cgroups are allocated at boot or memory hotplug event, 14 - * then the page cgroup for pfn always exists. 15 - */ 16 - struct page_cgroup { 17 - struct mem_cgroup *mem_cgroup; 18 - }; 19 - 20 - extern void pgdat_page_cgroup_init(struct pglist_data *pgdat); 21 - 22 - #ifdef CONFIG_SPARSEMEM 23 - static inline void page_cgroup_init_flatmem(void) 24 - { 25 - } 26 - extern void page_cgroup_init(void); 27 - #else 28 - extern void page_cgroup_init_flatmem(void); 29 - static inline void page_cgroup_init(void) 30 - { 31 - } 32 - #endif 33 - 34 - struct page_cgroup *lookup_page_cgroup(struct page *page); 35 - 36 - #else /* !CONFIG_MEMCG */ 37 - struct page_cgroup; 38 - 39 - static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) 40 - { 41 - } 42 - 43 - static inline struct page_cgroup *lookup_page_cgroup(struct page *page) 44 - { 45 - return NULL; 46 - } 47 - 48 - static inline void page_cgroup_init(void) 49 - { 50 - } 51 - 52 - static inline void page_cgroup_init_flatmem(void) 53 - { 54 - } 55 - #endif /* CONFIG_MEMCG */ 56 - 57 4 #include <linux/swap.h> 58 5 59 6 #ifdef CONFIG_MEMCG_SWAP
-7
init/main.c
··· 51 51 #include <linux/mempolicy.h> 52 52 #include <linux/key.h> 53 53 #include <linux/buffer_head.h> 54 - #include <linux/page_cgroup.h> 55 54 #include <linux/debug_locks.h> 56 55 #include <linux/debugobjects.h> 57 56 #include <linux/lockdep.h> ··· 484 485 */ 485 486 static void __init mm_init(void) 486 487 { 487 - /* 488 - * page_cgroup requires contiguous pages, 489 - * bigger than MAX_ORDER unless SPARSEMEM. 490 - */ 491 - page_cgroup_init_flatmem(); 492 488 mem_init(); 493 489 kmem_cache_init(); 494 490 percpu_init_late(); ··· 621 627 initrd_start = 0; 622 628 } 623 629 #endif 624 - page_cgroup_init(); 625 630 debug_objects_mem_init(); 626 631 kmemleak_init(); 627 632 setup_per_cpu_pageset();
+35 -89
mm/memcontrol.c
··· 1274 1274 { 1275 1275 struct mem_cgroup_per_zone *mz; 1276 1276 struct mem_cgroup *memcg; 1277 - struct page_cgroup *pc; 1278 1277 struct lruvec *lruvec; 1279 1278 1280 1279 if (mem_cgroup_disabled()) { ··· 1281 1282 goto out; 1282 1283 } 1283 1284 1284 - pc = lookup_page_cgroup(page); 1285 - memcg = pc->mem_cgroup; 1285 + memcg = page->mem_cgroup; 1286 1286 /* 1287 1287 * Swapcache readahead pages are added to the LRU - and 1288 1288 * possibly migrated - before they are charged. ··· 2018 2020 unsigned long *flags) 2019 2021 { 2020 2022 struct mem_cgroup *memcg; 2021 - struct page_cgroup *pc; 2022 2023 2023 2024 rcu_read_lock(); 2024 2025 2025 2026 if (mem_cgroup_disabled()) 2026 2027 return NULL; 2027 - 2028 - pc = lookup_page_cgroup(page); 2029 2028 again: 2030 - memcg = pc->mem_cgroup; 2029 + memcg = page->mem_cgroup; 2031 2030 if (unlikely(!memcg)) 2032 2031 return NULL; 2033 2032 ··· 2033 2038 return memcg; 2034 2039 2035 2040 spin_lock_irqsave(&memcg->move_lock, *flags); 2036 - if (memcg != pc->mem_cgroup) { 2041 + if (memcg != page->mem_cgroup) { 2037 2042 spin_unlock_irqrestore(&memcg->move_lock, *flags); 2038 2043 goto again; 2039 2044 } ··· 2400 2405 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) 2401 2406 { 2402 2407 struct mem_cgroup *memcg; 2403 - struct page_cgroup *pc; 2404 2408 unsigned short id; 2405 2409 swp_entry_t ent; 2406 2410 2407 2411 VM_BUG_ON_PAGE(!PageLocked(page), page); 2408 2412 2409 - pc = lookup_page_cgroup(page); 2410 - memcg = pc->mem_cgroup; 2411 - 2413 + memcg = page->mem_cgroup; 2412 2414 if (memcg) { 2413 2415 if (!css_tryget_online(&memcg->css)) 2414 2416 memcg = NULL; ··· 2455 2463 static void commit_charge(struct page *page, struct mem_cgroup *memcg, 2456 2464 bool lrucare) 2457 2465 { 2458 - struct page_cgroup *pc = lookup_page_cgroup(page); 2459 2466 int isolated; 2460 2467 2461 - VM_BUG_ON_PAGE(pc->mem_cgroup, page); 2468 + VM_BUG_ON_PAGE(page->mem_cgroup, page); 2462 2469 2463 2470 /* 2464 2471 * 
In some cases, SwapCache and FUSE(splice_buf->radixtree), the page ··· 2468 2477 2469 2478 /* 2470 2479 * Nobody should be changing or seriously looking at 2471 - * pc->mem_cgroup at this point: 2480 + * page->mem_cgroup at this point: 2472 2481 * 2473 2482 * - the page is uncharged 2474 2483 * ··· 2480 2489 * - a page cache insertion, a swapin fault, or a migration 2481 2490 * have the page locked 2482 2491 */ 2483 - pc->mem_cgroup = memcg; 2492 + page->mem_cgroup = memcg; 2484 2493 2485 2494 if (lrucare) 2486 2495 unlock_page_lru(page, isolated); ··· 2963 2972 void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, 2964 2973 int order) 2965 2974 { 2966 - struct page_cgroup *pc; 2967 - 2968 2975 VM_BUG_ON(mem_cgroup_is_root(memcg)); 2969 2976 2970 2977 /* The page allocation failed. Revert */ ··· 2970 2981 memcg_uncharge_kmem(memcg, 1 << order); 2971 2982 return; 2972 2983 } 2973 - pc = lookup_page_cgroup(page); 2974 - pc->mem_cgroup = memcg; 2984 + page->mem_cgroup = memcg; 2975 2985 } 2976 2986 2977 2987 void __memcg_kmem_uncharge_pages(struct page *page, int order) 2978 2988 { 2979 - struct page_cgroup *pc = lookup_page_cgroup(page); 2980 - struct mem_cgroup *memcg = pc->mem_cgroup; 2989 + struct mem_cgroup *memcg = page->mem_cgroup; 2981 2990 2982 2991 if (!memcg) 2983 2992 return; ··· 2983 2996 VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); 2984 2997 2985 2998 memcg_uncharge_kmem(memcg, 1 << order); 2986 - pc->mem_cgroup = NULL; 2999 + page->mem_cgroup = NULL; 2987 3000 } 2988 3001 #else 2989 3002 static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg) ··· 3001 3014 */ 3002 3015 void mem_cgroup_split_huge_fixup(struct page *head) 3003 3016 { 3004 - struct page_cgroup *pc = lookup_page_cgroup(head); 3005 3017 int i; 3006 3018 3007 3019 if (mem_cgroup_disabled()) 3008 3020 return; 3009 3021 3010 3022 for (i = 1; i < HPAGE_PMD_NR; i++) 3011 - pc[i].mem_cgroup = pc[0].mem_cgroup; 3023 + head[i].mem_cgroup = 
head->mem_cgroup; 3012 3024 3013 - __this_cpu_sub(pc[0].mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE], 3025 + __this_cpu_sub(head->mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE], 3014 3026 HPAGE_PMD_NR); 3015 3027 } 3016 3028 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ··· 3018 3032 * mem_cgroup_move_account - move account of the page 3019 3033 * @page: the page 3020 3034 * @nr_pages: number of regular pages (>1 for huge pages) 3021 - * @pc: page_cgroup of the page. 3022 3035 * @from: mem_cgroup which the page is moved from. 3023 3036 * @to: mem_cgroup which the page is moved to. @from != @to. 3024 3037 * ··· 3030 3045 */ 3031 3046 static int mem_cgroup_move_account(struct page *page, 3032 3047 unsigned int nr_pages, 3033 - struct page_cgroup *pc, 3034 3048 struct mem_cgroup *from, 3035 3049 struct mem_cgroup *to) 3036 3050 { ··· 3049 3065 goto out; 3050 3066 3051 3067 /* 3052 - * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup 3068 + * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup 3053 3069 * of its source page while we change it: page migration takes 3054 3070 * both pages off the LRU, but page cache replacement doesn't. 3055 3071 */ ··· 3057 3073 goto out; 3058 3074 3059 3075 ret = -EINVAL; 3060 - if (pc->mem_cgroup != from) 3076 + if (page->mem_cgroup != from) 3061 3077 goto out_unlock; 3062 3078 3063 3079 spin_lock_irqsave(&from->move_lock, flags); ··· 3077 3093 } 3078 3094 3079 3095 /* 3080 - * It is safe to change pc->mem_cgroup here because the page 3096 + * It is safe to change page->mem_cgroup here because the page 3081 3097 * is referenced, charged, and isolated - we can't race with 3082 3098 * uncharging, charging, migration, or LRU putback. 
3083 3099 */ 3084 3100 3085 3101 /* caller should have done css_get */ 3086 - pc->mem_cgroup = to; 3102 + page->mem_cgroup = to; 3087 3103 spin_unlock_irqrestore(&from->move_lock, flags); 3088 3104 3089 3105 ret = 0; ··· 3158 3174 #endif 3159 3175 3160 3176 #ifdef CONFIG_DEBUG_VM 3161 - static struct page_cgroup *lookup_page_cgroup_used(struct page *page) 3162 - { 3163 - struct page_cgroup *pc; 3164 - 3165 - pc = lookup_page_cgroup(page); 3166 - /* 3167 - * Can be NULL while feeding pages into the page allocator for 3168 - * the first time, i.e. during boot or memory hotplug; 3169 - * or when mem_cgroup_disabled(). 3170 - */ 3171 - if (likely(pc) && pc->mem_cgroup) 3172 - return pc; 3173 - return NULL; 3174 - } 3175 - 3176 3177 bool mem_cgroup_bad_page_check(struct page *page) 3177 3178 { 3178 3179 if (mem_cgroup_disabled()) 3179 3180 return false; 3180 3181 3181 - return lookup_page_cgroup_used(page) != NULL; 3182 + return page->mem_cgroup != NULL; 3182 3183 } 3183 3184 3184 3185 void mem_cgroup_print_bad_page(struct page *page) 3185 3186 { 3186 - struct page_cgroup *pc; 3187 - 3188 - pc = lookup_page_cgroup_used(page); 3189 - if (pc) 3190 - pr_alert("pc:%p pc->mem_cgroup:%p\n", pc, pc->mem_cgroup); 3187 + pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); 3191 3188 } 3192 3189 #endif 3193 3190 ··· 5088 5123 unsigned long addr, pte_t ptent, union mc_target *target) 5089 5124 { 5090 5125 struct page *page = NULL; 5091 - struct page_cgroup *pc; 5092 5126 enum mc_target_type ret = MC_TARGET_NONE; 5093 5127 swp_entry_t ent = { .val = 0 }; 5094 5128 ··· 5101 5137 if (!page && !ent.val) 5102 5138 return ret; 5103 5139 if (page) { 5104 - pc = lookup_page_cgroup(page); 5105 5140 /* 5106 5141 * Do only loose check w/o serialization. 5107 - * mem_cgroup_move_account() checks the pc is valid or 5142 + * mem_cgroup_move_account() checks the page is valid or 5108 5143 * not under LRU exclusion. 
5109 5144 */ 5110 - if (pc->mem_cgroup == mc.from) { 5145 + if (page->mem_cgroup == mc.from) { 5111 5146 ret = MC_TARGET_PAGE; 5112 5147 if (target) 5113 5148 target->page = page; ··· 5134 5171 unsigned long addr, pmd_t pmd, union mc_target *target) 5135 5172 { 5136 5173 struct page *page = NULL; 5137 - struct page_cgroup *pc; 5138 5174 enum mc_target_type ret = MC_TARGET_NONE; 5139 5175 5140 5176 page = pmd_page(pmd); 5141 5177 VM_BUG_ON_PAGE(!page || !PageHead(page), page); 5142 5178 if (!move_anon()) 5143 5179 return ret; 5144 - pc = lookup_page_cgroup(page); 5145 - if (pc->mem_cgroup == mc.from) { 5180 + if (page->mem_cgroup == mc.from) { 5146 5181 ret = MC_TARGET_PAGE; 5147 5182 if (target) { 5148 5183 get_page(page); ··· 5339 5378 enum mc_target_type target_type; 5340 5379 union mc_target target; 5341 5380 struct page *page; 5342 - struct page_cgroup *pc; 5343 5381 5344 5382 /* 5345 5383 * We don't take compound_lock() here but no race with splitting thp ··· 5359 5399 if (target_type == MC_TARGET_PAGE) { 5360 5400 page = target.page; 5361 5401 if (!isolate_lru_page(page)) { 5362 - pc = lookup_page_cgroup(page); 5363 5402 if (!mem_cgroup_move_account(page, HPAGE_PMD_NR, 5364 - pc, mc.from, mc.to)) { 5403 + mc.from, mc.to)) { 5365 5404 mc.precharge -= HPAGE_PMD_NR; 5366 5405 mc.moved_charge += HPAGE_PMD_NR; 5367 5406 } ··· 5388 5429 page = target.page; 5389 5430 if (isolate_lru_page(page)) 5390 5431 goto put; 5391 - pc = lookup_page_cgroup(page); 5392 - if (!mem_cgroup_move_account(page, 1, pc, 5393 - mc.from, mc.to)) { 5432 + if (!mem_cgroup_move_account(page, 1, mc.from, mc.to)) { 5394 5433 mc.precharge--; 5395 5434 /* we uncharge from mc.from later. 
*/ 5396 5435 mc.moved_charge++; ··· 5576 5619 void mem_cgroup_swapout(struct page *page, swp_entry_t entry) 5577 5620 { 5578 5621 struct mem_cgroup *memcg; 5579 - struct page_cgroup *pc; 5580 5622 unsigned short oldid; 5581 5623 5582 5624 VM_BUG_ON_PAGE(PageLRU(page), page); ··· 5584 5628 if (!do_swap_account) 5585 5629 return; 5586 5630 5587 - pc = lookup_page_cgroup(page); 5588 - memcg = pc->mem_cgroup; 5631 + memcg = page->mem_cgroup; 5589 5632 5590 5633 /* Readahead page, never charged */ 5591 5634 if (!memcg) ··· 5594 5639 VM_BUG_ON_PAGE(oldid, page); 5595 5640 mem_cgroup_swap_statistics(memcg, true); 5596 5641 5597 - pc->mem_cgroup = NULL; 5642 + page->mem_cgroup = NULL; 5598 5643 5599 5644 if (!mem_cgroup_is_root(memcg)) 5600 5645 page_counter_uncharge(&memcg->memory, 1); ··· 5661 5706 goto out; 5662 5707 5663 5708 if (PageSwapCache(page)) { 5664 - struct page_cgroup *pc = lookup_page_cgroup(page); 5665 5709 /* 5666 5710 * Every swap fault against a single page tries to charge the 5667 5711 * page, bail as early as possible. shmem_unuse() encounters ··· 5668 5714 * the page lock, which serializes swap cache removal, which 5669 5715 * in turn serializes uncharging. 5670 5716 */ 5671 - if (pc->mem_cgroup) 5717 + if (page->mem_cgroup) 5672 5718 goto out; 5673 5719 } 5674 5720 ··· 5821 5867 next = page_list->next; 5822 5868 do { 5823 5869 unsigned int nr_pages = 1; 5824 - struct page_cgroup *pc; 5825 5870 5826 5871 page = list_entry(next, struct page, lru); 5827 5872 next = page->lru.next; ··· 5828 5875 VM_BUG_ON_PAGE(PageLRU(page), page); 5829 5876 VM_BUG_ON_PAGE(page_count(page), page); 5830 5877 5831 - pc = lookup_page_cgroup(page); 5832 - if (!pc->mem_cgroup) 5878 + if (!page->mem_cgroup) 5833 5879 continue; 5834 5880 5835 5881 /* 5836 5882 * Nobody should be changing or seriously looking at 5837 - * pc->mem_cgroup at this point, we have fully 5883 + * page->mem_cgroup at this point, we have fully 5838 5884 * exclusive access to the page. 
5839 5885 */ 5840 5886 5841 - if (memcg != pc->mem_cgroup) { 5887 + if (memcg != page->mem_cgroup) { 5842 5888 if (memcg) { 5843 5889 uncharge_batch(memcg, pgpgout, nr_anon, nr_file, 5844 5890 nr_huge, page); 5845 5891 pgpgout = nr_anon = nr_file = nr_huge = 0; 5846 5892 } 5847 - memcg = pc->mem_cgroup; 5893 + memcg = page->mem_cgroup; 5848 5894 } 5849 5895 5850 5896 if (PageTransHuge(page)) { ··· 5857 5905 else 5858 5906 nr_file += nr_pages; 5859 5907 5860 - pc->mem_cgroup = NULL; 5908 + page->mem_cgroup = NULL; 5861 5909 5862 5910 pgpgout++; 5863 5911 } while (next != page_list); ··· 5876 5924 */ 5877 5925 void mem_cgroup_uncharge(struct page *page) 5878 5926 { 5879 - struct page_cgroup *pc; 5880 - 5881 5927 if (mem_cgroup_disabled()) 5882 5928 return; 5883 5929 5884 5930 /* Don't touch page->lru of any random page, pre-check: */ 5885 - pc = lookup_page_cgroup(page); 5886 - if (!pc->mem_cgroup) 5931 + if (!page->mem_cgroup) 5887 5932 return; 5888 5933 5889 5934 INIT_LIST_HEAD(&page->lru); ··· 5917 5968 bool lrucare) 5918 5969 { 5919 5970 struct mem_cgroup *memcg; 5920 - struct page_cgroup *pc; 5921 5971 int isolated; 5922 5972 5923 5973 VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); ··· 5931 5983 return; 5932 5984 5933 5985 /* Page cache replacement: new page already charged? */ 5934 - pc = lookup_page_cgroup(newpage); 5935 - if (pc->mem_cgroup) 5986 + if (newpage->mem_cgroup) 5936 5987 return; 5937 5988 5938 5989 /* ··· 5940 5993 * uncharged page when the PFN walker finds a page that 5941 5994 * reclaim just put back on the LRU but has not released yet. 5942 5995 */ 5943 - pc = lookup_page_cgroup(oldpage); 5944 - memcg = pc->mem_cgroup; 5996 + memcg = oldpage->mem_cgroup; 5945 5997 if (!memcg) 5946 5998 return; 5947 5999 5948 6000 if (lrucare) 5949 6001 lock_page_lru(oldpage, &isolated); 5950 6002 5951 - pc->mem_cgroup = NULL; 6003 + oldpage->mem_cgroup = NULL; 5952 6004 5953 6005 if (lrucare) 5954 6006 unlock_page_lru(oldpage, isolated);
-2
mm/page_alloc.c
··· 48 48 #include <linux/backing-dev.h> 49 49 #include <linux/fault-inject.h> 50 50 #include <linux/page-isolation.h> 51 - #include <linux/page_cgroup.h> 52 51 #include <linux/debugobjects.h> 53 52 #include <linux/kmemleak.h> 54 53 #include <linux/compaction.h> ··· 4852 4853 #endif 4853 4854 init_waitqueue_head(&pgdat->kswapd_wait); 4854 4855 init_waitqueue_head(&pgdat->pfmemalloc_wait); 4855 - pgdat_page_cgroup_init(pgdat); 4856 4856 4857 4857 for (j = 0; j < MAX_NR_ZONES; j++) { 4858 4858 struct zone *zone = pgdat->node_zones + j;
-319
mm/page_cgroup.c
··· 1 1 #include <linux/mm.h> 2 - #include <linux/mmzone.h> 3 - #include <linux/bootmem.h> 4 - #include <linux/bit_spinlock.h> 5 2 #include <linux/page_cgroup.h> 6 - #include <linux/hash.h> 7 - #include <linux/slab.h> 8 - #include <linux/memory.h> 9 3 #include <linux/vmalloc.h> 10 - #include <linux/cgroup.h> 11 4 #include <linux/swapops.h> 12 - #include <linux/kmemleak.h> 13 - 14 - static unsigned long total_usage; 15 - 16 - #if !defined(CONFIG_SPARSEMEM) 17 - 18 - 19 - void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) 20 - { 21 - pgdat->node_page_cgroup = NULL; 22 - } 23 - 24 - struct page_cgroup *lookup_page_cgroup(struct page *page) 25 - { 26 - unsigned long pfn = page_to_pfn(page); 27 - unsigned long offset; 28 - struct page_cgroup *base; 29 - 30 - base = NODE_DATA(page_to_nid(page))->node_page_cgroup; 31 - #ifdef CONFIG_DEBUG_VM 32 - /* 33 - * The sanity checks the page allocator does upon freeing a 34 - * page can reach here before the page_cgroup arrays are 35 - * allocated when feeding a range of pages to the allocator 36 - * for the first time during bootup or memory hotplug. 
37 - */ 38 - if (unlikely(!base)) 39 - return NULL; 40 - #endif 41 - offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn; 42 - return base + offset; 43 - } 44 - 45 - static int __init alloc_node_page_cgroup(int nid) 46 - { 47 - struct page_cgroup *base; 48 - unsigned long table_size; 49 - unsigned long nr_pages; 50 - 51 - nr_pages = NODE_DATA(nid)->node_spanned_pages; 52 - if (!nr_pages) 53 - return 0; 54 - 55 - table_size = sizeof(struct page_cgroup) * nr_pages; 56 - 57 - base = memblock_virt_alloc_try_nid_nopanic( 58 - table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), 59 - BOOTMEM_ALLOC_ACCESSIBLE, nid); 60 - if (!base) 61 - return -ENOMEM; 62 - NODE_DATA(nid)->node_page_cgroup = base; 63 - total_usage += table_size; 64 - return 0; 65 - } 66 - 67 - void __init page_cgroup_init_flatmem(void) 68 - { 69 - 70 - int nid, fail; 71 - 72 - if (mem_cgroup_disabled()) 73 - return; 74 - 75 - for_each_online_node(nid) { 76 - fail = alloc_node_page_cgroup(nid); 77 - if (fail) 78 - goto fail; 79 - } 80 - printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage); 81 - printk(KERN_INFO "please try 'cgroup_disable=memory' option if you" 82 - " don't want memory cgroups\n"); 83 - return; 84 - fail: 85 - printk(KERN_CRIT "allocation of page_cgroup failed.\n"); 86 - printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n"); 87 - panic("Out of memory"); 88 - } 89 - 90 - #else /* CONFIG_FLAT_NODE_MEM_MAP */ 91 - 92 - struct page_cgroup *lookup_page_cgroup(struct page *page) 93 - { 94 - unsigned long pfn = page_to_pfn(page); 95 - struct mem_section *section = __pfn_to_section(pfn); 96 - #ifdef CONFIG_DEBUG_VM 97 - /* 98 - * The sanity checks the page allocator does upon freeing a 99 - * page can reach here before the page_cgroup arrays are 100 - * allocated when feeding a range of pages to the allocator 101 - * for the first time during bootup or memory hotplug. 
102 - */ 103 - if (!section->page_cgroup) 104 - return NULL; 105 - #endif 106 - return section->page_cgroup + pfn; 107 - } 108 - 109 - static void *__meminit alloc_page_cgroup(size_t size, int nid) 110 - { 111 - gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; 112 - void *addr = NULL; 113 - 114 - addr = alloc_pages_exact_nid(nid, size, flags); 115 - if (addr) { 116 - kmemleak_alloc(addr, size, 1, flags); 117 - return addr; 118 - } 119 - 120 - if (node_state(nid, N_HIGH_MEMORY)) 121 - addr = vzalloc_node(size, nid); 122 - else 123 - addr = vzalloc(size); 124 - 125 - return addr; 126 - } 127 - 128 - static int __meminit init_section_page_cgroup(unsigned long pfn, int nid) 129 - { 130 - struct mem_section *section; 131 - struct page_cgroup *base; 132 - unsigned long table_size; 133 - 134 - section = __pfn_to_section(pfn); 135 - 136 - if (section->page_cgroup) 137 - return 0; 138 - 139 - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 140 - base = alloc_page_cgroup(table_size, nid); 141 - 142 - /* 143 - * The value stored in section->page_cgroup is (base - pfn) 144 - * and it does not point to the memory block allocated above, 145 - * causing kmemleak false positives. 146 - */ 147 - kmemleak_not_leak(base); 148 - 149 - if (!base) { 150 - printk(KERN_ERR "page cgroup allocation failure\n"); 151 - return -ENOMEM; 152 - } 153 - 154 - /* 155 - * The passed "pfn" may not be aligned to SECTION. For the calculation 156 - * we need to apply a mask. 
157 - */ 158 - pfn &= PAGE_SECTION_MASK; 159 - section->page_cgroup = base - pfn; 160 - total_usage += table_size; 161 - return 0; 162 - } 163 - #ifdef CONFIG_MEMORY_HOTPLUG 164 - static void free_page_cgroup(void *addr) 165 - { 166 - if (is_vmalloc_addr(addr)) { 167 - vfree(addr); 168 - } else { 169 - struct page *page = virt_to_page(addr); 170 - size_t table_size = 171 - sizeof(struct page_cgroup) * PAGES_PER_SECTION; 172 - 173 - BUG_ON(PageReserved(page)); 174 - kmemleak_free(addr); 175 - free_pages_exact(addr, table_size); 176 - } 177 - } 178 - 179 - static void __free_page_cgroup(unsigned long pfn) 180 - { 181 - struct mem_section *ms; 182 - struct page_cgroup *base; 183 - 184 - ms = __pfn_to_section(pfn); 185 - if (!ms || !ms->page_cgroup) 186 - return; 187 - base = ms->page_cgroup + pfn; 188 - free_page_cgroup(base); 189 - ms->page_cgroup = NULL; 190 - } 191 - 192 - static int __meminit online_page_cgroup(unsigned long start_pfn, 193 - unsigned long nr_pages, 194 - int nid) 195 - { 196 - unsigned long start, end, pfn; 197 - int fail = 0; 198 - 199 - start = SECTION_ALIGN_DOWN(start_pfn); 200 - end = SECTION_ALIGN_UP(start_pfn + nr_pages); 201 - 202 - if (nid == -1) { 203 - /* 204 - * In this case, "nid" already exists and contains valid memory. 205 - * "start_pfn" passed to us is a pfn which is an arg for 206 - * online__pages(), and start_pfn should exist. 
207 - */ 208 - nid = pfn_to_nid(start_pfn); 209 - VM_BUG_ON(!node_state(nid, N_ONLINE)); 210 - } 211 - 212 - for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { 213 - if (!pfn_present(pfn)) 214 - continue; 215 - fail = init_section_page_cgroup(pfn, nid); 216 - } 217 - if (!fail) 218 - return 0; 219 - 220 - /* rollback */ 221 - for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 222 - __free_page_cgroup(pfn); 223 - 224 - return -ENOMEM; 225 - } 226 - 227 - static int __meminit offline_page_cgroup(unsigned long start_pfn, 228 - unsigned long nr_pages, int nid) 229 - { 230 - unsigned long start, end, pfn; 231 - 232 - start = SECTION_ALIGN_DOWN(start_pfn); 233 - end = SECTION_ALIGN_UP(start_pfn + nr_pages); 234 - 235 - for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 236 - __free_page_cgroup(pfn); 237 - return 0; 238 - 239 - } 240 - 241 - static int __meminit page_cgroup_callback(struct notifier_block *self, 242 - unsigned long action, void *arg) 243 - { 244 - struct memory_notify *mn = arg; 245 - int ret = 0; 246 - switch (action) { 247 - case MEM_GOING_ONLINE: 248 - ret = online_page_cgroup(mn->start_pfn, 249 - mn->nr_pages, mn->status_change_nid); 250 - break; 251 - case MEM_OFFLINE: 252 - offline_page_cgroup(mn->start_pfn, 253 - mn->nr_pages, mn->status_change_nid); 254 - break; 255 - case MEM_CANCEL_ONLINE: 256 - offline_page_cgroup(mn->start_pfn, 257 - mn->nr_pages, mn->status_change_nid); 258 - break; 259 - case MEM_GOING_OFFLINE: 260 - break; 261 - case MEM_ONLINE: 262 - case MEM_CANCEL_OFFLINE: 263 - break; 264 - } 265 - 266 - return notifier_from_errno(ret); 267 - } 268 - 269 - #endif 270 - 271 - void __init page_cgroup_init(void) 272 - { 273 - unsigned long pfn; 274 - int nid; 275 - 276 - if (mem_cgroup_disabled()) 277 - return; 278 - 279 - for_each_node_state(nid, N_MEMORY) { 280 - unsigned long start_pfn, end_pfn; 281 - 282 - start_pfn = node_start_pfn(nid); 283 - end_pfn = node_end_pfn(nid); 284 - /* 285 - * start_pfn and end_pfn 
may not be aligned to SECTION and the 286 - * page->flags of out of node pages are not initialized. So we 287 - * scan [start_pfn, the biggest section's pfn < end_pfn) here. 288 - */ 289 - for (pfn = start_pfn; 290 - pfn < end_pfn; 291 - pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { 292 - 293 - if (!pfn_valid(pfn)) 294 - continue; 295 - /* 296 - * Nodes's pfns can be overlapping. 297 - * We know some arch can have a nodes layout such as 298 - * -------------pfn--------------> 299 - * N0 | N1 | N2 | N0 | N1 | N2|.... 300 - */ 301 - if (pfn_to_nid(pfn) != nid) 302 - continue; 303 - if (init_section_page_cgroup(pfn, nid)) 304 - goto oom; 305 - } 306 - } 307 - hotplug_memory_notifier(page_cgroup_callback, 0); 308 - printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage); 309 - printk(KERN_INFO "please try 'cgroup_disable=memory' option if you " 310 - "don't want memory cgroups\n"); 311 - return; 312 - oom: 313 - printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n"); 314 - panic("Out of memory"); 315 - } 316 - 317 - void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) 318 - { 319 - return; 320 - } 321 - 322 - #endif 323 - 324 5 325 6 #ifdef CONFIG_MEMCG_SWAP 326 7