Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

init: kmsan: call KMSAN initialization routines

kmsan_init_shadow() scans the mappings created at boot time and creates
metadata pages for those mappings.

When the memblock allocator returns pages to pagealloc, we reserve 2/3 of
those pages and use them as metadata for the remaining 1/3. Once KMSAN
starts, every page allocated by pagealloc has its associated shadow and
origin pages.

kmsan_initialize() initializes the bookkeeping for init_task and enables
KMSAN.

Link: https://lkml.kernel.org/r/20220915150417.722975-18-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Eric Biggers <ebiggers@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Marco Elver <elver@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Alexander Potapenko; committed by Andrew Morton.
3c206509 50b5e49c

+317 -1
+36
include/linux/kmsan.h
··· 32 32 void kmsan_task_exit(struct task_struct *task); 33 33 34 34 /** 35 + * kmsan_init_shadow() - Initialize KMSAN shadow at boot time. 36 + * 37 + * Allocate and initialize KMSAN metadata for early allocations. 38 + */ 39 + void __init kmsan_init_shadow(void); 40 + 41 + /** 42 + * kmsan_init_runtime() - Initialize KMSAN state and enable KMSAN. 43 + */ 44 + void __init kmsan_init_runtime(void); 45 + 46 + /** 47 + * kmsan_memblock_free_pages() - handle freeing of memblock pages. 48 + * @page: struct page to free. 49 + * @order: order of @page. 50 + * 51 + * Freed pages are either returned to buddy allocator or held back to be used 52 + * as metadata pages. 53 + */ 54 + bool __init kmsan_memblock_free_pages(struct page *page, unsigned int order); 55 + 56 + /** 35 57 * kmsan_alloc_page() - Notify KMSAN about an alloc_pages() call. 36 58 * @page: struct page pointer returned by alloc_pages(). 37 59 * @order: order of allocated struct page. ··· 173 151 void kmsan_iounmap_page_range(unsigned long start, unsigned long end); 174 152 175 153 #else 154 + 155 + static inline void kmsan_init_shadow(void) 156 + { 157 + } 158 + 159 + static inline void kmsan_init_runtime(void) 160 + { 161 + } 162 + 163 + static inline bool kmsan_memblock_free_pages(struct page *page, 164 + unsigned int order) 165 + { 166 + return true; 167 + } 176 168 177 169 static inline void kmsan_task_create(struct task_struct *task) 178 170 {
+3
init/main.c
··· 34 34 #include <linux/percpu.h> 35 35 #include <linux/kmod.h> 36 36 #include <linux/kprobes.h> 37 + #include <linux/kmsan.h> 37 38 #include <linux/vmalloc.h> 38 39 #include <linux/kernel_stat.h> 39 40 #include <linux/start_kernel.h> ··· 838 837 init_mem_debugging_and_hardening(); 839 838 kfence_alloc_pool(); 840 839 report_meminit(); 840 + kmsan_init_shadow(); 841 841 stack_depot_early_init(); 842 842 mem_init(); 843 843 mem_init_print_info(); ··· 859 857 init_espfix_bsp(); 860 858 /* Should be run after espfix64 is set up. */ 861 859 pti_init(); 860 + kmsan_init_runtime(); 862 861 } 863 862 864 863 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+2 -1
mm/kmsan/Makefile
··· 3 3 # Makefile for KernelMemorySanitizer (KMSAN). 4 4 # 5 5 # 6 - obj-y := core.o instrumentation.o hooks.o report.o shadow.o 6 + obj-y := core.o instrumentation.o init.o hooks.o report.o shadow.o 7 7 8 8 KMSAN_SANITIZE := n 9 9 KCOV_INSTRUMENT := n ··· 18 18 19 19 CFLAGS_core.o := $(CC_FLAGS_KMSAN_RUNTIME) 20 20 CFLAGS_hooks.o := $(CC_FLAGS_KMSAN_RUNTIME) 21 + CFLAGS_init.o := $(CC_FLAGS_KMSAN_RUNTIME) 21 22 CFLAGS_instrumentation.o := $(CC_FLAGS_KMSAN_RUNTIME) 22 23 CFLAGS_report.o := $(CC_FLAGS_KMSAN_RUNTIME) 23 24 CFLAGS_shadow.o := $(CC_FLAGS_KMSAN_RUNTIME)
+235
mm/kmsan/init.c
// SPDX-License-Identifier: GPL-2.0
/*
 * KMSAN initialization routines.
 *
 * Copyright (C) 2017-2021 Google LLC
 * Author: Alexander Potapenko <glider@google.com>
 *
 */

#include "kmsan.h"

#include <asm/sections.h>
#include <linux/mm.h>
#include <linux/memblock.h>

#include "../internal.h"

#define NUM_FUTURE_RANGES 128
struct start_end_pair {
	u64 start, end;
};

/* Ranges registered before the page allocator is up; __initdata is discarded after boot. */
static struct start_end_pair start_end_pairs[NUM_FUTURE_RANGES] __initdata;
static int future_index __initdata;

/*
 * Record a range of memory for which the metadata pages will be created once
 * the page allocator becomes available.
 * Overlapping or adjacent-after-alignment ranges are merged into an existing
 * entry; otherwise a new entry is appended to start_end_pairs[].
 */
static void __init kmsan_record_future_shadow_range(void *start, void *end)
{
	u64 nstart = (u64)start, nend = (u64)end, cstart, cend;
	bool merged = false;

	KMSAN_WARN_ON(future_index == NUM_FUTURE_RANGES);
	KMSAN_WARN_ON((nstart >= nend) || !nstart || !nend);
	nstart = ALIGN_DOWN(nstart, PAGE_SIZE);
	nend = ALIGN(nend, PAGE_SIZE);

	/*
	 * Scan the existing ranges to see if any of them overlaps with
	 * [start, end). In that case, merge the two ranges instead of
	 * creating a new one.
	 * The number of ranges is less than 20, so there is no need to organize
	 * them into a more intelligent data structure.
	 */
	for (int i = 0; i < future_index; i++) {
		cstart = start_end_pairs[i].start;
		cend = start_end_pairs[i].end;
		if ((cstart < nstart && cend < nstart) ||
		    (cstart > nend && cend > nend))
			/* ranges are disjoint - do not merge */
			continue;
		/* Grow the existing entry to cover the union of both ranges. */
		start_end_pairs[i].start = min(nstart, cstart);
		start_end_pairs[i].end = max(nend, cend);
		merged = true;
		break;
	}
	if (merged)
		return;
	start_end_pairs[future_index].start = nstart;
	start_end_pairs[future_index].end = nend;
	future_index++;
}

/*
 * Initialize the shadow for existing mappings during kernel initialization.
 * These include kernel text/data sections, NODE_DATA and future ranges
 * registered while creating other data (e.g. percpu).
 *
 * Allocations via memblock can be only done before slab is initialized.
 */
void __init kmsan_init_shadow(void)
{
	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
	phys_addr_t p_start, p_end;
	u64 loop;
	int nid;

	/* Cover all memblock-reserved regions with metadata. */
	for_each_reserved_mem_range(loop, &p_start, &p_end)
		kmsan_record_future_shadow_range(phys_to_virt(p_start),
						 phys_to_virt(p_end));
	/* Allocate shadow for .data */
	kmsan_record_future_shadow_range(_sdata, _edata);

	/* Each node's pg_data_t also needs metadata. */
	for_each_online_node(nid)
		kmsan_record_future_shadow_range(
			NODE_DATA(nid), (char *)NODE_DATA(nid) + nd_size);

	/* Now materialize metadata for everything recorded above. */
	for (int i = 0; i < future_index; i++)
		kmsan_init_alloc_meta_for_range(
			(void *)start_end_pairs[i].start,
			(void *)start_end_pairs[i].end);
}

struct metadata_page_pair {
	struct page *shadow, *origin;
};
/* Per-order holding area for blocks reserved as future shadow/origin pages. */
static struct metadata_page_pair held_back[MAX_ORDER] __initdata;

/*
 * Eager metadata allocation. When the memblock allocator is freeing pages to
 * pagealloc, we use 2/3 of them as metadata for the remaining 1/3.
 * We store the pointers to the returned blocks of pages in held_back[] grouped
 * by their order: when kmsan_memblock_free_pages() is called for the first
 * time with a certain order, it is reserved as a shadow block, for the second
 * time - as an origin block. On the third time the incoming block receives its
 * shadow and origin ranges from the previously saved shadow and origin blocks,
 * after which held_back[order] can be used again.
 *
 * At the very end there may be leftover blocks in held_back[]. They are
 * collected later by kmsan_memblock_discard().
 *
 * Returns true if @page should be returned to pagealloc by the caller, false
 * if KMSAN kept it back as metadata.
 */
bool kmsan_memblock_free_pages(struct page *page, unsigned int order)
{
	struct page *shadow, *origin;

	/* First block of this order: hold it back as the shadow block. */
	if (!held_back[order].shadow) {
		held_back[order].shadow = page;
		return false;
	}
	/* Second block of this order: hold it back as the origin block. */
	if (!held_back[order].origin) {
		held_back[order].origin = page;
		return false;
	}
	/* Third block: wire it up to the two saved blocks and release it. */
	shadow = held_back[order].shadow;
	origin = held_back[order].origin;
	kmsan_setup_meta(page, shadow, origin, order);

	held_back[order].shadow = NULL;
	held_back[order].origin = NULL;
	return true;
}

#define MAX_BLOCKS 8
/* A tiny fixed-capacity stack of same-order page blocks. */
struct smallstack {
	struct page *items[MAX_BLOCKS];
	int index;
	int order;
};

static struct smallstack collect = {
	.index = 0,
	.order = MAX_ORDER,
};

static void smallstack_push(struct smallstack *stack, struct page *pages)
{
	KMSAN_WARN_ON(stack->index == MAX_BLOCKS);
	stack->items[stack->index] = pages;
	stack->index++;
}
#undef MAX_BLOCKS

static struct page *smallstack_pop(struct smallstack *stack)
{
	struct page *ret;

	KMSAN_WARN_ON(stack->index == 0);
	stack->index--;
	ret = stack->items[stack->index];
	stack->items[stack->index] = NULL;
	return ret;
}

/*
 * While at least three blocks remain in @collect, use two of them as shadow
 * and origin for the third one, then hand the third block over to pagealloc.
 */
static void do_collection(void)
{
	struct page *page, *shadow, *origin;

	while (collect.index >= 3) {
		page = smallstack_pop(&collect);
		shadow = smallstack_pop(&collect);
		origin = smallstack_pop(&collect);
		kmsan_setup_meta(page, shadow, origin, collect.order);
		__free_pages_core(page, collect.order);
	}
}

/*
 * Split every block remaining in @collect into two blocks of half the size
 * (order-1), so do_collection() can be retried at the smaller order.
 * No-op once order 0 is reached.
 */
static void collect_split(void)
{
	struct smallstack tmp = {
		.order = collect.order - 1,
		.index = 0,
	};
	struct page *page;

	if (!collect.order)
		return;
	while (collect.index) {
		page = smallstack_pop(&collect);
		/* An order-N block is two adjacent order-(N-1) blocks. */
		smallstack_push(&tmp, &page[0]);
		smallstack_push(&tmp, &page[1 << tmp.order]);
	}
	__memcpy(&collect, &tmp, sizeof(tmp));
}

/*
 * Memblock is about to go away. Split the page blocks left over in held_back[]
 * and return 1/3 of that memory to the system.
 */
static void kmsan_memblock_discard(void)
{
	/*
	 * For each order=N:
	 * - push held_back[N].shadow and .origin to @collect;
	 * - while there are >= 3 elements in @collect, do garbage collection:
	 *   - pop 3 ranges from @collect;
	 *   - use two of them as shadow and origin for the third one;
	 *   - repeat;
	 * - split each remaining element from @collect into 2 ranges of
	 *   order=N-1,
	 * - repeat.
	 */
	collect.order = MAX_ORDER - 1;
	for (int i = MAX_ORDER - 1; i >= 0; i--) {
		if (held_back[i].shadow)
			smallstack_push(&collect, held_back[i].shadow);
		if (held_back[i].origin)
			smallstack_push(&collect, held_back[i].origin);
		held_back[i].shadow = NULL;
		held_back[i].origin = NULL;
		do_collection();
		/* Leftovers descend to order i-1 and are merged with the next batch. */
		collect_split();
	}
}

/* Set up bookkeeping for the boot task and flip the global enable switch. */
void __init kmsan_init_runtime(void)
{
	/* Assuming current is init_task */
	kmsan_internal_task_create(current);
	kmsan_memblock_discard();
	pr_info("Starting KernelMemorySanitizer\n");
	pr_info("ATTENTION: KMSAN is a debugging tool! Do not use it on production machines!\n");
	kmsan_enabled = true;
}
+3
mm/kmsan/kmsan.h
··· 67 67 struct shadow_origin_ptr kmsan_get_shadow_origin_ptr(void *addr, u64 size, 68 68 bool store); 69 69 void *kmsan_get_metadata(void *addr, bool is_origin); 70 + void __init kmsan_init_alloc_meta_for_range(void *start, void *end); 70 71 71 72 enum kmsan_bug_reason { 72 73 REASON_ANY, ··· 188 187 int reason); 189 188 190 189 struct page *kmsan_vmalloc_to_page_or_null(void *vaddr); 190 + void kmsan_setup_meta(struct page *page, struct page *shadow, 191 + struct page *origin, int order); 191 192 192 193 /* 193 194 * kmsan_internal_is_module_addr() and kmsan_internal_is_vmalloc_addr() are
+34
mm/kmsan/shadow.c
··· 258 258 kfree(s_pages); 259 259 kfree(o_pages); 260 260 } 261 + 262 + /* Allocate metadata for pages allocated at boot time. */ 263 + void __init kmsan_init_alloc_meta_for_range(void *start, void *end) 264 + { 265 + struct page *shadow_p, *origin_p; 266 + void *shadow, *origin; 267 + struct page *page; 268 + u64 size; 269 + 270 + start = (void *)ALIGN_DOWN((u64)start, PAGE_SIZE); 271 + size = ALIGN((u64)end - (u64)start, PAGE_SIZE); 272 + shadow = memblock_alloc(size, PAGE_SIZE); 273 + origin = memblock_alloc(size, PAGE_SIZE); 274 + for (u64 addr = 0; addr < size; addr += PAGE_SIZE) { 275 + page = virt_to_page_or_null((char *)start + addr); 276 + shadow_p = virt_to_page_or_null((char *)shadow + addr); 277 + set_no_shadow_origin_page(shadow_p); 278 + shadow_page_for(page) = shadow_p; 279 + origin_p = virt_to_page_or_null((char *)origin + addr); 280 + set_no_shadow_origin_page(origin_p); 281 + origin_page_for(page) = origin_p; 282 + } 283 + } 284 + 285 + void kmsan_setup_meta(struct page *page, struct page *shadow, 286 + struct page *origin, int order) 287 + { 288 + for (int i = 0; i < (1 << order); i++) { 289 + set_no_shadow_origin_page(&shadow[i]); 290 + set_no_shadow_origin_page(&origin[i]); 291 + shadow_page_for(&page[i]) = &shadow[i]; 292 + origin_page_for(&page[i]) = &origin[i]; 293 + } 294 + }
+4
mm/page_alloc.c
··· 1809 1809 { 1810 1810 if (early_page_uninitialised(pfn)) 1811 1811 return; 1812 + if (!kmsan_memblock_free_pages(page, order)) { 1813 + /* KMSAN will take care of these pages. */ 1814 + return; 1815 + } 1812 1816 __free_pages_core(page, order); 1813 1817 } 1814 1818