Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#define DISABLE_BRANCH_PROFILING
3#define pr_fmt(fmt) "kasan: " fmt
4
5/* cpu_feature_enabled() cannot be used this early */
6#define USE_EARLY_PGTABLE_L5
7
8#include <linux/memblock.h>
9#include <linux/kasan.h>
10#include <linux/kdebug.h>
11#include <linux/mm.h>
12#include <linux/sched.h>
13#include <linux/sched/task.h>
14#include <linux/vmalloc.h>
15
16#include <asm/e820/types.h>
17#include <asm/pgalloc.h>
18#include <asm/tlbflush.h>
19#include <asm/sections.h>
20#include <asm/pgtable.h>
21#include <asm/cpu_entry_area.h>
22
23extern struct range pfn_mapped[E820_MAX_ENTRIES];
24
25static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
26
27static __init void *early_alloc(size_t size, int nid, bool should_panic)
28{
29 void *ptr = memblock_alloc_try_nid(size, size,
30 __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
31
32 if (!ptr && should_panic)
33 panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
34 (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));
35
36 return ptr;
37}
38
39static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
40 unsigned long end, int nid)
41{
42 pte_t *pte;
43
44 if (pmd_none(*pmd)) {
45 void *p;
46
47 if (boot_cpu_has(X86_FEATURE_PSE) &&
48 ((end - addr) == PMD_SIZE) &&
49 IS_ALIGNED(addr, PMD_SIZE)) {
50 p = early_alloc(PMD_SIZE, nid, false);
51 if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
52 return;
53 else if (p)
54 memblock_free(__pa(p), PMD_SIZE);
55 }
56
57 p = early_alloc(PAGE_SIZE, nid, true);
58 pmd_populate_kernel(&init_mm, pmd, p);
59 }
60
61 pte = pte_offset_kernel(pmd, addr);
62 do {
63 pte_t entry;
64 void *p;
65
66 if (!pte_none(*pte))
67 continue;
68
69 p = early_alloc(PAGE_SIZE, nid, true);
70 entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
71 set_pte_at(&init_mm, addr, pte, entry);
72 } while (pte++, addr += PAGE_SIZE, addr != end);
73}
74
75static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
76 unsigned long end, int nid)
77{
78 pmd_t *pmd;
79 unsigned long next;
80
81 if (pud_none(*pud)) {
82 void *p;
83
84 if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
85 ((end - addr) == PUD_SIZE) &&
86 IS_ALIGNED(addr, PUD_SIZE)) {
87 p = early_alloc(PUD_SIZE, nid, false);
88 if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
89 return;
90 else if (p)
91 memblock_free(__pa(p), PUD_SIZE);
92 }
93
94 p = early_alloc(PAGE_SIZE, nid, true);
95 pud_populate(&init_mm, pud, p);
96 }
97
98 pmd = pmd_offset(pud, addr);
99 do {
100 next = pmd_addr_end(addr, end);
101 if (!pmd_large(*pmd))
102 kasan_populate_pmd(pmd, addr, next, nid);
103 } while (pmd++, addr = next, addr != end);
104}
105
106static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
107 unsigned long end, int nid)
108{
109 pud_t *pud;
110 unsigned long next;
111
112 if (p4d_none(*p4d)) {
113 void *p = early_alloc(PAGE_SIZE, nid, true);
114
115 p4d_populate(&init_mm, p4d, p);
116 }
117
118 pud = pud_offset(p4d, addr);
119 do {
120 next = pud_addr_end(addr, end);
121 if (!pud_large(*pud))
122 kasan_populate_pud(pud, addr, next, nid);
123 } while (pud++, addr = next, addr != end);
124}
125
126static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
127 unsigned long end, int nid)
128{
129 void *p;
130 p4d_t *p4d;
131 unsigned long next;
132
133 if (pgd_none(*pgd)) {
134 p = early_alloc(PAGE_SIZE, nid, true);
135 pgd_populate(&init_mm, pgd, p);
136 }
137
138 p4d = p4d_offset(pgd, addr);
139 do {
140 next = p4d_addr_end(addr, end);
141 kasan_populate_p4d(p4d, addr, next, nid);
142 } while (p4d++, addr = next, addr != end);
143}
144
145static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
146 int nid)
147{
148 pgd_t *pgd;
149 unsigned long next;
150
151 addr = addr & PAGE_MASK;
152 end = round_up(end, PAGE_SIZE);
153 pgd = pgd_offset_k(addr);
154 do {
155 next = pgd_addr_end(addr, end);
156 kasan_populate_pgd(pgd, addr, next, nid);
157 } while (pgd++, addr = next, addr != end);
158}
159
160static void __init map_range(struct range *range)
161{
162 unsigned long start;
163 unsigned long end;
164
165 start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
166 end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
167
168 kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
169}
170
171static void __init clear_pgds(unsigned long start,
172 unsigned long end)
173{
174 pgd_t *pgd;
175 /* See comment in kasan_init() */
176 unsigned long pgd_end = end & PGDIR_MASK;
177
178 for (; start < pgd_end; start += PGDIR_SIZE) {
179 pgd = pgd_offset_k(start);
180 /*
181 * With folded p4d, pgd_clear() is nop, use p4d_clear()
182 * instead.
183 */
184 if (pgtable_l5_enabled())
185 pgd_clear(pgd);
186 else
187 p4d_clear(p4d_offset(pgd, start));
188 }
189
190 pgd = pgd_offset_k(start);
191 for (; start < end; start += P4D_SIZE)
192 p4d_clear(p4d_offset(pgd, start));
193}
194
195static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
196{
197 unsigned long p4d;
198
199 if (!pgtable_l5_enabled())
200 return (p4d_t *)pgd;
201
202 p4d = pgd_val(*pgd) & PTE_PFN_MASK;
203 p4d += __START_KERNEL_map - phys_base;
204 return (p4d_t *)p4d + p4d_index(addr);
205}
206
207static void __init kasan_early_p4d_populate(pgd_t *pgd,
208 unsigned long addr,
209 unsigned long end)
210{
211 pgd_t pgd_entry;
212 p4d_t *p4d, p4d_entry;
213 unsigned long next;
214
215 if (pgd_none(*pgd)) {
216 pgd_entry = __pgd(_KERNPG_TABLE |
217 __pa_nodebug(kasan_early_shadow_p4d));
218 set_pgd(pgd, pgd_entry);
219 }
220
221 p4d = early_p4d_offset(pgd, addr);
222 do {
223 next = p4d_addr_end(addr, end);
224
225 if (!p4d_none(*p4d))
226 continue;
227
228 p4d_entry = __p4d(_KERNPG_TABLE |
229 __pa_nodebug(kasan_early_shadow_pud));
230 set_p4d(p4d, p4d_entry);
231 } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
232}
233
234static void __init kasan_map_early_shadow(pgd_t *pgd)
235{
236 /* See comment in kasan_init() */
237 unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
238 unsigned long end = KASAN_SHADOW_END;
239 unsigned long next;
240
241 pgd += pgd_index(addr);
242 do {
243 next = pgd_addr_end(addr, end);
244 kasan_early_p4d_populate(pgd, addr, next);
245 } while (pgd++, addr = next, addr != end);
246}
247
248static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
249 unsigned long addr,
250 unsigned long end)
251{
252 p4d_t *p4d;
253 unsigned long next;
254 void *p;
255
256 p4d = p4d_offset(pgd, addr);
257 do {
258 next = p4d_addr_end(addr, end);
259
260 if (p4d_none(*p4d)) {
261 p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
262 p4d_populate(&init_mm, p4d, p);
263 }
264 } while (p4d++, addr = next, addr != end);
265}
266
267static void __init kasan_shallow_populate_pgds(void *start, void *end)
268{
269 unsigned long addr, next;
270 pgd_t *pgd;
271 void *p;
272
273 addr = (unsigned long)start;
274 pgd = pgd_offset_k(addr);
275 do {
276 next = pgd_addr_end(addr, (unsigned long)end);
277
278 if (pgd_none(*pgd)) {
279 p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
280 pgd_populate(&init_mm, pgd, p);
281 }
282
283 /*
284 * we need to populate p4ds to be synced when running in
285 * four level mode - see sync_global_pgds_l4()
286 */
287 kasan_shallow_populate_p4ds(pgd, addr, next);
288 } while (pgd++, addr = next, addr != (unsigned long)end);
289}
290
#ifdef CONFIG_KASAN_INLINE
/*
 * Die-notifier callback. For a DIE_GPF event it prints two emergency
 * messages hinting at the likely cause; every other event is ignored.
 * Always returns NOTIFY_OK. @self and @data are unused.
 */
static int kasan_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	if (val != DIE_GPF)
		return NOTIFY_OK;

	pr_emerg("CONFIG_KASAN_INLINE enabled\n");
	pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");

	return NOTIFY_OK;
}

/* Registered from kasan_init() via register_die_notifier(). */
static struct notifier_block kasan_die_notifier = {
	.notifier_call	= kasan_die_handler,
};
#endif
307
308void __init kasan_early_init(void)
309{
310 int i;
311 pteval_t pte_val = __pa_nodebug(kasan_early_shadow_page) |
312 __PAGE_KERNEL | _PAGE_ENC;
313 pmdval_t pmd_val = __pa_nodebug(kasan_early_shadow_pte) | _KERNPG_TABLE;
314 pudval_t pud_val = __pa_nodebug(kasan_early_shadow_pmd) | _KERNPG_TABLE;
315 p4dval_t p4d_val = __pa_nodebug(kasan_early_shadow_pud) | _KERNPG_TABLE;
316
317 /* Mask out unsupported __PAGE_KERNEL bits: */
318 pte_val &= __default_kernel_pte_mask;
319 pmd_val &= __default_kernel_pte_mask;
320 pud_val &= __default_kernel_pte_mask;
321 p4d_val &= __default_kernel_pte_mask;
322
323 for (i = 0; i < PTRS_PER_PTE; i++)
324 kasan_early_shadow_pte[i] = __pte(pte_val);
325
326 for (i = 0; i < PTRS_PER_PMD; i++)
327 kasan_early_shadow_pmd[i] = __pmd(pmd_val);
328
329 for (i = 0; i < PTRS_PER_PUD; i++)
330 kasan_early_shadow_pud[i] = __pud(pud_val);
331
332 for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
333 kasan_early_shadow_p4d[i] = __p4d(p4d_val);
334
335 kasan_map_early_shadow(early_top_pgt);
336 kasan_map_early_shadow(init_top_pgt);
337}
338
339void __init kasan_init(void)
340{
341 int i;
342 void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
343
344#ifdef CONFIG_KASAN_INLINE
345 register_die_notifier(&kasan_die_notifier);
346#endif
347
348 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
349
350 /*
351 * We use the same shadow offset for 4- and 5-level paging to
352 * facilitate boot-time switching between paging modes.
353 * As result in 5-level paging mode KASAN_SHADOW_START and
354 * KASAN_SHADOW_END are not aligned to PGD boundary.
355 *
356 * KASAN_SHADOW_START doesn't share PGD with anything else.
357 * We claim whole PGD entry to make things easier.
358 *
359 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
360 * bunch of things like kernel code, modules, EFI mapping, etc.
361 * We need to take extra steps to not overwrite them.
362 */
363 if (pgtable_l5_enabled()) {
364 void *ptr;
365
366 ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
367 memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
368 set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
369 __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
370 }
371
372 load_cr3(early_top_pgt);
373 __flush_tlb_all();
374
375 clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
376
377 kasan_populate_early_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
378 kasan_mem_to_shadow((void *)PAGE_OFFSET));
379
380 for (i = 0; i < E820_MAX_ENTRIES; i++) {
381 if (pfn_mapped[i].end == 0)
382 break;
383
384 map_range(&pfn_mapped[i]);
385 }
386
387 shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
388 shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
389 shadow_cpu_entry_begin = (void *)round_down(
390 (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE);
391
392 shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
393 CPU_ENTRY_AREA_MAP_SIZE);
394 shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
395 shadow_cpu_entry_end = (void *)round_up(
396 (unsigned long)shadow_cpu_entry_end, PAGE_SIZE);
397
398 kasan_populate_early_shadow(
399 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
400 kasan_mem_to_shadow((void *)VMALLOC_START));
401
402 /*
403 * If we're in full vmalloc mode, don't back vmalloc space with early
404 * shadow pages. Instead, prepopulate pgds/p4ds so they are synced to
405 * the global table and we can populate the lower levels on demand.
406 */
407 if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
408 kasan_shallow_populate_pgds(
409 kasan_mem_to_shadow((void *)VMALLOC_START),
410 kasan_mem_to_shadow((void *)VMALLOC_END));
411 else
412 kasan_populate_early_shadow(
413 kasan_mem_to_shadow((void *)VMALLOC_START),
414 kasan_mem_to_shadow((void *)VMALLOC_END));
415
416 kasan_populate_early_shadow(
417 kasan_mem_to_shadow((void *)VMALLOC_END + 1),
418 shadow_cpu_entry_begin);
419
420 kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
421 (unsigned long)shadow_cpu_entry_end, 0);
422
423 kasan_populate_early_shadow(shadow_cpu_entry_end,
424 kasan_mem_to_shadow((void *)__START_KERNEL_map));
425
426 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
427 (unsigned long)kasan_mem_to_shadow(_end),
428 early_pfn_to_nid(__pa(_stext)));
429
430 kasan_populate_early_shadow(kasan_mem_to_shadow((void *)MODULES_END),
431 (void *)KASAN_SHADOW_END);
432
433 load_cr3(init_top_pgt);
434 __flush_tlb_all();
435
436 /*
437 * kasan_early_shadow_page has been used as early shadow memory, thus
438 * it may contain some garbage. Now we can clear and write protect it,
439 * since after the TLB flush no one should write to it.
440 */
441 memset(kasan_early_shadow_page, 0, PAGE_SIZE);
442 for (i = 0; i < PTRS_PER_PTE; i++) {
443 pte_t pte;
444 pgprot_t prot;
445
446 prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
447 pgprot_val(prot) &= __default_kernel_pte_mask;
448
449 pte = __pte(__pa(kasan_early_shadow_page) | pgprot_val(prot));
450 set_pte(&kasan_early_shadow_pte[i], pte);
451 }
452 /* Flush TLBs again to be sure that write protection applied. */
453 __flush_tlb_all();
454
455 init_task.kasan_depth = 0;
456 pr_info("KernelAddressSanitizer initialized\n");
457}