Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: mm: avoid allocating struct mm_struct on the stack

struct mm_struct is quite large (~1664 bytes) and so allocating on the
stack may cause problems as the kernel stack size is small.

Since ptdump_walk_pgd_level_core() was only allocating the structure so
that it could modify the pgd argument we can instead introduce a pgd
override in struct mm_walk and pass this down the call stack to where it
is needed.

Since the correct mm_struct is now being passed down, it is now also
unnecessary to take the mmap_sem semaphore because ptdump_walk_pgd() will
now take the semaphore on the real mm.

[steven.price@arm.com: restore missed arm64 changes]
Link: http://lkml.kernel.org/r/20200108145710.34314-1-steven.price@arm.com
Signed-off-by: Steven Price <steven.price@arm.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Hogan <jhogan@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Liang, Kan" <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zong Li <zong.li@sifive.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Steven Price; committed by Linus Torvalds.
Commit: e47690d7 (parent: f8f0d0b6)

+23 -25
+2 -2
arch/arm64/mm/dump.c (+2 -2)

@@ -323,7 +323,7 @@
 		}
 	};
 
-	ptdump_walk_pgd(&st.ptdump, info->mm);
+	ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
 }
 
 static void ptdump_initialize(void)
@@ -361,7 +361,7 @@
 		}
 	};
 
-	ptdump_walk_pgd(&st.ptdump, &init_mm);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
 	if (st.wx_pages || st.uxn_pages)
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
+2 -8
arch/x86/mm/debug_pagetables.c (+2 -8)

@@ -15,13 +15,10 @@
 
 static int ptdump_curknl_show(struct seq_file *m, void *v)
 {
-	if (current->mm->pgd) {
-		down_read(&current->mm->mmap_sem);
+	if (current->mm->pgd)
 		ptdump_walk_pgd_level_debugfs(m, current->mm, false);
-		up_read(&current->mm->mmap_sem);
-	}
 	return 0;
 }
 
@@ -28,13 +25,10 @@
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 static int ptdump_curusr_show(struct seq_file *m, void *v)
 {
-	if (current->mm->pgd) {
-		down_read(&current->mm->mmap_sem);
+	if (current->mm->pgd)
 		ptdump_walk_pgd_level_debugfs(m, current->mm, true);
-		up_read(&current->mm->mmap_sem);
-	}
 	return 0;
 }
 
+7 -11
arch/x86/mm/dump_pagetables.c (+7 -11)

@@ -357,7 +357,8 @@
 	}
 }
 
-static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+static void ptdump_walk_pgd_level_core(struct seq_file *m,
+				       struct mm_struct *mm, pgd_t *pgd,
 				       bool checkwx, bool dmesg)
 {
 	const struct ptdump_range ptdump_ranges[] = {
@@ -387,12 +386,7 @@
 		.seq = m
 	};
 
-	struct mm_struct fake_mm = {
-		.pgd = pgd
-	};
-	init_rwsem(&fake_mm.mmap_sem);
-
-	ptdump_walk_pgd(&st.ptdump, &fake_mm);
+	ptdump_walk_pgd(&st.ptdump, mm, pgd);
 
 	if (!checkwx)
 		return;
@@ -400,7 +404,7 @@
 
 void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
 {
-	ptdump_walk_pgd_level_core(m, mm->pgd, false, true);
+	ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true);
 }
 
 void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
@@ -411,7 +415,7 @@
 	if (user && boot_cpu_has(X86_FEATURE_PTI))
 		pgd = kernel_to_user_pgdp(pgd);
 #endif
-	ptdump_walk_pgd_level_core(m, pgd, false, false);
+	ptdump_walk_pgd_level_core(m, mm, pgd, false, false);
 }
 EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
 
@@ -426,13 +430,13 @@
 
 	pr_info("x86/mm: Checking user space page tables\n");
 	pgd = kernel_to_user_pgdp(pgd);
-	ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+	ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false);
 #endif
 }
 
 void ptdump_walk_pgd_level_checkwx(void)
 {
-	ptdump_walk_pgd_level_core(NULL, INIT_PGD, true, false);
+	ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
 }
 
 static int __init pt_dump_init(void)
+3
include/linux/pagewalk.h (+3)

@@ -74,6 +74,7 @@
  * mm_walk - walk_page_range data
  * @ops:	operation to call during the walk
  * @mm:		mm_struct representing the target process of page table walk
+ * @pgd:	pointer to PGD; only valid with no_vma (otherwise set to NULL)
  * @vma:	vma currently walked (NULL if walking outside vmas)
  * @action:	next action to perform (see enum page_walk_action)
  * @no_vma:	walk ignoring vmas (vma will always be NULL)
@@ -85,6 +84,7 @@
 struct mm_walk {
 	const struct mm_walk_ops *ops;
 	struct mm_struct *mm;
+	pgd_t *pgd;
 	struct vm_area_struct *vma;
 	enum page_walk_action action;
 	bool no_vma;
@@ -97,6 +95,7 @@
 		void *private);
 int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 			  unsigned long end, const struct mm_walk_ops *ops,
+			  pgd_t *pgd,
 			  void *private);
 int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 		void *private);
+1 -1
include/linux/ptdump.h (+1 -1)

@@ -17,6 +17,6 @@
 	const struct ptdump_range *range;
 };
 
-void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm);
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd);
 
 #endif /* _LINUX_PTDUMP_H */
+6 -1
mm/pagewalk.c (+6 -1)

@@ -206,7 +206,10 @@
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
-	pgd = pgd_offset(walk->mm, addr);
+	if (walk->pgd)
+		pgd = walk->pgd + pgd_index(addr);
+	else
+		pgd = pgd_offset(walk->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
@@ -439,10 +436,12 @@
  */
 int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 			  unsigned long end, const struct mm_walk_ops *ops,
+			  pgd_t *pgd,
 			  void *private)
 {
 	struct mm_walk walk = {
 		.ops		= ops,
 		.mm		= mm,
+		.pgd		= pgd,
 		.private	= private,
 		.no_vma		= true
 	};
+2 -2
mm/ptdump.c (+2 -2)

@@ -122,14 +122,14 @@
 	.pte_hole	= ptdump_hole,
 };
 
-void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
 {
 	const struct ptdump_range *range = st->range;
 
 	down_read(&mm->mmap_sem);
 	while (range->start != range->end) {
 		walk_page_range_novma(mm, range->start, range->end,
-				      &ptdump_ops, st);
+				      &ptdump_ops, pgd, st);
 		range++;
 	}
 	up_read(&mm->mmap_sem);