Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-next/scs' into for-next/core

Support for Clang's Shadow Call Stack in the kernel
(Sami Tolvanen and Will Deacon)
* for-next/scs:
arm64: entry-ftrace.S: Update comment to indicate that x18 is live
scs: Move DEFINE_SCS macro into core code
scs: Remove references to asm/scs.h from core code
scs: Move scs_overflow_check() out of architecture code
arm64: scs: Use 'scs_sp' register alias for x18
scs: Move accounting into alloc/free functions
arm64: scs: Store absolute SCS stack pointer value in thread_info
efi/libstub: Disable Shadow Call Stack
arm64: scs: Add shadow stacks for SDEI
arm64: Implement Shadow Call Stack
arm64: Disable SCS for hypervisor code
arm64: vdso: Disable Shadow Call Stack
arm64: efi: Restore register x18 if it was corrupted
arm64: Preserve register x18 when CPU is suspended
arm64: Reserve register x18 from general allocation with SCS
scs: Disable when function graph tracing is enabled
scs: Add support for stack usage debugging
scs: Add page accounting for shadow call stack allocations
scs: Add support for Clang's Shadow Call Stack (SCS)

+406 -9
+6
Makefile
··· 866 866 KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone) 867 867 endif 868 868 869 + ifdef CONFIG_SHADOW_CALL_STACK 870 + CC_FLAGS_SCS := -fsanitize=shadow-call-stack 871 + KBUILD_CFLAGS += $(CC_FLAGS_SCS) 872 + export CC_FLAGS_SCS 873 + endif 874 + 869 875 # arch Makefile may override CC so keep this after arch Makefile is included 870 876 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) 871 877
+25
arch/Kconfig
··· 533 533 about 20% of all kernel functions, which increases the kernel code 534 534 size by about 2%. 535 535 536 + config ARCH_SUPPORTS_SHADOW_CALL_STACK 537 + bool 538 + help 539 + An architecture should select this if it supports Clang's Shadow 540 + Call Stack and implements runtime support for shadow stack 541 + switching. 542 + 543 + config SHADOW_CALL_STACK 544 + bool "Clang Shadow Call Stack" 545 + depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK 546 + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER 547 + help 548 + This option enables Clang's Shadow Call Stack, which uses a 549 + shadow stack to protect function return addresses from being 550 + overwritten by an attacker. More information can be found in 551 + Clang's documentation: 552 + 553 + https://clang.llvm.org/docs/ShadowCallStack.html 554 + 555 + Note that security guarantees in the kernel differ from the 556 + ones documented for user space. The kernel must store addresses 557 + of shadow stacks in memory, which means an attacker capable of 558 + reading and writing arbitrary memory may be able to locate them 559 + and hijack control flow by modifying the stacks. 560 + 536 561 config HAVE_ARCH_WITHIN_STACK_FRAMES 537 562 bool 538 563 help
+5
arch/arm64/Kconfig
··· 68 68 select ARCH_USE_QUEUED_SPINLOCKS 69 69 select ARCH_USE_SYM_ANNOTATIONS 70 70 select ARCH_SUPPORTS_MEMORY_FAILURE 71 + select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK 71 72 select ARCH_SUPPORTS_ATOMIC_RMW 72 73 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) 73 74 select ARCH_SUPPORTS_NUMA_BALANCING ··· 1026 1025 1027 1026 config ARCH_ENABLE_SPLIT_PMD_PTLOCK 1028 1027 def_bool y if PGTABLE_LEVELS > 2 1028 + 1029 + # Supported by clang >= 7.0 1030 + config CC_HAVE_SHADOW_CALL_STACK 1031 + def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) 1029 1032 1030 1033 config SECCOMP 1031 1034 bool "Enable seccomp to safely compute untrusted bytecode"
+4
arch/arm64/Makefile
··· 87 87 88 88 KBUILD_CFLAGS += $(branch-prot-flags-y) 89 89 90 + ifeq ($(CONFIG_SHADOW_CALL_STACK), y) 91 + KBUILD_CFLAGS += -ffixed-x18 92 + endif 93 + 90 94 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) 91 95 KBUILD_CPPFLAGS += -mbig-endian 92 96 CHECKFLAGS += -D__AARCH64EB__
+1 -1
arch/arm64/include/asm/kvm_hyp.h
··· 12 12 #include <asm/alternative.h> 13 13 #include <asm/sysreg.h> 14 14 15 - #define __hyp_text __section(.hyp.text) notrace 15 + #define __hyp_text __section(.hyp.text) notrace __noscs 16 16 17 17 #define read_sysreg_elx(r,nvh,vh) \ 18 18 ({ \
+29
arch/arm64/include/asm/scs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_SCS_H 3 + #define _ASM_SCS_H 4 + 5 + #ifdef __ASSEMBLY__ 6 + 7 + #include <asm/asm-offsets.h> 8 + 9 + #ifdef CONFIG_SHADOW_CALL_STACK 10 + scs_sp .req x18 11 + 12 + .macro scs_load tsk, tmp 13 + ldr scs_sp, [\tsk, #TSK_TI_SCS_SP] 14 + .endm 15 + 16 + .macro scs_save tsk, tmp 17 + str scs_sp, [\tsk, #TSK_TI_SCS_SP] 18 + .endm 19 + #else 20 + .macro scs_load tsk, tmp 21 + .endm 22 + 23 + .macro scs_save tsk, tmp 24 + .endm 25 + #endif /* CONFIG_SHADOW_CALL_STACK */ 26 + 27 + #endif /* __ASSEMBLY__ */ 28 + 29 + #endif /* _ASM_SCS_H */
+1 -1
arch/arm64/include/asm/suspend.h
··· 2 2 #ifndef __ASM_SUSPEND_H 3 3 #define __ASM_SUSPEND_H 4 4 5 - #define NR_CTX_REGS 12 5 + #define NR_CTX_REGS 13 6 6 #define NR_CALLEE_SAVED_REGS 12 7 7 8 8 /*
+13
arch/arm64/include/asm/thread_info.h
··· 41 41 #endif 42 42 } preempt; 43 43 }; 44 + #ifdef CONFIG_SHADOW_CALL_STACK 45 + void *scs_base; 46 + void *scs_sp; 47 + #endif 44 48 }; 45 49 46 50 #define thread_saved_pc(tsk) \ ··· 104 100 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ 105 101 _TIF_SYSCALL_EMU) 106 102 103 + #ifdef CONFIG_SHADOW_CALL_STACK 104 + #define INIT_SCS \ 105 + .scs_base = init_shadow_call_stack, \ 106 + .scs_sp = init_shadow_call_stack, 107 + #else 108 + #define INIT_SCS 109 + #endif 110 + 107 111 #define INIT_THREAD_INFO(tsk) \ 108 112 { \ 109 113 .flags = _TIF_FOREIGN_FPSTATE, \ 110 114 .preempt_count = INIT_PREEMPT_COUNT, \ 111 115 .addr_limit = KERNEL_DS, \ 116 + INIT_SCS \ 112 117 } 113 118 114 119 #endif /* __ASM_THREAD_INFO_H */
+1
arch/arm64/kernel/Makefile
··· 63 63 obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o 64 64 obj-$(CONFIG_ARM64_SSBD) += ssbd.o 65 65 obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o 66 + obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o 66 67 67 68 obj-y += vdso/ probes/ 68 69 obj-$(CONFIG_COMPAT_VDSO) += vdso32/
+4
arch/arm64/kernel/asm-offsets.c
··· 34 34 #ifdef CONFIG_ARM64_SW_TTBR0_PAN 35 35 DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); 36 36 #endif 37 + #ifdef CONFIG_SHADOW_CALL_STACK 38 + DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base)); 39 + DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp)); 40 + #endif 37 41 DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); 38 42 #ifdef CONFIG_STACKPROTECTOR 39 43 DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
+10 -1
arch/arm64/kernel/efi-rt-wrapper.S
··· 34 34 ldp x29, x30, [sp], #32 35 35 b.ne 0f 36 36 ret 37 - 0: b efi_handle_corrupted_x18 // tail call 37 + 0: 38 + /* 39 + * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a 40 + * shadow stack pointer, which we need to restore before returning to 41 + * potentially instrumented code. This is safe because the wrapper is 42 + * called with preemption disabled and a separate shadow stack is used 43 + * for interrupts. 44 + */ 45 + mov x18, x2 46 + b efi_handle_corrupted_x18 // tail call 38 47 SYM_FUNC_END(__efi_rt_asm_wrapper)
+3 -2
arch/arm64/kernel/entry-ftrace.S
··· 23 23 * 24 24 * ... where <entry> is either ftrace_caller or ftrace_regs_caller. 25 25 * 26 - * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are 27 - * live, and x9-x18 are safe to clobber. 26 + * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are 27 + * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to 28 + * clobber. 28 29 * 29 30 * We save the callsite's context into a pt_regs before invoking any ftrace 30 31 * callbacks. So that we can get a sensible backtrace, we create a stack record
+35 -3
arch/arm64/kernel/entry.S
··· 23 23 #include <asm/mmu.h> 24 24 #include <asm/processor.h> 25 25 #include <asm/ptrace.h> 26 + #include <asm/scs.h> 26 27 #include <asm/thread_info.h> 27 28 #include <asm/asm-uaccess.h> 28 29 #include <asm/unistd.h> ··· 180 179 apply_ssbd 1, x22, x23 181 180 182 181 ptrauth_keys_install_kernel tsk, x20, x22, x23 182 + 183 + scs_load tsk, x20 183 184 .else 184 185 add x21, sp, #S_FRAME_SIZE 185 186 get_current_task tsk ··· 346 343 msr cntkctl_el1, x1 347 344 4: 348 345 #endif 346 + scs_save tsk, x0 347 + 349 348 /* No kernel C function calls after this as user keys are set. */ 350 349 ptrauth_keys_install_user tsk, x0, x1, x2 351 350 ··· 393 388 394 389 .macro irq_stack_entry 395 390 mov x19, sp // preserve the original sp 391 + #ifdef CONFIG_SHADOW_CALL_STACK 392 + mov x24, scs_sp // preserve the original shadow stack 393 + #endif 396 394 397 395 /* 398 396 * Compare sp with the base of the task stack. ··· 413 405 414 406 /* switch to the irq stack */ 415 407 mov sp, x26 408 + 409 + #ifdef CONFIG_SHADOW_CALL_STACK 410 + /* also switch to the irq shadow stack */ 411 + adr_this_cpu scs_sp, irq_shadow_call_stack, x26 412 + #endif 413 + 416 414 9998: 417 415 .endm 418 416 419 417 /* 420 - * x19 should be preserved between irq_stack_entry and 421 - * irq_stack_exit. 418 + * The callee-saved regs (x19-x29) should be preserved between 419 + * irq_stack_entry and irq_stack_exit, but note that kernel_entry 420 + * uses x20-x23 to store data for later use. 
422 421 */ 423 422 .macro irq_stack_exit 424 423 mov sp, x19 424 + #ifdef CONFIG_SHADOW_CALL_STACK 425 + mov scs_sp, x24 426 + #endif 425 427 .endm 426 428 427 429 /* GPRs used by entry code */ ··· 920 902 mov sp, x9 921 903 msr sp_el0, x1 922 904 ptrauth_keys_install_kernel x1, x8, x9, x10 905 + scs_save x0, x8 906 + scs_load x1, x8 923 907 ret 924 908 SYM_FUNC_END(cpu_switch_to) 925 909 NOKPROBE(cpu_switch_to) ··· 1050 1030 1051 1031 mov x19, x1 1052 1032 1033 + #if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) 1034 + ldrb w4, [x19, #SDEI_EVENT_PRIORITY] 1035 + #endif 1036 + 1053 1037 #ifdef CONFIG_VMAP_STACK 1054 1038 /* 1055 1039 * entry.S may have been using sp as a scratch register, find whether 1056 1040 * this is a normal or critical event and switch to the appropriate 1057 1041 * stack for this CPU. 1058 1042 */ 1059 - ldrb w4, [x19, #SDEI_EVENT_PRIORITY] 1060 1043 cbnz w4, 1f 1061 1044 ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6 1062 1045 b 2f ··· 1067 1044 2: mov x6, #SDEI_STACK_SIZE 1068 1045 add x5, x5, x6 1069 1046 mov sp, x5 1047 + #endif 1048 + 1049 + #ifdef CONFIG_SHADOW_CALL_STACK 1050 + /* Use a separate shadow call stack for normal and critical events */ 1051 + cbnz w4, 3f 1052 + adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6 1053 + b 4f 1054 + 3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6 1055 + 4: 1070 1056 #endif 1071 1057 1072 1058 /*
+6
arch/arm64/kernel/head.S
··· 28 28 #include <asm/pgtable-hwdef.h> 29 29 #include <asm/pgtable.h> 30 30 #include <asm/page.h> 31 + #include <asm/scs.h> 31 32 #include <asm/smp.h> 32 33 #include <asm/sysreg.h> 33 34 #include <asm/thread_info.h> ··· 434 433 stp xzr, x30, [sp, #-16]! 435 434 mov x29, sp 436 435 436 + #ifdef CONFIG_SHADOW_CALL_STACK 437 + adr_l scs_sp, init_shadow_call_stack // Set shadow call stack 438 + #endif 439 + 437 440 str_l x21, __fdt_pointer, x5 // Save FDT pointer 438 441 439 442 ldr_l x4, kimage_vaddr // Save the offset between ··· 750 745 ldr x2, [x0, #CPU_BOOT_TASK] 751 746 cbz x2, __secondary_too_slow 752 747 msr sp_el0, x2 748 + scs_load x2, x3 753 749 mov x29, #0 754 750 mov x30, #0 755 751
+16
arch/arm64/kernel/scs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Shadow Call Stack support. 4 + * 5 + * Copyright (C) 2019 Google LLC 6 + */ 7 + 8 + #include <linux/percpu.h> 9 + #include <linux/scs.h> 10 + 11 + DEFINE_SCS(irq_shadow_call_stack); 12 + 13 + #ifdef CONFIG_ARM_SDE_INTERFACE 14 + DEFINE_SCS(sdei_shadow_call_stack_normal); 15 + DEFINE_SCS(sdei_shadow_call_stack_critical); 16 + #endif
+1 -1
arch/arm64/kernel/vdso/Makefile
··· 29 29 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 30 30 ccflags-y += -DDISABLE_BRANCH_PROFILING 31 31 32 - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os 32 + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) 33 33 KBUILD_CFLAGS += $(DISABLE_LTO) 34 34 KASAN_SANITIZE := n 35 35 UBSAN_SANITIZE := n
+14
arch/arm64/mm/proc.S
··· 58 58 * cpu_do_suspend - save CPU registers context 59 59 * 60 60 * x0: virtual address of context pointer 61 + * 62 + * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>. 61 63 */ 62 64 SYM_FUNC_START(cpu_do_suspend) 63 65 mrs x2, tpidr_el0 ··· 84 82 stp x8, x9, [x0, #48] 85 83 stp x10, x11, [x0, #64] 86 84 stp x12, x13, [x0, #80] 85 + /* 86 + * Save x18 as it may be used as a platform register, e.g. by shadow 87 + * call stack. 88 + */ 89 + str x18, [x0, #96] 87 90 ret 88 91 SYM_FUNC_END(cpu_do_suspend) 89 92 ··· 105 98 ldp x9, x10, [x0, #48] 106 99 ldp x11, x12, [x0, #64] 107 100 ldp x13, x14, [x0, #80] 101 + /* 102 + * Restore x18, as it may be used as a platform register, and clear 103 + * the buffer to minimize the risk of exposure when used for shadow 104 + * call stack. 105 + */ 106 + ldr x18, [x0, #96] 107 + str xzr, [x0, #96] 108 108 msr tpidr_el0, x2 109 109 msr tpidrro_el0, x3 110 110 msr contextidr_el1, x4
+6
drivers/base/node.c
··· 415 415 "Node %d AnonPages: %8lu kB\n" 416 416 "Node %d Shmem: %8lu kB\n" 417 417 "Node %d KernelStack: %8lu kB\n" 418 + #ifdef CONFIG_SHADOW_CALL_STACK 419 + "Node %d ShadowCallStack:%8lu kB\n" 420 + #endif 418 421 "Node %d PageTables: %8lu kB\n" 419 422 "Node %d NFS_Unstable: %8lu kB\n" 420 423 "Node %d Bounce: %8lu kB\n" ··· 441 438 nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), 442 439 nid, K(i.sharedram), 443 440 nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), 441 + #ifdef CONFIG_SHADOW_CALL_STACK 442 + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), 443 + #endif 444 444 nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), 445 445 nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), 446 446 nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
+3
drivers/firmware/efi/libstub/Makefile
··· 32 32 $(call cc-option,-fno-stack-protector) \ 33 33 -D__DISABLE_EXPORTS 34 34 35 + # remove SCS flags from all objects in this directory 36 + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) 37 + 35 38 GCOV_PROFILE := n 36 39 KASAN_SANITIZE := n 37 40 UBSAN_SANITIZE := n
+4
fs/proc/meminfo.c
··· 103 103 show_val_kb(m, "SUnreclaim: ", sunreclaim); 104 104 seq_printf(m, "KernelStack: %8lu kB\n", 105 105 global_zone_page_state(NR_KERNEL_STACK_KB)); 106 + #ifdef CONFIG_SHADOW_CALL_STACK 107 + seq_printf(m, "ShadowCallStack:%8lu kB\n", 108 + global_zone_page_state(NR_KERNEL_SCS_KB)); 109 + #endif 106 110 show_val_kb(m, "PageTables: ", 107 111 global_zone_page_state(NR_PAGETABLE)); 108 112
+4
include/linux/compiler-clang.h
··· 42 42 * compilers, like ICC. 43 43 */ 44 44 #define barrier() __asm__ __volatile__("" : : : "memory") 45 + 46 + #if __has_feature(shadow_call_stack) 47 + # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) 48 + #endif
+4
include/linux/compiler_types.h
··· 193 193 # define randomized_struct_fields_end 194 194 #endif 195 195 196 + #ifndef __noscs 197 + # define __noscs 198 + #endif 199 + 196 200 #ifndef asm_volatile_goto 197 201 #define asm_volatile_goto(x...) asm goto(x) 198 202 #endif
+3
include/linux/mmzone.h
··· 156 156 NR_MLOCK, /* mlock()ed pages found and moved off LRU */ 157 157 NR_PAGETABLE, /* used for pagetables */ 158 158 NR_KERNEL_STACK_KB, /* measured in KiB */ 159 + #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) 160 + NR_KERNEL_SCS_KB, /* measured in KiB */ 161 + #endif 159 162 /* Second 128 byte cacheline */ 160 163 NR_BOUNCE, 161 164 #if IS_ENABLED(CONFIG_ZSMALLOC)
+72
include/linux/scs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Shadow Call Stack support. 4 + * 5 + * Copyright (C) 2019 Google LLC 6 + */ 7 + 8 + #ifndef _LINUX_SCS_H 9 + #define _LINUX_SCS_H 10 + 11 + #include <linux/gfp.h> 12 + #include <linux/poison.h> 13 + #include <linux/sched.h> 14 + #include <linux/sizes.h> 15 + 16 + #ifdef CONFIG_SHADOW_CALL_STACK 17 + 18 + /* 19 + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit 20 + * architecture) provided ~40% safety margin on stack usage while keeping 21 + * memory allocation overhead reasonable. 22 + */ 23 + #define SCS_SIZE SZ_1K 24 + #define GFP_SCS (GFP_KERNEL | __GFP_ZERO) 25 + 26 + /* An illegal pointer value to mark the end of the shadow stack. */ 27 + #define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA) 28 + 29 + /* Allocate a static per-CPU shadow stack */ 30 + #define DEFINE_SCS(name) \ 31 + DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) 32 + 33 + #define task_scs(tsk) (task_thread_info(tsk)->scs_base) 34 + #define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp) 35 + 36 + void scs_init(void); 37 + int scs_prepare(struct task_struct *tsk, int node); 38 + void scs_release(struct task_struct *tsk); 39 + 40 + static inline void scs_task_reset(struct task_struct *tsk) 41 + { 42 + /* 43 + * Reset the shadow stack to the base address in case the task 44 + * is reused. 
45 + */ 46 + task_scs_sp(tsk) = task_scs(tsk); 47 + } 48 + 49 + static inline unsigned long *__scs_magic(void *s) 50 + { 51 + return (unsigned long *)(s + SCS_SIZE) - 1; 52 + } 53 + 54 + static inline bool task_scs_end_corrupted(struct task_struct *tsk) 55 + { 56 + unsigned long *magic = __scs_magic(task_scs(tsk)); 57 + unsigned long sz = task_scs_sp(tsk) - task_scs(tsk); 58 + 59 + return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; 60 + } 61 + 62 + #else /* CONFIG_SHADOW_CALL_STACK */ 63 + 64 + static inline void scs_init(void) {} 65 + static inline void scs_task_reset(struct task_struct *tsk) {} 66 + static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } 67 + static inline void scs_release(struct task_struct *tsk) {} 68 + static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; } 69 + 70 + #endif /* CONFIG_SHADOW_CALL_STACK */ 71 + 72 + #endif /* _LINUX_SCS_H */
+8
init/init_task.c
··· 11 11 #include <linux/mm.h> 12 12 #include <linux/audit.h> 13 13 #include <linux/numa.h> 14 + #include <linux/scs.h> 14 15 15 16 #include <asm/pgtable.h> 16 17 #include <linux/uaccess.h> ··· 50 49 .siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock), 51 50 .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), 52 51 }; 52 + 53 + #ifdef CONFIG_SHADOW_CALL_STACK 54 + unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] 55 + __init_task_data = { 56 + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC 57 + }; 58 + #endif 53 59 54 60 /* 55 61 * Set up the first task table, touch at your own risk!. Base=0,
+1
kernel/Makefile
··· 103 103 obj-$(CONFIG_IRQ_WORK) += irq_work.o 104 104 obj-$(CONFIG_CPU_PM) += cpu_pm.o 105 105 obj-$(CONFIG_BPF) += bpf/ 106 + obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o 106 107 107 108 obj-$(CONFIG_PERF_EVENTS) += events/ 108 109
+9
kernel/fork.c
··· 94 94 #include <linux/thread_info.h> 95 95 #include <linux/stackleak.h> 96 96 #include <linux/kasan.h> 97 + #include <linux/scs.h> 97 98 98 99 #include <asm/pgtable.h> 99 100 #include <asm/pgalloc.h> ··· 457 456 458 457 void free_task(struct task_struct *tsk) 459 458 { 459 + scs_release(tsk); 460 + 460 461 #ifndef CONFIG_THREAD_INFO_IN_TASK 461 462 /* 462 463 * The task is finally done with both the stack and thread_info, ··· 843 840 NULL, free_vm_stack_cache); 844 841 #endif 845 842 843 + scs_init(); 844 + 846 845 lockdep_init_task(&init_task); 847 846 uprobes_init(); 848 847 } ··· 901 896 refcount_set(&tsk->stack_refcount, 1); 902 897 #endif 903 898 899 + if (err) 900 + goto free_stack; 901 + 902 + err = scs_prepare(tsk, node); 904 903 if (err) 905 904 goto free_stack; 906 905
+5
kernel/sched/core.c
··· 11 11 #include <linux/nospec.h> 12 12 13 13 #include <linux/kcov.h> 14 + #include <linux/scs.h> 14 15 15 16 #include <asm/switch_to.h> 16 17 #include <asm/tlb.h> ··· 3878 3877 #ifdef CONFIG_SCHED_STACK_END_CHECK 3879 3878 if (task_stack_end_corrupted(prev)) 3880 3879 panic("corrupted stack end detected inside scheduler\n"); 3880 + 3881 + if (task_scs_end_corrupted(prev)) 3882 + panic("corrupted shadow stack detected inside scheduler\n"); 3881 3883 #endif 3882 3884 3883 3885 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP ··· 6044 6040 idle->se.exec_start = sched_clock(); 6045 6041 idle->flags |= PF_IDLE; 6046 6042 6043 + scs_task_reset(idle); 6047 6044 kasan_unpoison_task_stack(idle); 6048 6045 6049 6046 #ifdef CONFIG_SMP
+104
kernel/scs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Shadow Call Stack support. 4 + * 5 + * Copyright (C) 2019 Google LLC 6 + */ 7 + 8 + #include <linux/kasan.h> 9 + #include <linux/mm.h> 10 + #include <linux/scs.h> 11 + #include <linux/slab.h> 12 + #include <linux/vmstat.h> 13 + 14 + static struct kmem_cache *scs_cache; 15 + 16 + static void __scs_account(void *s, int account) 17 + { 18 + struct page *scs_page = virt_to_page(s); 19 + 20 + mod_zone_page_state(page_zone(scs_page), NR_KERNEL_SCS_KB, 21 + account * (SCS_SIZE / SZ_1K)); 22 + } 23 + 24 + static void *scs_alloc(int node) 25 + { 26 + void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node); 27 + 28 + if (!s) 29 + return NULL; 30 + 31 + *__scs_magic(s) = SCS_END_MAGIC; 32 + 33 + /* 34 + * Poison the allocation to catch unintentional accesses to 35 + * the shadow stack when KASAN is enabled. 36 + */ 37 + kasan_poison_object_data(scs_cache, s); 38 + __scs_account(s, 1); 39 + return s; 40 + } 41 + 42 + static void scs_free(void *s) 43 + { 44 + __scs_account(s, -1); 45 + kasan_unpoison_object_data(scs_cache, s); 46 + kmem_cache_free(scs_cache, s); 47 + } 48 + 49 + void __init scs_init(void) 50 + { 51 + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL); 52 + } 53 + 54 + int scs_prepare(struct task_struct *tsk, int node) 55 + { 56 + void *s = scs_alloc(node); 57 + 58 + if (!s) 59 + return -ENOMEM; 60 + 61 + task_scs(tsk) = task_scs_sp(tsk) = s; 62 + return 0; 63 + } 64 + 65 + static void scs_check_usage(struct task_struct *tsk) 66 + { 67 + static unsigned long highest; 68 + 69 + unsigned long *p, prev, curr = highest, used = 0; 70 + 71 + if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE)) 72 + return; 73 + 74 + for (p = task_scs(tsk); p < __scs_magic(task_scs(tsk)); ++p) { 75 + if (!READ_ONCE_NOCHECK(*p)) 76 + break; 77 + used += sizeof(*p); 78 + } 79 + 80 + while (used > curr) { 81 + prev = cmpxchg_relaxed(&highest, curr, used); 82 + 83 + if (prev == curr) { 84 + pr_info("%s (%d): highest shadow stack usage: %lu bytes\n", 
85 + tsk->comm, task_pid_nr(tsk), used); 86 + break; 87 + } 88 + 89 + curr = prev; 90 + } 91 + } 92 + 93 + void scs_release(struct task_struct *tsk) 94 + { 95 + void *s = task_scs(tsk); 96 + 97 + if (!s) 98 + return; 99 + 100 + WARN(task_scs_end_corrupted(tsk), 101 + "corrupted shadow stack detected when freeing task\n"); 102 + scs_check_usage(tsk); 103 + scs_free(s); 104 + }
+6
mm/page_alloc.c
··· 5411 5411 " managed:%lukB" 5412 5412 " mlocked:%lukB" 5413 5413 " kernel_stack:%lukB" 5414 + #ifdef CONFIG_SHADOW_CALL_STACK 5415 + " shadow_call_stack:%lukB" 5416 + #endif 5414 5417 " pagetables:%lukB" 5415 5418 " bounce:%lukB" 5416 5419 " free_pcp:%lukB" ··· 5436 5433 K(zone_managed_pages(zone)), 5437 5434 K(zone_page_state(zone, NR_MLOCK)), 5438 5435 zone_page_state(zone, NR_KERNEL_STACK_KB), 5436 + #ifdef CONFIG_SHADOW_CALL_STACK 5437 + zone_page_state(zone, NR_KERNEL_SCS_KB), 5438 + #endif 5439 5439 K(zone_page_state(zone, NR_PAGETABLE)), 5440 5440 K(zone_page_state(zone, NR_BOUNCE)), 5441 5441 K(free_pcp),
+3
mm/vmstat.c
··· 1119 1119 "nr_mlock", 1120 1120 "nr_page_table_pages", 1121 1121 "nr_kernel_stack", 1122 + #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) 1123 + "nr_shadow_call_stack", 1124 + #endif 1122 1125 "nr_bounce", 1123 1126 #if IS_ENABLED(CONFIG_ZSMALLOC) 1124 1127 "nr_zspages",