Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/alternative: Use .ibt_endbr_seal to seal indirect calls

Objtool's --ibt option generates .ibt_endbr_seal, which lists
superfluous ENDBR instructions. That is, those ENDBR instructions whose
function is never indirectly called.

Overwrite these ENDBR instructions with a 4-byte NOP such that these
functions can never be indirectly called, reducing the number of viable
ENDBR targets in the kernel.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20220308154319.822545231@infradead.org

+117 -13
+4
arch/um/kernel/um_arch.c
··· 424 424 os_check_bugs(); 425 425 } 426 426 427 + void apply_ibt_endbr(s32 *start, s32 *end) 428 + { 429 + } 430 + 427 431 void apply_retpolines(s32 *start, s32 *end) 428 432 { 429 433 }
+8 -1
arch/x86/Kconfig
··· 1873 1873 config X86_KERNEL_IBT 1874 1874 prompt "Indirect Branch Tracking" 1875 1875 bool 1876 - depends on X86_64 && CC_HAS_IBT 1876 + depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION 1877 1877 help 1878 1878 Build the kernel with support for Indirect Branch Tracking, a 1879 1879 hardware support coarse-grain forward-edge Control Flow Integrity 1880 1880 protection. It enforces that all indirect calls must land on 1881 1881 an ENDBR instruction, as such, the compiler will instrument the 1882 1882 code with them to make this happen. 1883 + 1884 + In addition to building the kernel with IBT, seal all functions that 1885 + are not indirect call targets, avoiding them ever becoming one. 1886 + 1887 + This requires LTO-like objtool runs and will slow down the build. It 1888 + does significantly reduce the number of ENDBR instructions in the 1889 + kernel image. 1883 1890 1884 1891 config X86_INTEL_MEMORY_PROTECTION_KEYS 1885 1892 prompt "Memory Protection Keys"
+1
arch/x86/include/asm/alternative.h
··· 76 76 extern void alternative_instructions(void); 77 77 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 78 78 extern void apply_retpolines(s32 *start, s32 *end); 79 + extern void apply_ibt_endbr(s32 *start, s32 *end); 79 80 80 81 struct module; 81 82
+12
arch/x86/include/asm/ibt.h
··· 46 46 return endbr; 47 47 } 48 48 49 + static inline __attribute_const__ u32 gen_endbr_poison(void) 50 + { 51 + /* 52 + * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it 53 + * will be unique to (former) ENDBR sites. 54 + */ 55 + return 0x001f0f66; /* osp nopl (%rax) */ 56 + } 57 + 49 58 static inline bool is_endbr(u32 val) 50 59 { 60 + if (val == gen_endbr_poison()) 61 + return true; 62 + 51 63 val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ 52 64 return val == gen_endbr(); 53 65 }
+39
arch/x86/kernel/alternative.c
··· 115 115 } 116 116 117 117 extern s32 __retpoline_sites[], __retpoline_sites_end[]; 118 + extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; 118 119 extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 119 120 extern s32 __smp_locks[], __smp_locks_end[]; 120 121 void text_poke_early(void *addr, const void *opcode, size_t len); ··· 513 512 514 513 #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ 515 514 515 + #ifdef CONFIG_X86_KERNEL_IBT 516 + 517 + /* 518 + * Generated by: objtool --ibt 519 + */ 520 + void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) 521 + { 522 + s32 *s; 523 + 524 + for (s = start; s < end; s++) { 525 + u32 endbr, poison = gen_endbr_poison(); 526 + void *addr = (void *)s + *s; 527 + 528 + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) 529 + continue; 530 + 531 + if (WARN_ON_ONCE(!is_endbr(endbr))) 532 + continue; 533 + 534 + DPRINTK("ENDBR at: %pS (%px)", addr, addr); 535 + 536 + /* 537 + * When we have IBT, the lack of ENDBR will trigger #CP 538 + */ 539 + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); 540 + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); 541 + text_poke_early(addr, &poison, 4); 542 + } 543 + } 544 + 545 + #else 546 + 547 + void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } 548 + 549 + #endif /* CONFIG_X86_KERNEL_IBT */ 550 + 516 551 #ifdef CONFIG_SMP 517 552 static void alternatives_smp_lock(const s32 *start, const s32 *end, 518 553 u8 *text, u8 *text_end) ··· 866 829 * alternatives can be overwritten by their immediate fragments. 867 830 */ 868 831 apply_alternatives(__alt_instructions, __alt_instructions_end); 832 + 833 + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); 869 834 870 835 #ifdef CONFIG_SMP 871 836 /* Patch to UP if other cpus not imminent. */
+7 -1
arch/x86/kernel/module.c
··· 253 253 { 254 254 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 255 255 *para = NULL, *orc = NULL, *orc_ip = NULL, 256 - *retpolines = NULL; 256 + *retpolines = NULL, *ibt_endbr = NULL; 257 257 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 258 258 259 259 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { ··· 271 271 orc_ip = s; 272 272 if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) 273 273 retpolines = s; 274 + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) 275 + ibt_endbr = s; 274 276 } 275 277 276 278 /* ··· 291 289 /* patch .altinstructions */ 292 290 void *aseg = (void *)alt->sh_addr; 293 291 apply_alternatives(aseg, aseg + alt->sh_size); 292 + } 293 + if (ibt_endbr) { 294 + void *iseg = (void *)ibt_endbr->sh_addr; 295 + apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); 294 296 } 295 297 if (locks && text) { 296 298 void *lseg = (void *)locks->sh_addr;
+38 -9
scripts/Makefile.build
··· 86 86 targets-for-builtin += $(obj)/built-in.a 87 87 endif 88 88 89 - targets-for-modules := $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) 89 + targets-for-modules := 90 90 91 91 ifdef CONFIG_LTO_CLANG 92 92 targets-for-modules += $(patsubst %.o, %.lto.o, $(filter %.o, $(obj-m))) 93 93 endif 94 + 95 + ifdef CONFIG_X86_KERNEL_IBT 96 + targets-for-modules += $(patsubst %.o, %.objtool, $(filter %.o, $(obj-m))) 97 + endif 98 + 99 + targets-for-modules += $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) 94 100 95 101 ifdef need-modorder 96 102 targets-for-modules += $(obj)/modules.order ··· 236 230 objtool_args = \ 237 231 $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \ 238 232 $(if $(part-of-module), --module) \ 233 + $(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt) \ 239 234 $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ 240 235 $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ 241 236 $(if $(CONFIG_RETPOLINE), --retpoline) \ ··· 244 237 $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ 245 238 $(if $(CONFIG_SLS), --sls) 246 239 247 - cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $@) 248 - cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) 240 + cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool_args) $(@:.objtool=.o)) 241 + cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$(@:.objtool=.o): $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) 249 242 250 243 endif # CONFIG_STACK_VALIDATION 251 244 ··· 253 246 254 247 # Skip objtool for LLVM bitcode 255 248 $(obj)/%.o: objtool-enabled := 249 + 250 + # objtool was skipped for LLVM bitcode, run it now that we have compiled 251 + # modules into native code 252 + $(obj)/%.lto.o: objtool-enabled = y 253 + $(obj)/%.lto.o: part-of-module := y 254 + 255 + else ifdef CONFIG_X86_KERNEL_IBT 256 + 257 + # Skip objtool on individual files 258 + $(obj)/%.o: objtool-enabled := 259 + 260 + # instead run objtool 
on the module as a whole, right before 261 + # the final link pass with the linker script. 262 + $(obj)/%.objtool: objtool-enabled = y 263 + $(obj)/%.objtool: part-of-module := y 256 264 257 265 else 258 266 ··· 314 292 # Module .o files may contain LLVM bitcode, compile them into native code 315 293 # before ELF processing 316 294 quiet_cmd_cc_lto_link_modules = LTO [M] $@ 317 - cmd_cc_lto_link_modules = \ 295 + cmd_cc_lto_link_modules = \ 318 296 $(LD) $(ld_flags) -r -o $@ \ 319 297 $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ 320 298 echo -T $(@:.lto.o=.o.symversions)) \ 321 299 --whole-archive $(filter-out FORCE,$^) \ 322 300 $(cmd_objtool) 323 - 324 - # objtool was skipped for LLVM bitcode, run it now that we have compiled 325 - # modules into native code 326 - $(obj)/%.lto.o: objtool-enabled = y 327 - $(obj)/%.lto.o: part-of-module := y 328 301 329 302 $(obj)/%.lto.o: $(obj)/%.o FORCE 330 303 $(call if_changed,cc_lto_link_modules) ··· 332 315 333 316 $(obj)/%.mod: $(obj)/%$(mod-prelink-ext).o FORCE 334 317 $(call if_changed,mod) 318 + 319 + # 320 + # Since objtool will re-write the file it will change the timestamps, therefore 321 + # it is critical that the %.objtool file gets a timestamp *after* objtool runs. 322 + # 323 + # Additionally, care must be had with ordering this rule against the other rules 324 + # that take %.o as a dependency. 325 + # 326 + cmd_objtool_mod = true $(cmd_objtool) ; touch $@ 327 + 328 + $(obj)/%.objtool: $(obj)/%$(mod-prelink-ext).o FORCE 329 + $(call if_changed,objtool_mod) 335 330 336 331 quiet_cmd_cc_lst_c = MKLST $@ 337 332 cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \