Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'clang-lto-v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull clang LTO updates from Kees Cook:
"Clang Link Time Optimization.

This is built on the work done preparing for LTO by arm64 folks,
tracing folks, etc. This includes the core changes as well as the
remaining pieces for arm64 (LTO has been the default build method on
Android for about 3 years now, as it is the prerequisite for the
Control Flow Integrity protections).

While x86 LTO enablement is done, it depends on some pending objtool
clean-ups. It's possible that I'll send a "part 2" pull request for
LTO that includes x86 support.

For merge log posterity, and as detailed in commit dc5723b02e52
("kbuild: add support for Clang LTO"), here is the tl;dr to do an LTO
build:

make LLVM=1 LLVM_IAS=1 defconfig
scripts/config -e LTO_CLANG_THIN
make LLVM=1 LLVM_IAS=1

(To do a cross-compile of arm64, add "CROSS_COMPILE=aarch64-linux-gnu-"
and "ARCH=arm64" to the "make" command lines.)

Summary:

- Clang LTO build infrastructure and arm64-specific enablement (Sami
Tolvanen)

- Recursive build CC_FLAGS_LTO fix (Alexander Lobakin)"

* tag 'clang-lto-v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
kbuild: prevent CC_FLAGS_LTO self-bloating on recursive rebuilds
arm64: allow LTO to be selected
arm64: disable recordmcount with DYNAMIC_FTRACE_WITH_REGS
arm64: vdso: disable LTO
drivers/misc/lkdtm: disable LTO for rodata.o
efi/libstub: disable LTO
scripts/mod: disable LTO for empty.c
modpost: lto: strip .lto from module names
PCI: Fix PREL32 relocations for LTO
init: lto: fix PREL32 relocations
init: lto: ensure initcall ordering
kbuild: lto: add a default list of used symbols
kbuild: lto: merge module sections
kbuild: lto: limit inlining
kbuild: lto: fix module versioning
kbuild: add support for Clang LTO
tracing: move function tracer options to Kconfig

+704 -59
+1
.gitignore
··· 42 42 *.so.dbg 43 43 *.su 44 44 *.symtypes 45 + *.symversions 45 46 *.tab.[ch] 46 47 *.tar 47 48 *.xz
+30 -15
Makefile
··· 853 853 export DEBUG_CFLAGS 854 854 855 855 ifdef CONFIG_FUNCTION_TRACER 856 - ifdef CONFIG_FTRACE_MCOUNT_RECORD 857 - # gcc 5 supports generating the mcount tables directly 858 - ifeq ($(call cc-option-yn,-mrecord-mcount),y) 859 - CC_FLAGS_FTRACE += -mrecord-mcount 860 - export CC_USING_RECORD_MCOUNT := 1 861 - endif 856 + ifdef CONFIG_FTRACE_MCOUNT_USE_CC 857 + CC_FLAGS_FTRACE += -mrecord-mcount 862 858 ifdef CONFIG_HAVE_NOP_MCOUNT 863 859 ifeq ($(call cc-option-yn, -mnop-mcount),y) 864 860 CC_FLAGS_FTRACE += -mnop-mcount 865 861 CC_FLAGS_USING += -DCC_USING_NOP_MCOUNT 866 862 endif 863 + endif 864 + endif 865 + ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT 866 + ifdef CONFIG_HAVE_C_RECORDMCOUNT 867 + BUILD_C_RECORDMCOUNT := y 868 + export BUILD_C_RECORDMCOUNT 867 869 endif 868 870 endif 869 871 ifdef CONFIG_HAVE_FENTRY ··· 877 875 export CC_FLAGS_FTRACE 878 876 KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_FLAGS_USING) 879 877 KBUILD_AFLAGS += $(CC_FLAGS_USING) 880 - ifdef CONFIG_DYNAMIC_FTRACE 881 - ifdef CONFIG_HAVE_C_RECORDMCOUNT 882 - BUILD_C_RECORDMCOUNT := y 883 - export BUILD_C_RECORDMCOUNT 884 - endif 885 - endif 886 878 endif 887 879 888 880 # We trigger additional mismatches with less inlining ··· 893 897 CC_FLAGS_SCS := -fsanitize=shadow-call-stack 894 898 KBUILD_CFLAGS += $(CC_FLAGS_SCS) 895 899 export CC_FLAGS_SCS 900 + endif 901 + 902 + ifdef CONFIG_LTO_CLANG 903 + ifdef CONFIG_LTO_CLANG_THIN 904 + CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit 905 + KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache 906 + else 907 + CC_FLAGS_LTO := -flto 908 + endif 909 + CC_FLAGS_LTO += -fvisibility=hidden 910 + 911 + # Limit inlining across translation units to reduce binary size 912 + KBUILD_LDFLAGS += -mllvm -import-instr-limit=5 913 + endif 914 + 915 + ifdef CONFIG_LTO 916 + KBUILD_CFLAGS += $(CC_FLAGS_LTO) 917 + export CC_FLAGS_LTO 896 918 endif 897 919 898 920 ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B ··· 1507 1493 *.spec 1508 1494 1509 1495 # 
Directories & files removed with 'make distclean' 1510 - DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS 1496 + DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache 1511 1497 1512 1498 # clean - Delete most, but leave enough to build external modules 1513 1499 # ··· 1753 1739 1754 1740 clean-dirs := $(KBUILD_EXTMOD) 1755 1741 clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \ 1756 - $(KBUILD_EXTMOD)/compile_commands.json 1742 + $(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache 1757 1743 1758 1744 PHONY += help 1759 1745 help: ··· 1850 1836 -o -name '.tmp_*.o.*' \ 1851 1837 -o -name '*.c.[012]*.*' \ 1852 1838 -o -name '*.ll' \ 1853 - -o -name '*.gcno' \) -type f -print | xargs rm -f 1839 + -o -name '*.gcno' \ 1840 + -o -name '*.*.symversions' \) -type f -print | xargs rm -f 1854 1841 1855 1842 # Generate tags for editors 1856 1843 # ---------------------------------------------------------------------------
+90
arch/Kconfig
··· 603 603 reading and writing arbitrary memory may be able to locate them 604 604 and hijack control flow by modifying the stacks. 605 605 606 + config LTO 607 + bool 608 + help 609 + Selected if the kernel will be built using the compiler's LTO feature. 610 + 611 + config LTO_CLANG 612 + bool 613 + select LTO 614 + help 615 + Selected if the kernel will be built using Clang's LTO feature. 616 + 617 + config ARCH_SUPPORTS_LTO_CLANG 618 + bool 619 + help 620 + An architecture should select this option if it supports: 621 + - compiling with Clang, 622 + - compiling inline assembly with Clang's integrated assembler, 623 + - and linking with LLD. 624 + 625 + config ARCH_SUPPORTS_LTO_CLANG_THIN 626 + bool 627 + help 628 + An architecture should select this option if it can support Clang's 629 + ThinLTO mode. 630 + 631 + config HAS_LTO_CLANG 632 + def_bool y 633 + # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 634 + depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD 635 + depends on $(success,test $(LLVM) -eq 1) 636 + depends on $(success,test $(LLVM_IAS) -eq 1) 637 + depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) 638 + depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) 639 + depends on ARCH_SUPPORTS_LTO_CLANG 640 + depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT 641 + depends on !KASAN 642 + depends on !GCOV_KERNEL 643 + help 644 + The compiler and Kconfig options support building with Clang's 645 + LTO. 646 + 647 + choice 648 + prompt "Link Time Optimization (LTO)" 649 + default LTO_NONE 650 + help 651 + This option enables Link Time Optimization (LTO), which allows the 652 + compiler to optimize binaries globally. 653 + 654 + If unsure, select LTO_NONE. Note that LTO is very resource-intensive 655 + so it's disabled by default. 656 + 657 + config LTO_NONE 658 + bool "None" 659 + help 660 + Build the kernel normally, without Link Time Optimization (LTO). 
661 + 662 + config LTO_CLANG_FULL 663 + bool "Clang Full LTO (EXPERIMENTAL)" 664 + depends on HAS_LTO_CLANG 665 + depends on !COMPILE_TEST 666 + select LTO_CLANG 667 + help 668 + This option enables Clang's full Link Time Optimization (LTO), which 669 + allows the compiler to optimize the kernel globally. If you enable 670 + this option, the compiler generates LLVM bitcode instead of ELF 671 + object files, and the actual compilation from bitcode happens at 672 + the LTO link step, which may take several minutes depending on the 673 + kernel configuration. More information can be found from LLVM's 674 + documentation: 675 + 676 + https://llvm.org/docs/LinkTimeOptimization.html 677 + 678 + During link time, this option can use a large amount of RAM, and 679 + may take much longer than the ThinLTO option. 680 + 681 + config LTO_CLANG_THIN 682 + bool "Clang ThinLTO (EXPERIMENTAL)" 683 + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN 684 + select LTO_CLANG 685 + help 686 + This option enables Clang's ThinLTO, which allows for parallel 687 + optimization and faster incremental compiles compared to the 688 + CONFIG_LTO_CLANG_FULL option. More information can be found 689 + from Clang's documentation: 690 + 691 + https://clang.llvm.org/docs/ThinLTO.html 692 + 693 + If unsure, say Y. 694 + endchoice 695 + 606 696 config HAVE_ARCH_WITHIN_STACK_FRAMES 607 697 bool 608 698 help
+4
arch/arm64/Kconfig
··· 73 73 select ARCH_SUPPORTS_DEBUG_PAGEALLOC 74 74 select ARCH_SUPPORTS_MEMORY_FAILURE 75 75 select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK 76 + select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN 77 + select ARCH_SUPPORTS_LTO_CLANG_THIN 76 78 select ARCH_SUPPORTS_ATOMIC_RMW 77 79 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) 78 80 select ARCH_SUPPORTS_NUMA_BALANCING ··· 164 162 select HAVE_DYNAMIC_FTRACE 165 163 select HAVE_DYNAMIC_FTRACE_WITH_REGS \ 166 164 if $(cc-option,-fpatchable-function-entry=2) 165 + select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \ 166 + if DYNAMIC_FTRACE_WITH_REGS 167 167 select HAVE_EFFICIENT_UNALIGNED_ACCESS 168 168 select HAVE_FAST_GUP 169 169 select HAVE_FTRACE_MCOUNT_RECORD
+2 -1
arch/arm64/kernel/vdso/Makefile
··· 29 29 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 30 30 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO 31 31 32 - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) 32 + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \ 33 + $(CC_FLAGS_LTO) 33 34 KASAN_SANITIZE := n 34 35 UBSAN_SANITIZE := n 35 36 OBJECT_FILES_NON_STANDARD := y
+2
drivers/firmware/efi/libstub/Makefile
··· 38 38 39 39 # remove SCS flags from all objects in this directory 40 40 KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) 41 + # disable LTO 42 + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) 41 43 42 44 GCOV_PROFILE := n 43 45 # Sanitizer runtimes are unavailable and cannot be linked here.
+1
drivers/misc/lkdtm/Makefile
··· 16 16 KASAN_SANITIZE_rodata.o := n 17 17 KASAN_SANITIZE_stackleak.o := n 18 18 KCOV_INSTRUMENT_rodata.o := n 19 + CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) 19 20 20 21 OBJCOPYFLAGS := 21 22 OBJCOPYFLAGS_rodata_objcopy.o := \
+4 -1
include/asm-generic/vmlinux.lds.h
··· 90 90 * .data. We don't want to pull in .data..other sections, which Linux 91 91 * has defined. Same for text and bss. 92 92 * 93 + * With LTO_CLANG, the linker also splits sections by default, so we need 94 + * these macros to combine the sections during the final link. 95 + * 93 96 * RODATA_MAIN is not used because existing code already defines .rodata.x 94 97 * sections to be brought in with rodata. 95 98 */ 96 - #ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION 99 + #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) 97 100 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* 98 101 #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$Lubsan_* 99 102 #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
+70 -9
include/linux/init.h
··· 184 184 * as KEEP() in the linker script. 185 185 */ 186 186 187 + /* Format: <modname>__<counter>_<line>_<fn> */ 188 + #define __initcall_id(fn) \ 189 + __PASTE(__KBUILD_MODNAME, \ 190 + __PASTE(__, \ 191 + __PASTE(__COUNTER__, \ 192 + __PASTE(_, \ 193 + __PASTE(__LINE__, \ 194 + __PASTE(_, fn)))))) 195 + 196 + /* Format: __<prefix>__<iid><id> */ 197 + #define __initcall_name(prefix, __iid, id) \ 198 + __PASTE(__, \ 199 + __PASTE(prefix, \ 200 + __PASTE(__, \ 201 + __PASTE(__iid, id)))) 202 + 203 + #ifdef CONFIG_LTO_CLANG 204 + /* 205 + * With LTO, the compiler doesn't necessarily obey link order for 206 + * initcalls. In order to preserve the correct order, we add each 207 + * variable into its own section and generate a linker script (in 208 + * scripts/link-vmlinux.sh) to specify the order of the sections. 209 + */ 210 + #define __initcall_section(__sec, __iid) \ 211 + #__sec ".init.." #__iid 212 + 213 + /* 214 + * With LTO, the compiler can rename static functions to avoid 215 + * global naming collisions. We use a global stub function for 216 + * initcalls to create a stable symbol name whose address can be 217 + * taken in inline assembly when PREL32 relocations are used. 218 + */ 219 + #define __initcall_stub(fn, __iid, id) \ 220 + __initcall_name(initstub, __iid, id) 221 + 222 + #define __define_initcall_stub(__stub, fn) \ 223 + int __init __stub(void); \ 224 + int __init __stub(void) \ 225 + { \ 226 + return fn(); \ 227 + } \ 228 + __ADDRESSABLE(__stub) 229 + #else 230 + #define __initcall_section(__sec, __iid) \ 231 + #__sec ".init" 232 + 233 + #define __initcall_stub(fn, __iid, id) fn 234 + 235 + #define __define_initcall_stub(__stub, fn) \ 236 + __ADDRESSABLE(fn) 237 + #endif 238 + 187 239 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS 188 - #define ___define_initcall(fn, id, __sec) \ 189 - __ADDRESSABLE(fn) \ 190 - asm(".section \"" #__sec ".init\", \"a\" \n" \ 191 - "__initcall_" #fn #id ": \n" \ 192 - ".long " #fn " - . 
\n" \ 240 + #define ____define_initcall(fn, __stub, __name, __sec) \ 241 + __define_initcall_stub(__stub, fn) \ 242 + asm(".section \"" __sec "\", \"a\" \n" \ 243 + __stringify(__name) ": \n" \ 244 + ".long " __stringify(__stub) " - . \n" \ 193 245 ".previous \n"); 194 246 #else 195 - #define ___define_initcall(fn, id, __sec) \ 196 - static initcall_t __initcall_##fn##id __used \ 197 - __attribute__((__section__(#__sec ".init"))) = fn; 247 + #define ____define_initcall(fn, __unused, __name, __sec) \ 248 + static initcall_t __name __used \ 249 + __attribute__((__section__(__sec))) = fn; 198 250 #endif 251 + 252 + #define __unique_initcall(fn, id, __sec, __iid) \ 253 + ____define_initcall(fn, \ 254 + __initcall_stub(fn, __iid, id), \ 255 + __initcall_name(initcall, __iid, id), \ 256 + __initcall_section(__sec, __iid)) 257 + 258 + #define ___define_initcall(fn, id, __sec) \ 259 + __unique_initcall(fn, id, __sec, __initcall_id(fn)) 199 260 200 261 #define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) 201 262 ··· 297 236 #define __exitcall(fn) \ 298 237 static exitcall_t __exitcall_##fn __exit_call = fn 299 238 300 - #define console_initcall(fn) ___define_initcall(fn,, .con_initcall) 239 + #define console_initcall(fn) ___define_initcall(fn, con, .con_initcall) 301 240 302 241 struct obs_kernel_param { 303 242 const char *str;
+25 -2
include/linux/pci.h
··· 1926 1926 }; 1927 1927 1928 1928 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS 1929 - #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1929 + #define ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1930 1930 class_shift, hook) \ 1931 1931 __ADDRESSABLE(hook) \ 1932 1932 asm(".section " #sec ", \"a\" \n" \ ··· 1935 1935 ".long " #class ", " #class_shift " \n" \ 1936 1936 ".long " #hook " - . \n" \ 1937 1937 ".previous \n"); 1938 + 1939 + /* 1940 + * Clang's LTO may rename static functions in C, but has no way to 1941 + * handle such renamings when referenced from inline asm. To work 1942 + * around this, create global C stubs for these cases. 1943 + */ 1944 + #ifdef CONFIG_LTO_CLANG 1945 + #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1946 + class_shift, hook, stub) \ 1947 + void stub(struct pci_dev *dev); \ 1948 + void stub(struct pci_dev *dev) \ 1949 + { \ 1950 + hook(dev); \ 1951 + } \ 1952 + ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1953 + class_shift, stub) 1954 + #else 1955 + #define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1956 + class_shift, hook, stub) \ 1957 + ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1958 + class_shift, hook) 1959 + #endif 1960 + 1938 1961 #define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1939 1962 class_shift, hook) \ 1940 1963 __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ 1941 - class_shift, hook) 1964 + class_shift, hook, __UNIQUE_ID(hook)) 1942 1965 #else 1943 1966 /* Anonymous variables would be nice... */ 1944 1967 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \
+1
init/Kconfig
··· 2307 2307 config UNUSED_KSYMS_WHITELIST 2308 2308 string "Whitelist of symbols to keep in ksymtab" 2309 2309 depends on TRIM_UNUSED_KSYMS 2310 + default "scripts/lto-used-symbollist.txt" if LTO_CLANG 2310 2311 help 2311 2312 By default, all unused exported symbols will be un-exported from the 2312 2313 build when TRIM_UNUSED_KSYMS is selected.
+16
kernel/trace/Kconfig
··· 602 602 depends on DYNAMIC_FTRACE 603 603 depends on HAVE_FTRACE_MCOUNT_RECORD 604 604 605 + config FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY 606 + bool 607 + depends on FTRACE_MCOUNT_RECORD 608 + 609 + config FTRACE_MCOUNT_USE_CC 610 + def_bool y 611 + depends on $(cc-option,-mrecord-mcount) 612 + depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY 613 + depends on FTRACE_MCOUNT_RECORD 614 + 615 + config FTRACE_MCOUNT_USE_RECORDMCOUNT 616 + def_bool y 617 + depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY 618 + depends on !FTRACE_MCOUNT_USE_CC 619 + depends on FTRACE_MCOUNT_RECORD 620 + 605 621 config TRACING_MAP 606 622 bool 607 623 depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
+41 -7
scripts/Makefile.build
··· 111 111 # --------------------------------------------------------------------------- 112 112 113 113 quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ 114 - cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $< 114 + cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $< 115 115 116 116 $(obj)/%.s: $(src)/%.c FORCE 117 117 $(call if_changed_dep,cc_s_c) ··· 166 166 # the actual value of the checksum generated by genksyms 167 167 # o remove .tmp_<file>.o to <file>.o 168 168 169 + ifdef CONFIG_LTO_CLANG 170 + # Generate .o.symversions files for each .o with exported symbols, and link these 171 + # to the kernel and/or modules at the end. 172 + cmd_modversions_c = \ 173 + if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then \ 174 + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ 175 + > $@.symversions; \ 176 + fi; 177 + else 169 178 cmd_modversions_c = \ 170 179 if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \ 171 180 $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ ··· 186 177 rm -f $(@D)/.tmp_$(@F:.o=.ver); \ 187 178 fi 188 179 endif 180 + endif 189 181 190 - ifdef CONFIG_FTRACE_MCOUNT_RECORD 191 - ifndef CC_USING_RECORD_MCOUNT 182 + ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT 192 183 # compiler will not generate __mcount_loc use recordmcount or recordmcount.pl 193 184 ifdef BUILD_C_RECORDMCOUNT 194 185 ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") ··· 215 206 endif # BUILD_C_RECORDMCOUNT 216 207 cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)), \ 217 208 $(sub_cmd_record_mcount)) 218 - endif # CC_USING_RECORD_MCOUNT 219 - endif # CONFIG_FTRACE_MCOUNT_RECORD 209 + endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT 220 210 221 211 ifdef CONFIG_STACK_VALIDATION 222 212 ifneq ($(SKIP_STACK_VALIDATION),1) ··· 396 388 $(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; 397 389 $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; 398 
390 391 + # combine symversions for later processing 392 + quiet_cmd_update_lto_symversions = SYMVER $@ 393 + ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y) 394 + cmd_update_lto_symversions = \ 395 + rm -f $@.symversions \ 396 + $(foreach n, $(filter-out FORCE,$^), \ 397 + $(if $(wildcard $(n).symversions), \ 398 + ; cat $(n).symversions >> $@.symversions)) 399 + else 400 + cmd_update_lto_symversions = echo >/dev/null 401 + endif 402 + 399 403 # 400 404 # Rule to compile a set of .o files into one .a file (without symbol table) 401 405 # ··· 415 395 quiet_cmd_ar_builtin = AR $@ 416 396 cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs) 417 397 398 + quiet_cmd_ar_and_symver = AR $@ 399 + cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin) 400 + 418 401 $(obj)/built-in.a: $(real-obj-y) FORCE 419 - $(call if_changed,ar_builtin) 402 + $(call if_changed,ar_and_symver) 420 403 421 404 # 422 405 # Rule to create modules.order file ··· 439 416 # 440 417 # Rule to compile a set of .o files into one .a file (with symbol table) 441 418 # 419 + quiet_cmd_ar_lib = AR $@ 420 + cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar) 421 + 442 422 $(obj)/lib.a: $(lib-y) FORCE 443 - $(call if_changed,ar) 423 + $(call if_changed,ar_lib) 444 424 445 425 # NOTE: 446 426 # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object 447 427 # module is turned into a multi object module, $^ will contain header file 448 428 # dependencies recorded in the .*.cmd file. 429 + ifdef CONFIG_LTO_CLANG 430 + quiet_cmd_link_multi-m = AR [M] $@ 431 + cmd_link_multi-m = \ 432 + $(cmd_update_lto_symversions); \ 433 + rm -f $@; \ 434 + $(AR) cDPrsT $@ $(filter %.o,$^) 435 + else 449 436 quiet_cmd_link_multi-m = LD [M] $@ 450 437 cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) 438 + endif 451 439 452 440 $(multi-used-m): FORCE 453 441 $(call if_changed,link_multi-m)
+4 -2
scripts/Makefile.lib
··· 119 119 # These flags are needed for modversions and compiling, so we define them here 120 120 # $(modname_flags) defines KBUILD_MODNAME as the name of the module it will 121 121 # end up in (or would, if it gets compiled in) 122 - name-fix = $(call stringify,$(subst $(comma),_,$(subst -,_,$1))) 122 + name-fix-token = $(subst $(comma),_,$(subst -,_,$1)) 123 + name-fix = $(call stringify,$(call name-fix-token,$1)) 123 124 basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget)) 124 - modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) 125 + modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) \ 126 + -D__KBUILD_MODNAME=kmod_$(call name-fix-token,$(modname)) 125 127 modfile_flags = -DKBUILD_MODFILE=$(call stringify,$(modfile)) 126 128 127 129 _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \
+8 -1
scripts/Makefile.modfinal
··· 30 30 31 31 ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) 32 32 33 + ifdef CONFIG_LTO_CLANG 34 + # With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to 35 + # avoid a second slow LTO link 36 + prelink-ext := .lto 37 + endif 38 + 33 39 quiet_cmd_ld_ko_o = LD [M] $@ 34 40 cmd_ld_ko_o = \ 35 41 $(LD) -r $(KBUILD_LDFLAGS) \ ··· 59 53 $(cmd); \ 60 54 printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) 61 55 56 + 62 57 # Re-generate module BTFs if either module's .ko or vmlinux changed 63 - $(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE 58 + $(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE 64 59 +$(call if_changed_except,ld_ko_o,vmlinux) 65 60 ifdef CONFIG_DEBUG_INFO_BTF_MODULES 66 61 +$(if $(newer-prereqs),$(call cmd,btf_ko))
+23 -2
scripts/Makefile.modpost
··· 43 43 include include/config/auto.conf 44 44 include scripts/Kbuild.include 45 45 46 + # for ld_flags 47 + include scripts/Makefile.lib 48 + 46 49 MODPOST = scripts/mod/modpost \ 47 50 $(if $(CONFIG_MODVERSIONS),-m) \ 48 51 $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ ··· 105 102 @echo >&2 'WARNING: Symbol version dump "$@" is missing.' 106 103 @echo >&2 ' Modules may not have dependencies or modversions.' 107 104 105 + ifdef CONFIG_LTO_CLANG 106 + # With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run 107 + # LTO to compile them into native code before running modpost 108 + prelink-ext := .lto 109 + 110 + quiet_cmd_cc_lto_link_modules = LTO [M] $@ 111 + cmd_cc_lto_link_modules = \ 112 + $(LD) $(ld_flags) -r -o $@ \ 113 + $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ 114 + echo -T $(@:.lto.o=.o.symversions)) \ 115 + --whole-archive $^ 116 + 117 + %.lto.o: %.o 118 + $(call if_changed,cc_lto_link_modules) 119 + endif 120 + 121 + modules := $(sort $(shell cat $(MODORDER))) 122 + 108 123 # Read out modules.order to pass in modpost. 109 124 # Otherwise, allmodconfig would fail with "Argument list too long". 110 125 quiet_cmd_modpost = MODPOST $@ 111 - cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T - 126 + cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T - 112 127 113 - $(output-symdump): $(MODORDER) $(input-symdump) FORCE 128 + $(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE 114 129 $(call if_changed,modpost) 115 130 116 131 targets += $(output-symdump)
+270
scripts/generate_initcall_order.pl
··· 1 + #!/usr/bin/env perl 2 + # SPDX-License-Identifier: GPL-2.0 3 + # 4 + # Generates a linker script that specifies the correct initcall order. 5 + # 6 + # Copyright (C) 2019 Google LLC 7 + 8 + use strict; 9 + use warnings; 10 + use IO::Handle; 11 + use IO::Select; 12 + use POSIX ":sys_wait_h"; 13 + 14 + my $nm = $ENV{'NM'} || die "$0: ERROR: NM not set?"; 15 + my $objtree = $ENV{'objtree'} || '.'; 16 + 17 + ## currently active child processes 18 + my $jobs = {}; # child process pid -> file handle 19 + ## results from child processes 20 + my $results = {}; # object index -> [ { level, secname }, ... ] 21 + 22 + ## reads _NPROCESSORS_ONLN to determine the maximum number of processes to 23 + ## start 24 + sub get_online_processors { 25 + open(my $fh, "getconf _NPROCESSORS_ONLN 2>/dev/null |") 26 + or die "$0: ERROR: failed to execute getconf: $!"; 27 + my $procs = <$fh>; 28 + close($fh); 29 + 30 + if (!($procs =~ /^\d+$/)) { 31 + return 1; 32 + } 33 + 34 + return int($procs); 35 + } 36 + 37 + ## writes results to the parent process 38 + ## format: <file index> <initcall level> <base initcall section name> 39 + sub write_results { 40 + my ($index, $initcalls) = @_; 41 + 42 + # sort by the counter value to ensure the order of initcalls within 43 + # each object file is correct 44 + foreach my $counter (sort { $a <=> $b } keys(%{$initcalls})) { 45 + my $level = $initcalls->{$counter}->{'level'}; 46 + 47 + # section name for the initcall function 48 + my $secname = $initcalls->{$counter}->{'module'} . '__' . 49 + $counter . '_' . 50 + $initcalls->{$counter}->{'line'} . '_' . 
51 + $initcalls->{$counter}->{'function'}; 52 + 53 + print "$index $level $secname\n"; 54 + } 55 + } 56 + 57 + ## reads a result line from a child process and adds it to the $results array 58 + sub read_results{ 59 + my ($fh) = @_; 60 + 61 + # each child prints out a full line w/ autoflush and exits after the 62 + # last line, so even if buffered I/O blocks here, it shouldn't block 63 + # very long 64 + my $data = <$fh>; 65 + 66 + if (!defined($data)) { 67 + return 0; 68 + } 69 + 70 + chomp($data); 71 + 72 + my ($index, $level, $secname) = $data =~ 73 + /^(\d+)\ ([^\ ]+)\ (.*)$/; 74 + 75 + if (!defined($index) || 76 + !defined($level) || 77 + !defined($secname)) { 78 + die "$0: ERROR: child process returned invalid data: $data\n"; 79 + } 80 + 81 + $index = int($index); 82 + 83 + if (!exists($results->{$index})) { 84 + $results->{$index} = []; 85 + } 86 + 87 + push (@{$results->{$index}}, { 88 + 'level' => $level, 89 + 'secname' => $secname 90 + }); 91 + 92 + return 1; 93 + } 94 + 95 + ## finds initcalls from an object file or all object files in an archive, and 96 + ## writes results back to the parent process 97 + sub find_initcalls { 98 + my ($index, $file) = @_; 99 + 100 + die "$0: ERROR: file $file doesn't exist?" if (! 
-f $file); 101 + 102 + open(my $fh, "\"$nm\" --defined-only \"$file\" 2>/dev/null |") 103 + or die "$0: ERROR: failed to execute \"$nm\": $!"; 104 + 105 + my $initcalls = {}; 106 + 107 + while (<$fh>) { 108 + chomp; 109 + 110 + # check for the start of a new object file (if processing an 111 + # archive) 112 + my ($path)= $_ =~ /^(.+)\:$/; 113 + 114 + if (defined($path)) { 115 + write_results($index, $initcalls); 116 + $initcalls = {}; 117 + next; 118 + } 119 + 120 + # look for an initcall 121 + my ($module, $counter, $line, $symbol) = $_ =~ 122 + /[a-z]\s+__initcall__(\S*)__(\d+)_(\d+)_(.*)$/; 123 + 124 + if (!defined($module)) { 125 + $module = '' 126 + } 127 + 128 + if (!defined($counter) || 129 + !defined($line) || 130 + !defined($symbol)) { 131 + next; 132 + } 133 + 134 + # parse initcall level 135 + my ($function, $level) = $symbol =~ 136 + /^(.*)((early|rootfs|con|[0-9])s?)$/; 137 + 138 + die "$0: ERROR: invalid initcall name $symbol in $file($path)" 139 + if (!defined($function) || !defined($level)); 140 + 141 + $initcalls->{$counter} = { 142 + 'module' => $module, 143 + 'line' => $line, 144 + 'function' => $function, 145 + 'level' => $level, 146 + }; 147 + } 148 + 149 + close($fh); 150 + write_results($index, $initcalls); 151 + } 152 + 153 + ## waits for any child process to complete, reads the results, and adds them to 154 + ## the $results array for later processing 155 + sub wait_for_results { 156 + my ($select) = @_; 157 + 158 + my $pid = 0; 159 + do { 160 + # unblock children that may have a full write buffer 161 + foreach my $fh ($select->can_read(0)) { 162 + read_results($fh); 163 + } 164 + 165 + # check for children that have exited, read the remaining data 166 + # from them, and clean up 167 + $pid = waitpid(-1, WNOHANG); 168 + if ($pid > 0) { 169 + if (!exists($jobs->{$pid})) { 170 + next; 171 + } 172 + 173 + my $fh = $jobs->{$pid}; 174 + $select->remove($fh); 175 + 176 + while (read_results($fh)) { 177 + # until eof 178 + } 179 + 180 + 
close($fh); 181 + delete($jobs->{$pid}); 182 + } 183 + } while ($pid > 0); 184 + } 185 + 186 + ## forks a child to process each file passed in the command line and collects 187 + ## the results 188 + sub process_files { 189 + my $index = 0; 190 + my $njobs = $ENV{'PARALLELISM'} || get_online_processors(); 191 + my $select = IO::Select->new(); 192 + 193 + while (my $file = shift(@ARGV)) { 194 + # fork a child process and read it's stdout 195 + my $pid = open(my $fh, '-|'); 196 + 197 + if (!defined($pid)) { 198 + die "$0: ERROR: failed to fork: $!"; 199 + } elsif ($pid) { 200 + # save the child process pid and the file handle 201 + $select->add($fh); 202 + $jobs->{$pid} = $fh; 203 + } else { 204 + # in the child process 205 + STDOUT->autoflush(1); 206 + find_initcalls($index, "$objtree/$file"); 207 + exit; 208 + } 209 + 210 + $index++; 211 + 212 + # limit the number of children to $njobs 213 + if (scalar(keys(%{$jobs})) >= $njobs) { 214 + wait_for_results($select); 215 + } 216 + } 217 + 218 + # wait for the remaining children to complete 219 + while (scalar(keys(%{$jobs})) > 0) { 220 + wait_for_results($select); 221 + } 222 + } 223 + 224 + sub generate_initcall_lds() { 225 + process_files(); 226 + 227 + my $sections = {}; # level -> [ secname, ...] 228 + 229 + # sort results to retain link order and split to sections per 230 + # initcall level 231 + foreach my $index (sort { $a <=> $b } keys(%{$results})) { 232 + foreach my $result (@{$results->{$index}}) { 233 + my $level = $result->{'level'}; 234 + 235 + if (!exists($sections->{$level})) { 236 + $sections->{$level} = []; 237 + } 238 + 239 + push(@{$sections->{$level}}, $result->{'secname'}); 240 + } 241 + } 242 + 243 + die "$0: ERROR: no initcalls?" 
if (!keys(%{$sections})); 244 + 245 + # print out a linker script that defines the order of initcalls for 246 + # each level 247 + print "SECTIONS {\n"; 248 + 249 + foreach my $level (sort(keys(%{$sections}))) { 250 + my $section; 251 + 252 + if ($level eq 'con') { 253 + $section = '.con_initcall.init'; 254 + } else { 255 + $section = ".initcall${level}.init"; 256 + } 257 + 258 + print "\t${section} : {\n"; 259 + 260 + foreach my $secname (@{$sections->{$level}}) { 261 + print "\t\t*(${section}..${secname}) ;\n"; 262 + } 263 + 264 + print "\t}\n"; 265 + } 266 + 267 + print "}\n"; 268 + } 269 + 270 + generate_initcall_lds();
+5
scripts/lto-used-symbollist.txt
··· 1 + memcpy 2 + memmove 3 + memset 4 + __stack_chk_fail 5 + __stack_chk_guard
+1
scripts/mod/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 OBJECT_FILES_NON_STANDARD := y 3 + CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) 3 4 4 5 hostprogs-always-y += modpost mk_elfconfig 5 6 always-y += empty.o
+7 -9
scripts/mod/modpost.c
··· 17 17 #include <ctype.h> 18 18 #include <string.h> 19 19 #include <limits.h> 20 - #include <stdbool.h> 21 20 #include <errno.h> 22 21 #include "modpost.h" 23 22 #include "../../include/linux/license.h" ··· 81 82 exit(1); 82 83 if (loglevel == LOG_ERROR) 83 84 error_occurred = true; 84 - } 85 - 86 - static inline bool strends(const char *str, const char *postfix) 87 - { 88 - if (strlen(str) < strlen(postfix)) 89 - return false; 90 - 91 - return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; 92 85 } 93 86 94 87 void *do_nofail(void *ptr, const char *expr) ··· 1979 1988 size_t m = strspn(s + n + 1, "0123456789"); 1980 1989 if (m && (s[n + m] == '.' || s[n + m] == 0)) 1981 1990 s[n] = 0; 1991 + 1992 + /* strip trailing .lto */ 1993 + if (strends(s, ".lto")) 1994 + s[strlen(s) - 4] = '\0'; 1982 1995 } 1983 1996 return s; 1984 1997 } ··· 2006 2011 /* strip trailing .o */ 2007 2012 tmp = NOFAIL(strdup(modname)); 2008 2013 tmp[strlen(tmp) - 2] = '\0'; 2014 + /* strip trailing .lto */ 2015 + if (strends(tmp, ".lto")) 2016 + tmp[strlen(tmp) - 4] = '\0'; 2009 2017 mod = new_module(tmp); 2010 2018 free(tmp); 2011 2019 }
+9
scripts/mod/modpost.h
··· 2 2 #include <stdio.h> 3 3 #include <stdlib.h> 4 4 #include <stdarg.h> 5 + #include <stdbool.h> 5 6 #include <string.h> 6 7 #include <sys/types.h> 7 8 #include <sys/stat.h> ··· 179 178 if (sym->st_shndx != SHN_XINDEX) 180 179 return sym->st_shndx; 181 180 return info->symtab_shndx_start[sym - info->symtab_start]; 181 + } 182 + 183 + static inline bool strends(const char *str, const char *postfix) 184 + { 185 + if (strlen(str) < strlen(postfix)) 186 + return false; 187 + 188 + return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; 182 189 } 183 190 184 191 /* file2alias.c */
+5 -1
scripts/mod/sumversion.c
··· 391 391 struct md4_ctx md; 392 392 char *fname; 393 393 char filelist[PATH_MAX + 1]; 394 + int postfix_len = 1; 395 + 396 + if (strends(modname, ".lto.o")) 397 + postfix_len = 5; 394 398 395 399 /* objects for a module are listed in the first line of *.mod file. */ 396 400 snprintf(filelist, sizeof(filelist), "%.*smod", 397 - (int)strlen(modname) - 1, modname); 401 + (int)strlen(modname) - postfix_len, modname); 398 402 399 403 buf = read_text_file(filelist); 400 404
+24
scripts/module.lds.S
··· 23 23 .init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) } 24 24 25 25 __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } 26 + 27 + __patchable_function_entries : { *(__patchable_function_entries) } 28 + 29 + /* 30 + * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and 31 + * -ffunction-sections, which increases the size of the final module. 32 + * Merge the split sections in the final binary. 33 + */ 34 + .bss : { 35 + *(.bss .bss.[0-9a-zA-Z_]*) 36 + *(.bss..L*) 37 + } 38 + 39 + .data : { 40 + *(.data .data.[0-9a-zA-Z_]*) 41 + *(.data..L*) 42 + } 43 + 44 + .rodata : { 45 + *(.rodata .rodata.[0-9a-zA-Z_]*) 46 + *(.rodata..L*) 47 + } 48 + 49 + .text : { *(.text .text.[0-9a-zA-Z_]*) } 26 50 } 27 51 28 52 /* bring in arch-specific sections */