Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/build: Mostly disable '-maccumulate-outgoing-args'

The GCC '-maccumulate-outgoing-args' flag is enabled for most configs,
mostly because of issues which are no longer relevant. For most
configs, and with most recent versions of GCC, it's no longer needed.

Clarify which cases need it, and only enable it for those cases. Also
produce a compile-time error for the ftrace graph + mcount + '-Os' case,
which will otherwise cause runtime failures.

The main benefit of '-maccumulate-outgoing-args' is that it prevents an
ugly prologue for functions which have aligned stacks. But removing the
option also has some benefits: more readable argument saves, smaller
text size, and (presumably) slightly improved performance.

Here are the object size savings for 32-bit and 64-bit defconfig
kernels:

    text     data      bss       dec     hex  filename
10006710  3543328  1773568  15323606  e9d1d6  vmlinux.x86-32.before
 9706358  3547424  1773568  15027350  e54c96  vmlinux.x86-32.after

    text     data      bss       dec     hex  filename
10652105  4537576   843776  16033457  f4a6b1  vmlinux.x86-64.before
10639629  4537576   843776  16020981  f475f5  vmlinux.x86-64.after

That comes out to a 3% text size improvement on x86-32 and a 0.1% text
size improvement on x86-64.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170316193133.zrj6gug53766m6nn@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Josh Poimboeuf and committed by Ingo Molnar
3f135e57 a46f60d7

+41 -22
+31 -4
arch/x86/Makefile
··· 120 120 # -funit-at-a-time shrinks the kernel .text considerably 121 121 # unfortunately it makes reading oopses harder. 122 122 KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) 123 - 124 - # this works around some issues with generating unwind tables in older gccs 125 - # newer gccs do it by default 126 - KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) 127 123 endif 128 124 129 125 ifdef CONFIG_X86_X32 ··· 141 145 # Don't unroll struct assignments with kmemcheck enabled 142 146 ifeq ($(CONFIG_KMEMCHECK),y) 143 147 KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) 148 + endif 149 + 150 + # 151 + # If the function graph tracer is used with mcount instead of fentry, 152 + # '-maccumulate-outgoing-args' is needed to prevent a GCC bug 153 + # (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109) 154 + # 155 + ifdef CONFIG_FUNCTION_GRAPH_TRACER 156 + ifndef CONFIG_HAVE_FENTRY 157 + ACCUMULATE_OUTGOING_ARGS := 1 158 + else 159 + ifeq ($(call cc-option-yn, -mfentry), n) 160 + ACCUMULATE_OUTGOING_ARGS := 1 161 + endif 162 + endif 163 + endif 164 + 165 + # 166 + # Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a 167 + # GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way 168 + # to test for this bug at compile-time because the test case needs to execute, 169 + # which is a no-go for cross compilers. So check the GCC version instead. 170 + # 171 + ifdef CONFIG_JUMP_LABEL 172 + ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1) 173 + ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1) 174 + endif 175 + endif 176 + 177 + ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) 178 + KBUILD_CFLAGS += -maccumulate-outgoing-args 144 179 endif 145 180 146 181 # Stackpointer is addressed different for 32 bit and 64 bit x86
-18
arch/x86/Makefile_32.cpu
··· 45 45 # cpu entries 46 46 cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) 47 47 48 - # Work around the pentium-mmx code generator madness of gcc4.4.x which 49 - # does stack alignment by generating horrible code _before_ the mcount 50 - # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph 51 - # tracer assumptions. For i686, generic, core2 this is set by the 52 - # compiler anyway 53 - ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) 54 - ADD_ACCUMULATE_OUTGOING_ARGS := y 55 - endif 56 - 57 - # Work around to a bug with asm goto with first implementations of it 58 - # in gcc causing gcc to mess up the push and pop of the stack in some 59 - # uses of asm goto. 60 - ifeq ($(CONFIG_JUMP_LABEL), y) 61 - ADD_ACCUMULATE_OUTGOING_ARGS := y 62 - endif 63 - 64 - cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) 65 - 66 48 # Bug fix for binutils: this option is required in order to keep 67 49 # binutils from generating NOPL instructions against our will. 68 50 ifneq ($(CONFIG_X86_P6_NOP),y)
+6
arch/x86/kernel/ftrace.c
··· 29 29 #include <asm/ftrace.h> 30 30 #include <asm/nops.h> 31 31 32 + #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && \ 33 + !defined(CC_USING_FENTRY) && \ 34 + !defined(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) 35 + # error The following combination is not supported: ((compiler missing -mfentry) || (CONFIG_X86_32 and !CONFIG_DYNAMIC_FTRACE)) && CONFIG_FUNCTION_GRAPH_TRACER && CONFIG_CC_OPTIMIZE_FOR_SIZE 36 + #endif 37 + 32 38 #ifdef CONFIG_DYNAMIC_FTRACE 33 39 34 40 int ftrace_arch_code_modify_prepare(void)
+4
scripts/Kbuild.include
··· 148 148 # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) 149 149 cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) 150 150 151 + # cc-if-fullversion 152 + # Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1) 153 + cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4)) 154 + 151 155 # cc-ldoption 152 156 # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) 153 157 cc-ldoption = $(call try-run,\