Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/lib/copy_page_64.S: Use generic ALTERNATIVE macro

... instead of the semi-version with the spelled out sections.

What is more, make the REP_GOOD version be the default copy_page()
version as the majority of the relevant x86 CPUs do set
X86_FEATURE_REP_GOOD. Thus, copy_page gets compiled to:

ffffffff8130af80 <copy_page>:
ffffffff8130af80: e9 0b 00 00 00 jmpq ffffffff8130af90 <copy_page_regs>
ffffffff8130af85: b9 00 02 00 00 mov $0x200,%ecx
ffffffff8130af8a: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi)
ffffffff8130af8d: c3 retq
ffffffff8130af8e: 66 90 xchg %ax,%ax

ffffffff8130af90 <copy_page_regs>:
...

and after the alternatives have run, the JMP to the old, unrolled
version gets NOPed out:

ffffffff8130af80 <copy_page>:
ffffffff8130af80: 66 66 90 xchg %ax,%ax
ffffffff8130af83: 66 90 xchg %ax,%ax
ffffffff8130af85: b9 00 02 00 00 mov $0x200,%ecx
ffffffff8130af8a: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi)
ffffffff8130af8d: c3 retq

On modern uarches, those NOPs are cheaper than the unconditional JMP that was
emitted previously.

Signed-off-by: Borislav Petkov <bp@suse.de>

1 file changed, 12 insertions(+), 25 deletions(-)
arch/x86/lib/copy_page_64.S
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
 	ALIGN
-copy_page_rep:
+ENTRY(copy_page)
 	CFI_STARTPROC
+	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_rep)
+ENDPROC(copy_page)
 
-/*
- * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- * Could vary the prefetch distance based on SMP/UP.
- */
-
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
 	CFI_STARTPROC
 	subq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
@@ -93,21 +90,5 @@
 	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
-.Lcopy_page_end:
 	CFI_ENDPROC
-ENDPROC(copy_page)
-
-/* Some CPUs run faster using the string copy instructions.
-   It is also a lot simpler.  Use this when possible */
-
-#include <asm/cpufeature.h>
-
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
-2:
-	.previous
-	.section .altinstructions,"a"
-	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\
-	.Lcopy_page_end-copy_page, 2b-1b, 0
-	.previous
+ENDPROC(copy_page_regs)