Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/lib/memcpy_64.S: Convert memcpy to ALTERNATIVE_2 macro

Make REP_GOOD variant the default after alternatives have run.

Signed-off-by: Borislav Petkov <bp@suse.de>

1 file changed, 21 insertions(+), 47 deletions(-)
arch/x86/lib/memcpy_64.S (+21 -47)
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,10 +1,18 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
+
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
 
 /*
  * memcpy - Copy a memory block.
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	shrq $3, %rcx
@@ -34,29 +38,21 @@
 	movl %edx, %ecx
 	rep movsb
 	ret
-.Lmemcpy_e:
-	.previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-	.section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
-.Lmemcpy_e_e:
-	.previous
+ENDPROC(memcpy_erms)
 
-	.weak memcpy
-
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
 	CFI_STARTPROC
 	movq %rdi, %rax
 
@@ -183,26 +179,4 @@
 .Lend:
 	retq
 	CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB feature
- * If the feature is supported, memcpy_c_e() is the first choice.
- * If enhanced rep movsb copy is not available, use fast string copy
- * memcpy_c() when possible. This is faster and code is simpler than
- * original memcpy().
- * Otherwise, original memcpy() is used.
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- *
- * Replace only beginning, memcpy is used to apply alternatives,
- * so it is silly to overwrite itself with nops - reboot is the
- * only outcome...
- */
-	.section .altinstructions, "a"
-	altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c,0
-	altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e,0
-	.previous
+ENDPROC(memcpy_orig)