Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/inst: Optimise copy_inst_from_kernel_nofault()

copy_inst_from_kernel_nofault() uses copy_from_kernel_nofault() to
copy one or two 32-bit words. This means calling an out-of-line
function which itself calls back copy_from_kernel_nofault_allowed()
then performs a generic copy with loops.

Rewrite copy_inst_from_kernel_nofault() to do everything in a
single place and use __get_kernel_nofault() directly to perform
single accesses without loops.

Although the generic function uses pagefault_disable(), it is not
required on powerpc because do_page_fault() bails earlier when a
kernel mode fault happens on a kernel address.

As the function has now become very small, inline it.

With this change, on an 8xx the time spent in the loop in
ftrace_replace_code() is reduced by 23% at function tracer activation
and 27% at nop tracer activation.
The overall time to activate function tracer (measured with shell
command 'time') is 570ms before the patch and 470ms after the patch.

Even vmlinux size is reduced (by 152 instructions).

Before the patch:

00000018 <copy_inst_from_kernel_nofault>:
18: 94 21 ff e0 stwu r1,-32(r1)
1c: 7c 08 02 a6 mflr r0
20: 38 a0 00 04 li r5,4
24: 93 e1 00 1c stw r31,28(r1)
28: 7c 7f 1b 78 mr r31,r3
2c: 38 61 00 08 addi r3,r1,8
30: 90 01 00 24 stw r0,36(r1)
34: 48 00 00 01 bl 34 <copy_inst_from_kernel_nofault+0x1c>
34: R_PPC_REL24 copy_from_kernel_nofault
38: 2c 03 00 00 cmpwi r3,0
3c: 40 82 00 0c bne 48 <copy_inst_from_kernel_nofault+0x30>
40: 81 21 00 08 lwz r9,8(r1)
44: 91 3f 00 00 stw r9,0(r31)
48: 80 01 00 24 lwz r0,36(r1)
4c: 83 e1 00 1c lwz r31,28(r1)
50: 38 21 00 20 addi r1,r1,32
54: 7c 08 03 a6 mtlr r0
58: 4e 80 00 20 blr

After the patch (before inlining):

00000018 <copy_inst_from_kernel_nofault>:
18: 3d 20 b0 00 lis r9,-20480
1c: 7c 04 48 40 cmplw r4,r9
20: 7c 69 1b 78 mr r9,r3
24: 41 80 00 14 blt 38 <copy_inst_from_kernel_nofault+0x20>
28: 81 44 00 00 lwz r10,0(r4)
2c: 38 60 00 00 li r3,0
30: 91 49 00 00 stw r10,0(r9)
34: 4e 80 00 20 blr

38: 38 60 ff de li r3,-34
3c: 4e 80 00 20 blr
40: 38 60 ff f2 li r3,-14
44: 4e 80 00 20 blr

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Add clang workaround, with version check as suggested by Nathan]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/0d5b12183d5176dd702d29ad94c39c384e51c78f.1638208156.git.christophe.leroy@csgroup.eu

authored by

Christophe Leroy and committed by
Michael Ellerman
0d76914a 9b307576

+24 -18
+24 -1
arch/powerpc/include/asm/inst.h
··· 4 4 5 5 #include <asm/ppc-opcode.h> 6 6 #include <asm/reg.h> 7 + #include <asm/disassemble.h> 8 + #include <asm/uaccess.h> 7 9 8 10 #define ___get_user_instr(gu_op, dest, ptr) \ 9 11 ({ \ ··· 150 148 __str; \ 151 149 }) 152 150 153 - int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src); 151 + static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) 152 + { 153 + unsigned int val, suffix; 154 + 155 + if (unlikely(!is_kernel_addr((unsigned long)src))) 156 + return -ERANGE; 157 + 158 + /* See https://github.com/ClangBuiltLinux/linux/issues/1521 */ 159 + #if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000 160 + val = suffix = 0; 161 + #endif 162 + __get_kernel_nofault(&val, src, u32, Efault); 163 + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { 164 + __get_kernel_nofault(&suffix, src + 1, u32, Efault); 165 + *inst = ppc_inst_prefix(val, suffix); 166 + } else { 167 + *inst = ppc_inst(val); 168 + } 169 + return 0; 170 + Efault: 171 + return -EFAULT; 172 + } 154 173 155 174 #endif /* _ASM_POWERPC_INST_H */
-17
arch/powerpc/mm/maccess.c
··· 11 11 { 12 12 return is_kernel_addr((unsigned long)unsafe_src); 13 13 } 14 - 15 - int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) 16 - { 17 - unsigned int val, suffix; 18 - int err; 19 - 20 - err = copy_from_kernel_nofault(&val, src, sizeof(val)); 21 - if (err) 22 - return err; 23 - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { 24 - err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); 25 - *inst = ppc_inst_prefix(val, suffix); 26 - } else { 27 - *inst = ppc_inst(val); 28 - } 29 - return err; 30 - }