Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/pseries: Use jump labels for hcall tracepoints

hcall tracepoints add quite a few instructions to our hcall path:

plpar_hcall:
mr r2,r2
mfcr r0
stw r0,8(r1)
b 164 <---- start
ld r12,0(r2)
std r12,32(r1)
cmpdi r12,0
beq 164 <---- end
...

We have an unconditional branch that gets noped out during boot and
a load/compare/branch. We also store the tracepoint value to the
stack for the hcall_exit path to use.

By using jump labels we can simplify this to just a single nop that
gets replaced with a branch when the tracepoint is enabled:

plpar_hcall:
mr r2,r2
mfcr r0
stw r0,8(r1)
nop <----
...

If jump labels are not enabled, we fall back to the old method.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Authored by Anton Blanchard and committed by Benjamin Herrenschmidt.
cc1adb5f 8fa5d454

+142 -41
+9
arch/powerpc/include/asm/jump_label.h
··· 10 10 * 2 of the License, or (at your option) any later version. 11 11 */ 12 12 13 + #ifndef __ASSEMBLY__ 13 14 #include <linux/types.h> 14 15 15 16 #include <asm/feature-fixups.h> ··· 42 41 jump_label_t target; 43 42 jump_label_t key; 44 43 }; 44 + 45 + #else 46 + #define ARCH_STATIC_BRANCH(LABEL, KEY) \ 47 + 1098: nop; \ 48 + .pushsection __jump_table, "aw"; \ 49 + FTR_ENTRY_LONG 1098b, LABEL, KEY; \ 50 + .popsection 51 + #endif 45 52 46 53 #endif /* _ASM_POWERPC_JUMP_LABEL_H */
+110 -34
arch/powerpc/platforms/pseries/hvCall.S
··· 12 12 #include <asm/ppc_asm.h> 13 13 #include <asm/asm-offsets.h> 14 14 #include <asm/ptrace.h> 15 + #include <asm/jump_label.h> 16 + 17 + .section ".text" 15 18 16 19 #ifdef CONFIG_TRACEPOINTS 17 20 21 + #ifndef CONFIG_JUMP_LABEL 18 22 .section ".toc","aw" 19 23 20 24 .globl hcall_tracepoint_refcount ··· 26 22 .llong 0 27 23 28 24 .section ".text" 25 + #endif 29 26 30 27 /* 31 28 * precall must preserve all registers. use unused STK_PARAM() 32 - * areas to save snapshots and opcode. We branch around this 33 - * in early init (eg when populating the MMU hashtable) by using an 34 - * unconditional cpu feature. 29 + * areas to save snapshots and opcode. 35 30 */ 36 31 #define HCALL_INST_PRECALL(FIRST_REG) \ 37 - BEGIN_FTR_SECTION; \ 38 - b 1f; \ 39 - END_FTR_SECTION(0, 1); \ 40 - ld r12,hcall_tracepoint_refcount@toc(r2); \ 41 - std r12,32(r1); \ 42 - cmpdi r12,0; \ 43 - beq+ 1f; \ 44 32 mflr r0; \ 45 33 std r3,STK_PARAM(R3)(r1); \ 46 34 std r4,STK_PARAM(R4)(r1); \ ··· 56 60 ld r8,STK_PARAM(R8)(r1); \ 57 61 ld r9,STK_PARAM(R9)(r1); \ 58 62 ld r10,STK_PARAM(R10)(r1); \ 59 - mtlr r0; \ 60 - 1: 63 + mtlr r0 61 64 62 65 /* 63 66 * postcall is performed immediately before function return which 64 - * allows liberal use of volatile registers. We branch around this 65 - * in early init (eg when populating the MMU hashtable) by using an 66 - * unconditional cpu feature. 67 + * allows liberal use of volatile registers. 
67 68 */ 68 69 #define __HCALL_INST_POSTCALL \ 69 - BEGIN_FTR_SECTION; \ 70 - b 1f; \ 71 - END_FTR_SECTION(0, 1); \ 72 - ld r12,32(r1); \ 73 - cmpdi r12,0; \ 74 - beq+ 1f; \ 75 70 mflr r0; \ 76 71 ld r6,STK_PARAM(R3)(r1); \ 77 72 std r3,STK_PARAM(R3)(r1); \ ··· 74 87 addi r1,r1,STACK_FRAME_OVERHEAD; \ 75 88 ld r0,16(r1); \ 76 89 ld r3,STK_PARAM(R3)(r1); \ 77 - mtlr r0; \ 78 - 1: 90 + mtlr r0 79 91 80 92 #define HCALL_INST_POSTCALL_NORETS \ 81 93 li r5,0; \ ··· 84 98 mr r5,BUFREG; \ 85 99 __HCALL_INST_POSTCALL 86 100 101 + #ifdef CONFIG_JUMP_LABEL 102 + #define HCALL_BRANCH(LABEL) \ 103 + ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) 104 + #else 105 + 106 + /* 107 + * We branch around this in early init (eg when populating the MMU 108 + * hashtable) by using an unconditional cpu feature. 109 + */ 110 + #define HCALL_BRANCH(LABEL) \ 111 + BEGIN_FTR_SECTION; \ 112 + b 1f; \ 113 + END_FTR_SECTION(0, 1); \ 114 + ld r12,hcall_tracepoint_refcount@toc(r2); \ 115 + std r12,32(r1); \ 116 + cmpdi r12,0; \ 117 + bne- LABEL; \ 118 + 1: 119 + #endif 120 + 87 121 #else 88 122 #define HCALL_INST_PRECALL(FIRST_ARG) 89 123 #define HCALL_INST_POSTCALL_NORETS 90 124 #define HCALL_INST_POSTCALL(BUFREG) 125 + #define HCALL_BRANCH(LABEL) 91 126 #endif 92 - 93 - .text 94 127 95 128 _GLOBAL_TOC(plpar_hcall_norets) 96 129 HMT_MEDIUM 97 130 98 131 mfcr r0 99 132 stw r0,8(r1) 100 - 101 - HCALL_INST_PRECALL(R4) 102 - 133 + HCALL_BRANCH(plpar_hcall_norets_trace) 103 134 HVSC /* invoke the hypervisor */ 104 - 105 - HCALL_INST_POSTCALL_NORETS 106 135 107 136 lwz r0,8(r1) 108 137 mtcrf 0xff,r0 109 138 blr /* return r3 = status */ 139 + 140 + #ifdef CONFIG_TRACEPOINTS 141 + plpar_hcall_norets_trace: 142 + HCALL_INST_PRECALL(R4) 143 + HVSC 144 + HCALL_INST_POSTCALL_NORETS 145 + lwz r0,8(r1) 146 + mtcrf 0xff,r0 147 + blr 148 + #endif 110 149 111 150 _GLOBAL_TOC(plpar_hcall) 112 151 HMT_MEDIUM ··· 139 128 mfcr r0 140 129 stw r0,8(r1) 141 130 142 - HCALL_INST_PRECALL(R5) 131 + 
HCALL_BRANCH(plpar_hcall_trace) 143 132 144 133 std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ 145 134 ··· 158 147 std r6, 16(r12) 159 148 std r7, 24(r12) 160 149 150 + lwz r0,8(r1) 151 + mtcrf 0xff,r0 152 + 153 + blr /* return r3 = status */ 154 + 155 + #ifdef CONFIG_TRACEPOINTS 156 + plpar_hcall_trace: 157 + HCALL_INST_PRECALL(R5) 158 + 159 + std r4,STK_PARAM(R4)(r1) 160 + mr r0,r4 161 + 162 + mr r4,r5 163 + mr r5,r6 164 + mr r6,r7 165 + mr r7,r8 166 + mr r8,r9 167 + mr r9,r10 168 + 169 + HVSC 170 + 171 + ld r12,STK_PARAM(R4)(r1) 172 + std r4,0(r12) 173 + std r5,8(r12) 174 + std r6,16(r12) 175 + std r7,24(r12) 176 + 161 177 HCALL_INST_POSTCALL(r12) 162 178 163 179 lwz r0,8(r1) 164 180 mtcrf 0xff,r0 165 181 166 - blr /* return r3 = status */ 182 + blr 183 + #endif 167 184 168 185 /* 169 186 * plpar_hcall_raw can be called in real mode. kexec/kdump need some ··· 233 194 mfcr r0 234 195 stw r0,8(r1) 235 196 236 - HCALL_INST_PRECALL(R5) 197 + HCALL_BRANCH(plpar_hcall9_trace) 237 198 238 199 std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ 239 200 ··· 261 222 std r11,56(r12) 262 223 std r0, 64(r12) 263 224 225 + lwz r0,8(r1) 226 + mtcrf 0xff,r0 227 + 228 + blr /* return r3 = status */ 229 + 230 + #ifdef CONFIG_TRACEPOINTS 231 + plpar_hcall9_trace: 232 + HCALL_INST_PRECALL(R5) 233 + 234 + std r4,STK_PARAM(R4)(r1) 235 + mr r0,r4 236 + 237 + mr r4,r5 238 + mr r5,r6 239 + mr r6,r7 240 + mr r7,r8 241 + mr r8,r9 242 + mr r9,r10 243 + ld r10,STK_PARAM(R11)(r1) 244 + ld r11,STK_PARAM(R12)(r1) 245 + ld r12,STK_PARAM(R13)(r1) 246 + 247 + HVSC 248 + 249 + mr r0,r12 250 + ld r12,STK_PARAM(R4)(r1) 251 + std r4,0(r12) 252 + std r5,8(r12) 253 + std r6,16(r12) 254 + std r7,24(r12) 255 + std r8,32(r12) 256 + std r9,40(r12) 257 + std r10,48(r12) 258 + std r11,56(r12) 259 + std r0,64(r12) 260 + 264 261 HCALL_INST_POSTCALL(r12) 265 262 266 263 lwz r0,8(r1) 267 264 mtcrf 0xff,r0 268 265 269 - blr /* return r3 = status */ 266 + blr 267 + #endif 270 268 271 269 /* See plpar_hcall_raw to see 
why this is needed */ 272 270 _GLOBAL(plpar_hcall9_raw)
+23 -7
arch/powerpc/platforms/pseries/lpar.c
··· 26 26 #include <linux/dma-mapping.h> 27 27 #include <linux/console.h> 28 28 #include <linux/export.h> 29 + #include <linux/static_key.h> 29 30 #include <asm/processor.h> 30 31 #include <asm/mmu.h> 31 32 #include <asm/page.h> ··· 650 649 #endif 651 650 652 651 #ifdef CONFIG_TRACEPOINTS 652 + #ifdef CONFIG_JUMP_LABEL 653 + struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; 654 + 655 + void hcall_tracepoint_regfunc(void) 656 + { 657 + static_key_slow_inc(&hcall_tracepoint_key); 658 + } 659 + 660 + void hcall_tracepoint_unregfunc(void) 661 + { 662 + static_key_slow_dec(&hcall_tracepoint_key); 663 + } 664 + #else 653 665 /* 654 666 * We optimise our hcall path by placing hcall_tracepoint_refcount 655 667 * directly in the TOC so we can check if the hcall tracepoints are ··· 671 657 672 658 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 673 659 extern long hcall_tracepoint_refcount; 674 - 675 - /* 676 - * Since the tracing code might execute hcalls we need to guard against 677 - * recursion. One example of this are spinlocks calling H_YIELD on 678 - * shared processor partitions. 679 - */ 680 - static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); 681 660 682 661 void hcall_tracepoint_regfunc(void) 683 662 { ··· 681 674 { 682 675 hcall_tracepoint_refcount--; 683 676 } 677 + #endif 678 + 679 + /* 680 + * Since the tracing code might execute hcalls we need to guard against 681 + * recursion. One example of this are spinlocks calling H_YIELD on 682 + * shared processor partitions. 683 + */ 684 + static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); 685 + 684 686 685 687 void __trace_hcall_entry(unsigned long opcode, unsigned long *args) 686 688 {