Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powernv: Add OPAL tracepoints

Knowing how long we spend in firmware calls is an important part of
minimising OS jitter.

This patch adds tracepoints to each OPAL call. If tracepoints are
enabled we branch out to a common routine that calls an entry and exit
tracepoint.

This allows us to write tools that monitor the frequency and duration
of OPAL calls, eg:

name count total(ms) min(ms) max(ms) avg(ms) period(ms)
OPAL_HANDLE_INTERRUPT 5 0.199 0.037 0.042 0.040 12547.545
OPAL_POLL_EVENTS 204 2.590 0.012 0.036 0.013 2264.899
OPAL_PCI_MSI_EOI 2830 3.066 0.001 0.005 0.001 81.166

We use jump labels if configured, which means we only add a single
nop instruction to every OPAL call when the tracepoints are disabled.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

authored by

Anton Blanchard and committed by
Benjamin Herrenschmidt
c49f6353 aaad4224

+236 -9
+45
arch/powerpc/include/asm/trace.h
··· 99 99 ); 100 100 #endif 101 101 102 + #ifdef CONFIG_PPC_POWERNV 103 + extern void opal_tracepoint_regfunc(void); 104 + extern void opal_tracepoint_unregfunc(void); 105 + 106 + TRACE_EVENT_FN(opal_entry, 107 + 108 + TP_PROTO(unsigned long opcode, unsigned long *args), 109 + 110 + TP_ARGS(opcode, args), 111 + 112 + TP_STRUCT__entry( 113 + __field(unsigned long, opcode) 114 + ), 115 + 116 + TP_fast_assign( 117 + __entry->opcode = opcode; 118 + ), 119 + 120 + TP_printk("opcode=%lu", __entry->opcode), 121 + 122 + opal_tracepoint_regfunc, opal_tracepoint_unregfunc 123 + ); 124 + 125 + TRACE_EVENT_FN(opal_exit, 126 + 127 + TP_PROTO(unsigned long opcode, unsigned long retval), 128 + 129 + TP_ARGS(opcode, retval), 130 + 131 + TP_STRUCT__entry( 132 + __field(unsigned long, opcode) 133 + __field(unsigned long, retval) 134 + ), 135 + 136 + TP_fast_assign( 137 + __entry->opcode = opcode; 138 + __entry->retval = retval; 139 + ), 140 + 141 + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), 142 + 143 + opal_tracepoint_regfunc, opal_tracepoint_unregfunc 144 + ); 145 + #endif 146 + 102 147 #endif /* _TRACE_POWERPC_H */ 103 148 104 149 #undef TRACE_INCLUDE_PATH
+1
arch/powerpc/platforms/powernv/Makefile
··· 8 8 obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o 9 9 obj-$(CONFIG_PPC_SCOM) += opal-xscom.o 10 10 obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o 11 + obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
+84
arch/powerpc/platforms/powernv/opal-tracepoints.c
··· 1 + #include <linux/percpu.h> 2 + #include <linux/jump_label.h> 3 + #include <asm/trace.h> 4 + 5 + #ifdef CONFIG_JUMP_LABEL 6 + struct static_key opal_tracepoint_key = STATIC_KEY_INIT; 7 + 8 + void opal_tracepoint_regfunc(void) 9 + { 10 + static_key_slow_inc(&opal_tracepoint_key); 11 + } 12 + 13 + void opal_tracepoint_unregfunc(void) 14 + { 15 + static_key_slow_dec(&opal_tracepoint_key); 16 + } 17 + #else 18 + /* 19 + * We optimise OPAL calls by placing opal_tracepoint_refcount 20 + * directly in the TOC so we can check if the opal tracepoints are 21 + * enabled via a single load. 22 + */ 23 + 24 + /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 25 + extern long opal_tracepoint_refcount; 26 + 27 + void opal_tracepoint_regfunc(void) 28 + { 29 + opal_tracepoint_refcount++; 30 + } 31 + 32 + void opal_tracepoint_unregfunc(void) 33 + { 34 + opal_tracepoint_refcount--; 35 + } 36 + #endif 37 + 38 + /* 39 + * Since the tracing code might execute OPAL calls we need to guard against 40 + * recursion. 41 + */ 42 + static DEFINE_PER_CPU(unsigned int, opal_trace_depth); 43 + 44 + void __trace_opal_entry(unsigned long opcode, unsigned long *args) 45 + { 46 + unsigned long flags; 47 + unsigned int *depth; 48 + 49 + local_irq_save(flags); 50 + 51 + depth = &__get_cpu_var(opal_trace_depth); 52 + 53 + if (*depth) 54 + goto out; 55 + 56 + (*depth)++; 57 + preempt_disable(); 58 + trace_opal_entry(opcode, args); 59 + (*depth)--; 60 + 61 + out: 62 + local_irq_restore(flags); 63 + } 64 + 65 + void __trace_opal_exit(long opcode, unsigned long retval) 66 + { 67 + unsigned long flags; 68 + unsigned int *depth; 69 + 70 + local_irq_save(flags); 71 + 72 + depth = &__get_cpu_var(opal_trace_depth); 73 + 74 + if (*depth) 75 + goto out; 76 + 77 + (*depth)++; 78 + trace_opal_exit(opcode, retval); 79 + preempt_enable(); 80 + (*depth)--; 81 + 82 + out: 83 + local_irq_restore(flags); 84 + }
+106 -9
arch/powerpc/platforms/powernv/opal-wrappers.S
··· 13 13 #include <asm/hvcall.h> 14 14 #include <asm/asm-offsets.h> 15 15 #include <asm/opal.h> 16 + #include <asm/jump_label.h> 17 + 18 + .section ".text" 19 + 20 + #ifdef CONFIG_TRACEPOINTS 21 + #ifdef CONFIG_JUMP_LABEL 22 + #define OPAL_BRANCH(LABEL) \ 23 + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) 24 + #else 25 + 26 + .section ".toc","aw" 27 + 28 + .globl opal_tracepoint_refcount 29 + opal_tracepoint_refcount: 30 + .llong 0 31 + 32 + .section ".text" 33 + 34 + /* 35 + * We branch around this in early init by using an unconditional cpu 36 + * feature. 37 + */ 38 + #define OPAL_BRANCH(LABEL) \ 39 + BEGIN_FTR_SECTION; \ 40 + b 1f; \ 41 + END_FTR_SECTION(0, 1); \ 42 + ld r12,opal_tracepoint_refcount@toc(r2); \ 43 + std r12,32(r1); \ 44 + cmpdi r12,0; \ 45 + bne- LABEL; \ 46 + 1: 47 + 48 + #endif 49 + 50 + #else 51 + #define OPAL_BRANCH(LABEL) 52 + #endif 16 53 17 54 /* TODO: 18 55 * 19 56 * - Trace irqs in/off (needs saving/restoring all args, argh...) 20 57 * - Get r11 feed up by Dave so I can have better register usage 21 58 */ 59 + 22 60 #define OPAL_CALL(name, token) \ 23 61 _GLOBAL(name); \ 24 62 mflr r0; \ 25 - mfcr r12; \ 26 63 std r0,16(r1); \ 64 + li r0,token; \ 65 + OPAL_BRANCH(opal_tracepoint_entry) \ 66 + mfcr r12; \ 27 67 stw r12,8(r1); \ 28 68 std r1,PACAR1(r13); \ 29 - li r0,0; \ 69 + li r11,0; \ 30 70 mfmsr r12; \ 31 - ori r0,r0,MSR_EE; \ 71 + ori r11,r11,MSR_EE; \ 32 72 std r12,PACASAVEDMSR(r13); \ 33 - andc r12,r12,r0; \ 73 + andc r12,r12,r11; \ 34 74 mtmsrd r12,1; \ 35 - LOAD_REG_ADDR(r0,opal_return); \ 36 - mtlr r0; \ 37 - li r0,MSR_DR|MSR_IR|MSR_LE;\ 38 - andc r12,r12,r0; \ 39 - li r0,token; \ 75 + LOAD_REG_ADDR(r11,opal_return); \ 76 + mtlr r11; \ 77 + li r11,MSR_DR|MSR_IR|MSR_LE;\ 78 + andc r12,r12,r11; \ 40 79 mtspr SPRN_HSRR1,r12; \ 41 80 LOAD_REG_ADDR(r11,opal); \ 42 81 ld r12,8(r11); \ ··· 99 60 mtspr SPRN_SRR1,r6; 100 61 mtcr r4; 101 62 rfid 63 + 64 + #ifdef CONFIG_TRACEPOINTS 65 + opal_tracepoint_entry: 66 + stdu r1,-STACKFRAMESIZE(r1)
67 + std r0,STK_REG(R23)(r1) 68 + std r3,STK_REG(R24)(r1) 69 + std r4,STK_REG(R25)(r1) 70 + std r5,STK_REG(R26)(r1) 71 + std r6,STK_REG(R27)(r1) 72 + std r7,STK_REG(R28)(r1) 73 + std r8,STK_REG(R29)(r1) 74 + std r9,STK_REG(R30)(r1) 75 + std r10,STK_REG(R31)(r1) 76 + mr r3,r0 77 + addi r4,r1,STK_REG(R24) 78 + bl __trace_opal_entry 79 + ld r0,STK_REG(R23)(r1) 80 + ld r3,STK_REG(R24)(r1) 81 + ld r4,STK_REG(R25)(r1) 82 + ld r5,STK_REG(R26)(r1) 83 + ld r6,STK_REG(R27)(r1) 84 + ld r7,STK_REG(R28)(r1) 85 + ld r8,STK_REG(R29)(r1) 86 + ld r9,STK_REG(R30)(r1) 87 + ld r10,STK_REG(R31)(r1) 88 + LOAD_REG_ADDR(r11,opal_tracepoint_return) 89 + mfcr r12 90 + std r11,16(r1) 91 + stw r12,8(r1) 92 + std r1,PACAR1(r13) 93 + li r11,0 94 + mfmsr r12 95 + ori r11,r11,MSR_EE 96 + std r12,PACASAVEDMSR(r13) 97 + andc r12,r12,r11 98 + mtmsrd r12,1 99 + LOAD_REG_ADDR(r11,opal_return) 100 + mtlr r11 101 + li r11,MSR_DR|MSR_IR|MSR_LE 102 + andc r12,r12,r11 103 + mtspr SPRN_HSRR1,r12 104 + LOAD_REG_ADDR(r11,opal) 105 + ld r12,8(r11) 106 + ld r2,0(r11) 107 + mtspr SPRN_HSRR0,r12 108 + hrfid 109 + 110 + opal_tracepoint_return: 111 + std r3,STK_REG(R31)(r1) 112 + mr r4,r3 113 + ld r0,STK_REG(R23)(r1) 114 + bl __trace_opal_exit 115 + ld r3,STK_REG(R31)(r1) 116 + addi r1,r1,STACKFRAMESIZE 117 + ld r0,16(r1) 118 + mtlr r0 119 + blr 120 + #endif 102 121 103 122 OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); 104 123 OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);