Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: ftrace: Make code modification NMI safe.

This cribs the x86 implementation of ftrace_nmi_enter() and friends to
make ftrace_modify_code() NMI safe, particularly on SMP configurations.

For additional notes on the problems involved, see the comment below
ftrace_call_replace().

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

+146 -1
+1
arch/sh/Kconfig
··· 38 38 select HAVE_DYNAMIC_FTRACE 39 39 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 40 40 select HAVE_FTRACE_SYSCALLS 41 + select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE 41 42 select HAVE_FUNCTION_GRAPH_TRACER 42 43 select HAVE_ARCH_KGDB 43 44 select ARCH_HIBERNATION_POSSIBLE if MMU
+145 -1
arch/sh/kernel/ftrace.c
··· 62 62 return ftrace_replaced_code; 63 63 } 64 64 65 + /* 66 + * Modifying code must take extra care. On an SMP machine, if 67 + * the code being modified is also being executed on another CPU 68 + * that CPU will have undefined results and possibly take a GPF. 69 + * We use kstop_machine to stop other CPUs from executing code. 70 + * But this does not stop NMIs from happening. We still need 71 + * to protect against that. We separate out the modification of 72 + * the code to take care of this. 73 + * 74 + * Two buffers are added: An IP buffer and a "code" buffer. 75 + * 76 + * 1) Put the instruction pointer into the IP buffer 77 + * and the new code into the "code" buffer. 78 + * 2) Wait for any running NMIs to finish and set a flag that says 79 + * we are modifying code, it is done in an atomic operation. 80 + * 3) Write the code 81 + * 4) clear the flag. 82 + * 5) Wait for any running NMIs to finish. 83 + * 84 + * If an NMI is executed, the first thing it does is to call 85 + * "ftrace_nmi_enter". This will check if the flag is set to write 86 + * and if it is, it will write what is in the IP and "code" buffers. 87 + * 88 + * The trick is, it does not matter if everyone is writing the same 89 + * content to the code location. Also, if a CPU is executing code 90 + * it is OK to write to that code location if the contents being written 91 + * are the same as what exists. 
92 + */ 93 + #define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ 94 + static atomic_t nmi_running = ATOMIC_INIT(0); 95 + static int mod_code_status; /* holds return value of text write */ 96 + static void *mod_code_ip; /* holds the IP to write to */ 97 + static void *mod_code_newcode; /* holds the text to write to the IP */ 98 + 99 + static unsigned nmi_wait_count; 100 + static atomic_t nmi_update_count = ATOMIC_INIT(0); 101 + 102 + int ftrace_arch_read_dyn_info(char *buf, int size) 103 + { 104 + int r; 105 + 106 + r = snprintf(buf, size, "%u %u", 107 + nmi_wait_count, 108 + atomic_read(&nmi_update_count)); 109 + return r; 110 + } 111 + 112 + static void clear_mod_flag(void) 113 + { 114 + int old = atomic_read(&nmi_running); 115 + 116 + for (;;) { 117 + int new = old & ~MOD_CODE_WRITE_FLAG; 118 + 119 + if (old == new) 120 + break; 121 + 122 + old = atomic_cmpxchg(&nmi_running, old, new); 123 + } 124 + } 125 + 126 + static void ftrace_mod_code(void) 127 + { 128 + /* 129 + * Yes, more than one CPU process can be writing to mod_code_status. 130 + * (and the code itself) 131 + * But if one were to fail, then they all should, and if one were 132 + * to succeed, then they all should. 
133 + */ 134 + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, 135 + MCOUNT_INSN_SIZE); 136 + 137 + /* if we fail, then kill any new writers */ 138 + if (mod_code_status) 139 + clear_mod_flag(); 140 + } 141 + 142 + void ftrace_nmi_enter(void) 143 + { 144 + if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { 145 + smp_rmb(); 146 + ftrace_mod_code(); 147 + atomic_inc(&nmi_update_count); 148 + } 149 + /* Must have previous changes seen before executions */ 150 + smp_mb(); 151 + } 152 + 153 + void ftrace_nmi_exit(void) 154 + { 155 + /* Finish all executions before clearing nmi_running */ 156 + smp_mb(); 157 + atomic_dec(&nmi_running); 158 + } 159 + 160 + static void wait_for_nmi_and_set_mod_flag(void) 161 + { 162 + if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) 163 + return; 164 + 165 + do { 166 + cpu_relax(); 167 + } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); 168 + 169 + nmi_wait_count++; 170 + } 171 + 172 + static void wait_for_nmi(void) 173 + { 174 + if (!atomic_read(&nmi_running)) 175 + return; 176 + 177 + do { 178 + cpu_relax(); 179 + } while (atomic_read(&nmi_running)); 180 + 181 + nmi_wait_count++; 182 + } 183 + 184 + static int 185 + do_ftrace_mod_code(unsigned long ip, void *new_code) 186 + { 187 + mod_code_ip = (void *)ip; 188 + mod_code_newcode = new_code; 189 + 190 + /* The buffers need to be visible before we let NMIs write them */ 191 + smp_mb(); 192 + 193 + wait_for_nmi_and_set_mod_flag(); 194 + 195 + /* Make sure all running NMIs have finished before we write the code */ 196 + smp_mb(); 197 + 198 + ftrace_mod_code(); 199 + 200 + /* Make sure the write happens before clearing the bit */ 201 + smp_mb(); 202 + 203 + clear_mod_flag(); 204 + wait_for_nmi(); 205 + 206 + return mod_code_status; 207 + } 208 + 65 209 static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, 66 210 unsigned char *new_code) 67 211 { ··· 230 86 return -EINVAL; 231 87 232 88 /* replace the text with the new 
text */ 233 - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 89 + if (do_ftrace_mod_code(ip, new_code)) 234 90 return -EPERM; 235 91 236 92 flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);