Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'core/kprobes' into perf/core, to pick up a completed branch

Signed-off-by: Ingo Molnar <mingo@kernel.org>

+614 -1141
+2 -2
arch/arm/kernel/Makefile
··· 53 53 obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o 54 54 obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o 55 55 obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o 56 - obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o 57 - obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o 56 + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o 57 + obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o 58 58 obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o 59 59 obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 60 60 # Main staffs in KPROBES are in arch/arm/probes/ .
+2 -8
arch/arm/kernel/ftrace.c
··· 22 22 #include <asm/ftrace.h> 23 23 #include <asm/insn.h> 24 24 #include <asm/set_memory.h> 25 + #include <asm/patch.h> 25 26 26 27 #ifdef CONFIG_THUMB2_KERNEL 27 28 #define NOP 0xf85deb04 /* pop.w {lr} */ ··· 36 35 { 37 36 int *command = data; 38 37 39 - set_kernel_text_rw(); 40 38 ftrace_modify_all_code(*command); 41 - set_kernel_text_ro(); 42 39 43 40 return 0; 44 41 } ··· 58 59 59 60 int ftrace_arch_code_modify_prepare(void) 60 61 { 61 - set_all_modules_text_rw(); 62 62 return 0; 63 63 } 64 64 65 65 int ftrace_arch_code_modify_post_process(void) 66 66 { 67 - set_all_modules_text_ro(); 68 67 /* Make sure any TLB misses during machine stop are cleared. */ 69 68 flush_tlb_all(); 70 69 return 0; ··· 94 97 return -EINVAL; 95 98 } 96 99 97 - if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE)) 98 - return -EPERM; 99 - 100 - flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); 100 + __patch_text((void *)pc, new); 101 101 102 102 return 0; 103 103 }
-12
arch/nds32/kernel/ftrace.c
··· 89 89 return 0; 90 90 } 91 91 92 - int ftrace_arch_code_modify_prepare(void) 93 - { 94 - set_all_modules_text_rw(); 95 - return 0; 96 - } 97 - 98 - int ftrace_arch_code_modify_post_process(void) 99 - { 100 - set_all_modules_text_ro(); 101 - return 0; 102 - } 103 - 104 92 static unsigned long gen_sethi_insn(unsigned long addr) 105 93 { 106 94 unsigned long opcode = 0x46000000;
-2
arch/x86/include/asm/ftrace.h
··· 47 47 /* No extra data needed for x86 */ 48 48 }; 49 49 50 - int ftrace_int3_handler(struct pt_regs *regs); 51 - 52 50 #define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR 53 51 54 52 #endif /* CONFIG_DYNAMIC_FTRACE */
+5 -9
arch/x86/include/asm/kprobes.h
··· 11 11 12 12 #include <asm-generic/kprobes.h> 13 13 14 - #define BREAKPOINT_INSTRUCTION 0xcc 15 - 16 14 #ifdef CONFIG_KPROBES 17 15 #include <linux/types.h> 18 16 #include <linux/ptrace.h> 19 17 #include <linux/percpu.h> 18 + #include <asm/text-patching.h> 20 19 #include <asm/insn.h> 21 20 22 21 #define __ARCH_WANT_KPROBES_INSN_SLOT ··· 24 25 struct kprobe; 25 26 26 27 typedef u8 kprobe_opcode_t; 27 - #define RELATIVEJUMP_OPCODE 0xe9 28 - #define RELATIVEJUMP_SIZE 5 29 - #define RELATIVECALL_OPCODE 0xe8 30 - #define RELATIVE_ADDR_SIZE 4 28 + 31 29 #define MAX_STACK_SIZE 64 32 30 #define CUR_STACK_SIZE(ADDR) \ 33 31 (current_top_of_stack() - (unsigned long)(ADDR)) ··· 39 43 extern __visible kprobe_opcode_t optprobe_template_val[]; 40 44 extern __visible kprobe_opcode_t optprobe_template_call[]; 41 45 extern __visible kprobe_opcode_t optprobe_template_end[]; 42 - #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) 46 + #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + DISP32_SIZE) 43 47 #define MAX_OPTINSN_SIZE \ 44 48 (((unsigned long)optprobe_template_end - \ 45 49 (unsigned long)optprobe_template_entry) + \ 46 - MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) 50 + MAX_OPTIMIZED_LENGTH + JMP32_INSN_SIZE) 47 51 48 52 extern const int kretprobe_blacklist_size; 49 53 ··· 69 73 70 74 struct arch_optimized_insn { 71 75 /* copy of the original instructions */ 72 - kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE]; 76 + kprobe_opcode_t copied_insn[DISP32_SIZE]; 73 77 /* detour code buffer */ 74 78 kprobe_opcode_t *insn; 75 79 /* the size of instructions copied to detour code buffer */
-2
arch/x86/include/asm/set_memory.h
··· 81 81 int set_direct_map_default_noflush(struct page *page); 82 82 83 83 extern int kernel_set_to_readonly; 84 - void set_kernel_text_rw(void); 85 - void set_kernel_text_ro(void); 86 84 87 85 #ifdef CONFIG_X86_64 88 86 static inline int set_mce_nospec(unsigned long pfn)
+67 -19
arch/x86/include/asm/text-patching.h
··· 25 25 */ 26 26 #define POKE_MAX_OPCODE_SIZE 5 27 27 28 - struct text_poke_loc { 29 - void *addr; 30 - int len; 31 - s32 rel32; 32 - u8 opcode; 33 - const u8 text[POKE_MAX_OPCODE_SIZE]; 34 - }; 35 - 36 28 extern void text_poke_early(void *addr, const void *opcode, size_t len); 37 29 38 30 /* ··· 42 50 * an inconsistent instruction while you patch. 43 51 */ 44 52 extern void *text_poke(void *addr, const void *opcode, size_t len); 53 + extern void text_poke_sync(void); 45 54 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); 46 55 extern int poke_int3_handler(struct pt_regs *regs); 47 56 extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); 48 - extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); 49 - extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr, 50 - const void *opcode, size_t len, const void *emulate); 51 - extern int after_bootmem; 52 - extern __ro_after_init struct mm_struct *poking_mm; 53 - extern __ro_after_init unsigned long poking_addr; 54 57 55 - #ifndef CONFIG_UML_X86 56 - static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) 57 - { 58 - regs->ip = ip; 59 - } 58 + extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate); 59 + extern void text_poke_finish(void); 60 60 61 61 #define INT3_INSN_SIZE 1 62 62 #define INT3_INSN_OPCODE 0xCC ··· 62 78 #define JMP8_INSN_SIZE 2 63 79 #define JMP8_INSN_OPCODE 0xEB 64 80 81 + #define DISP32_SIZE 4 82 + 83 + static inline int text_opcode_size(u8 opcode) 84 + { 85 + int size = 0; 86 + 87 + #define __CASE(insn) \ 88 + case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break 89 + 90 + switch(opcode) { 91 + __CASE(INT3); 92 + __CASE(CALL); 93 + __CASE(JMP32); 94 + __CASE(JMP8); 95 + } 96 + 97 + #undef __CASE 98 + 99 + return size; 100 + } 101 + 102 + union text_poke_insn { 103 + u8 text[POKE_MAX_OPCODE_SIZE]; 104 + struct { 105 + u8 opcode; 106 + s32 
disp; 107 + } __attribute__((packed)); 108 + }; 109 + 110 + static __always_inline 111 + void *text_gen_insn(u8 opcode, const void *addr, const void *dest) 112 + { 113 + static union text_poke_insn insn; /* per instance */ 114 + int size = text_opcode_size(opcode); 115 + 116 + insn.opcode = opcode; 117 + 118 + if (size > 1) { 119 + insn.disp = (long)dest - (long)(addr + size); 120 + if (size == 2) { 121 + /* 122 + * Ensure that for JMP8 the displacement 123 + * actually fits the signed byte. 124 + */ 125 + BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); 126 + } 127 + } 128 + 129 + return &insn.text; 130 + } 131 + 132 + extern int after_bootmem; 133 + extern __ro_after_init struct mm_struct *poking_mm; 134 + extern __ro_after_init unsigned long poking_addr; 135 + 136 + #ifndef CONFIG_UML_X86 137 + static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) 138 + { 139 + regs->ip = ip; 140 + } 141 + 65 142 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) 66 143 { 67 144 /* ··· 130 85 * stack where the break point happened, and the saving of 131 86 * pt_regs. We can extend the original stack because of 132 87 * this gap. See the idtentry macro's create_gap option. 88 + * 89 + * Similarly entry_32.S will have a gap on the stack for (any) hardware 90 + * exception and pt_regs; see FIXUP_FRAME. 133 91 */ 134 92 regs->sp -= sizeof(unsigned long); 135 93 *(unsigned long *)regs->sp = val;
+105 -25
arch/x86/kernel/alternative.c
··· 936 936 sync_core(); 937 937 } 938 938 939 + void text_poke_sync(void) 940 + { 941 + on_each_cpu(do_sync_core, NULL, 1); 942 + } 943 + 944 + struct text_poke_loc { 945 + s32 rel_addr; /* addr := _stext + rel_addr */ 946 + s32 rel32; 947 + u8 opcode; 948 + const u8 text[POKE_MAX_OPCODE_SIZE]; 949 + }; 950 + 939 951 static struct bp_patching_desc { 940 952 struct text_poke_loc *vec; 941 953 int nr_entries; 942 954 } bp_patching; 943 955 944 - static int patch_cmp(const void *key, const void *elt) 956 + static inline void *text_poke_addr(struct text_poke_loc *tp) 957 + { 958 + return _stext + tp->rel_addr; 959 + } 960 + 961 + static int notrace patch_cmp(const void *key, const void *elt) 945 962 { 946 963 struct text_poke_loc *tp = (struct text_poke_loc *) elt; 947 964 948 - if (key < tp->addr) 965 + if (key < text_poke_addr(tp)) 949 966 return -1; 950 - if (key > tp->addr) 967 + if (key > text_poke_addr(tp)) 951 968 return 1; 952 969 return 0; 953 970 } 954 971 NOKPROBE_SYMBOL(patch_cmp); 955 972 956 - int poke_int3_handler(struct pt_regs *regs) 973 + int notrace poke_int3_handler(struct pt_regs *regs) 957 974 { 958 975 struct text_poke_loc *tp; 959 976 void *ip; 977 + int len; 960 978 961 979 /* 962 980 * Having observed our INT3 instruction, we now must observe ··· 1010 992 return 0; 1011 993 } else { 1012 994 tp = bp_patching.vec; 1013 - if (tp->addr != ip) 995 + if (text_poke_addr(tp) != ip) 1014 996 return 0; 1015 997 } 1016 998 1017 - ip += tp->len; 999 + len = text_opcode_size(tp->opcode); 1000 + ip += len; 1018 1001 1019 1002 switch (tp->opcode) { 1020 1003 case INT3_INSN_OPCODE: ··· 1042 1023 } 1043 1024 NOKPROBE_SYMBOL(poke_int3_handler); 1044 1025 1026 + #define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) 1027 + static struct text_poke_loc tp_vec[TP_VEC_MAX]; 1028 + static int tp_vec_nr; 1029 + 1045 1030 /** 1046 1031 * text_poke_bp_batch() -- update instructions on live kernel on SMP 1047 1032 * @tp: vector of instructions to patch ··· 1067 
1044 * replacing opcode 1068 1045 * - sync cores 1069 1046 */ 1070 - void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) 1047 + static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) 1071 1048 { 1072 1049 unsigned char int3 = INT3_INSN_OPCODE; 1073 1050 unsigned int i; ··· 1088 1065 * First step: add a int3 trap to the address that will be patched. 1089 1066 */ 1090 1067 for (i = 0; i < nr_entries; i++) 1091 - text_poke(tp[i].addr, &int3, sizeof(int3)); 1068 + text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE); 1092 1069 1093 - on_each_cpu(do_sync_core, NULL, 1); 1070 + text_poke_sync(); 1094 1071 1095 1072 /* 1096 1073 * Second step: update all but the first byte of the patched range. 1097 1074 */ 1098 1075 for (do_sync = 0, i = 0; i < nr_entries; i++) { 1099 - if (tp[i].len - sizeof(int3) > 0) { 1100 - text_poke((char *)tp[i].addr + sizeof(int3), 1101 - (const char *)tp[i].text + sizeof(int3), 1102 - tp[i].len - sizeof(int3)); 1076 + int len = text_opcode_size(tp[i].opcode); 1077 + 1078 + if (len - INT3_INSN_SIZE > 0) { 1079 + text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, 1080 + (const char *)tp[i].text + INT3_INSN_SIZE, 1081 + len - INT3_INSN_SIZE); 1103 1082 do_sync++; 1104 1083 } 1105 1084 } ··· 1112 1087 * not necessary and we'd be safe even without it. But 1113 1088 * better safe than sorry (plus there's not only Intel). 
1114 1089 */ 1115 - on_each_cpu(do_sync_core, NULL, 1); 1090 + text_poke_sync(); 1116 1091 } 1117 1092 1118 1093 /* ··· 1123 1098 if (tp[i].text[0] == INT3_INSN_OPCODE) 1124 1099 continue; 1125 1100 1126 - text_poke(tp[i].addr, tp[i].text, sizeof(int3)); 1101 + text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE); 1127 1102 do_sync++; 1128 1103 } 1129 1104 1130 1105 if (do_sync) 1131 - on_each_cpu(do_sync_core, NULL, 1); 1106 + text_poke_sync(); 1132 1107 1133 1108 /* 1134 1109 * sync_core() implies an smp_mb() and orders this store against 1135 1110 * the writing of the new instruction. 1136 1111 */ 1137 - bp_patching.vec = NULL; 1138 1112 bp_patching.nr_entries = 0; 1113 + /* 1114 + * This sync_core() call ensures that all INT3 handlers in progress 1115 + * have finished. This allows poke_int3_handler() after this to 1116 + * avoid touching bp_patching.vec by checking nr_entries == 0. 1117 + */ 1118 + text_poke_sync(); 1119 + bp_patching.vec = NULL; 1139 1120 } 1140 1121 1141 1122 void text_poke_loc_init(struct text_poke_loc *tp, void *addr, ··· 1149 1118 { 1150 1119 struct insn insn; 1151 1120 1152 - if (!opcode) 1153 - opcode = (void *)tp->text; 1154 - else 1155 - memcpy((void *)tp->text, opcode, len); 1156 - 1121 + memcpy((void *)tp->text, opcode, len); 1157 1122 if (!emulate) 1158 1123 emulate = opcode; 1159 1124 ··· 1159 1132 BUG_ON(!insn_complete(&insn)); 1160 1133 BUG_ON(len != insn.length); 1161 1134 1162 - tp->addr = addr; 1163 - tp->len = len; 1135 + tp->rel_addr = addr - (void *)_stext; 1164 1136 tp->opcode = insn.opcode.bytes[0]; 1165 1137 1166 1138 switch (tp->opcode) { ··· 1193 1167 } 1194 1168 } 1195 1169 1170 + /* 1171 + * We hard rely on the tp_vec being ordered; ensure this is so by flushing 1172 + * early if needed. 
1173 + */ 1174 + static bool tp_order_fail(void *addr) 1175 + { 1176 + struct text_poke_loc *tp; 1177 + 1178 + if (!tp_vec_nr) 1179 + return false; 1180 + 1181 + if (!addr) /* force */ 1182 + return true; 1183 + 1184 + tp = &tp_vec[tp_vec_nr - 1]; 1185 + if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr) 1186 + return true; 1187 + 1188 + return false; 1189 + } 1190 + 1191 + static void text_poke_flush(void *addr) 1192 + { 1193 + if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) { 1194 + text_poke_bp_batch(tp_vec, tp_vec_nr); 1195 + tp_vec_nr = 0; 1196 + } 1197 + } 1198 + 1199 + void text_poke_finish(void) 1200 + { 1201 + text_poke_flush(NULL); 1202 + } 1203 + 1204 + void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate) 1205 + { 1206 + struct text_poke_loc *tp; 1207 + 1208 + if (unlikely(system_state == SYSTEM_BOOTING)) { 1209 + text_poke_early(addr, opcode, len); 1210 + return; 1211 + } 1212 + 1213 + text_poke_flush(addr); 1214 + 1215 + tp = &tp_vec[tp_vec_nr++]; 1216 + text_poke_loc_init(tp, addr, opcode, len, emulate); 1217 + } 1218 + 1196 1219 /** 1197 1220 * text_poke_bp() -- update instructions on live kernel on SMP 1198 1221 * @addr: address to patch ··· 1253 1178 * dynamically allocated memory. This function should be used when it is 1254 1179 * not possible to allocate memory. 1255 1180 */ 1256 - void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) 1181 + void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) 1257 1182 { 1258 1183 struct text_poke_loc tp; 1184 + 1185 + if (unlikely(system_state == SYSTEM_BOOTING)) { 1186 + text_poke_early(addr, opcode, len); 1187 + return; 1188 + } 1259 1189 1260 1190 text_poke_loc_init(&tp, addr, opcode, len, emulate); 1261 1191 text_poke_bp_batch(&tp, 1);
+135 -571
arch/x86/kernel/ftrace.c
··· 34 34 35 35 #ifdef CONFIG_DYNAMIC_FTRACE 36 36 37 + static int ftrace_poke_late = 0; 38 + 37 39 int ftrace_arch_code_modify_prepare(void) 38 40 __acquires(&text_mutex) 39 41 { ··· 45 43 * ftrace has it set to "read/write". 46 44 */ 47 45 mutex_lock(&text_mutex); 48 - set_kernel_text_rw(); 49 - set_all_modules_text_rw(); 46 + ftrace_poke_late = 1; 50 47 return 0; 51 48 } 52 49 53 50 int ftrace_arch_code_modify_post_process(void) 54 51 __releases(&text_mutex) 55 52 { 56 - set_all_modules_text_ro(); 57 - set_kernel_text_ro(); 53 + /* 54 + * ftrace_make_{call,nop}() may be called during 55 + * module load, and we need to finish the text_poke_queue() 56 + * that they do, here. 57 + */ 58 + text_poke_finish(); 59 + ftrace_poke_late = 0; 58 60 mutex_unlock(&text_mutex); 59 61 return 0; 60 62 } 61 63 62 - union ftrace_code_union { 63 - char code[MCOUNT_INSN_SIZE]; 64 - struct { 65 - unsigned char op; 66 - int offset; 67 - } __attribute__((packed)); 68 - }; 69 - 70 - static int ftrace_calc_offset(long ip, long addr) 71 - { 72 - return (int)(addr - ip); 73 - } 74 - 75 - static unsigned char * 76 - ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr) 77 - { 78 - static union ftrace_code_union calc; 79 - 80 - calc.op = op; 81 - calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); 82 - 83 - return calc.code; 84 - } 85 - 86 - static unsigned char * 87 - ftrace_call_replace(unsigned long ip, unsigned long addr) 88 - { 89 - return ftrace_text_replace(0xe8, ip, addr); 90 - } 91 - 92 - static inline int 93 - within(unsigned long addr, unsigned long start, unsigned long end) 94 - { 95 - return addr >= start && addr < end; 96 - } 97 - 98 - static unsigned long text_ip_addr(unsigned long ip) 99 - { 100 - /* 101 - * On x86_64, kernel text mappings are mapped read-only, so we use 102 - * the kernel identity mapping instead of the kernel text mapping 103 - * to modify the kernel text. 
104 - * 105 - * For 32bit kernels, these mappings are same and we can use 106 - * kernel identity mapping to modify code. 107 - */ 108 - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 109 - ip = (unsigned long)__va(__pa_symbol(ip)); 110 - 111 - return ip; 112 - } 113 - 114 - static const unsigned char *ftrace_nop_replace(void) 64 + static const char *ftrace_nop_replace(void) 115 65 { 116 66 return ideal_nops[NOP_ATOMIC5]; 117 67 } 118 68 119 - static int 120 - ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, 121 - unsigned const char *new_code) 69 + static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) 122 70 { 123 - unsigned char replaced[MCOUNT_INSN_SIZE]; 71 + return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); 72 + } 124 73 125 - ftrace_expected = old_code; 74 + static int ftrace_verify_code(unsigned long ip, const char *old_code) 75 + { 76 + char cur_code[MCOUNT_INSN_SIZE]; 126 77 127 78 /* 128 79 * Note: ··· 84 129 * Carefully read and modify the code with probe_kernel_*(), and make 85 130 * sure what we read is what we expected it to be before modifying it. 
86 131 */ 87 - 88 132 /* read the text we want to modify */ 89 - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) 133 + if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { 134 + WARN_ON(1); 90 135 return -EFAULT; 136 + } 91 137 92 138 /* Make sure it is what we expect it to be */ 93 - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) 139 + if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { 140 + WARN_ON(1); 94 141 return -EINVAL; 95 - 96 - ip = text_ip_addr(ip); 97 - 98 - /* replace the text with the new text */ 99 - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 100 - return -EPERM; 101 - 102 - sync_core(); 142 + } 103 143 104 144 return 0; 105 145 } 106 146 107 - int ftrace_make_nop(struct module *mod, 108 - struct dyn_ftrace *rec, unsigned long addr) 147 + /* 148 + * Marked __ref because it calls text_poke_early() which is .init.text. That is 149 + * ok because that call will happen early, during boot, when .init sections are 150 + * still present. 151 + */ 152 + static int __ref 153 + ftrace_modify_code_direct(unsigned long ip, const char *old_code, 154 + const char *new_code) 109 155 { 110 - unsigned const char *new, *old; 156 + int ret = ftrace_verify_code(ip, old_code); 157 + if (ret) 158 + return ret; 159 + 160 + /* replace the text with the new text */ 161 + if (ftrace_poke_late) 162 + text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL); 163 + else 164 + text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE); 165 + return 0; 166 + } 167 + 168 + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) 169 + { 111 170 unsigned long ip = rec->ip; 171 + const char *new, *old; 112 172 113 173 old = ftrace_call_replace(ip, addr); 114 174 new = ftrace_nop_replace(); ··· 137 167 * just modify the code directly. 
138 168 */ 139 169 if (addr == MCOUNT_ADDR) 140 - return ftrace_modify_code_direct(rec->ip, old, new); 170 + return ftrace_modify_code_direct(ip, old, new); 141 171 142 - ftrace_expected = NULL; 143 - 144 - /* Normal cases use add_brk_on_nop */ 172 + /* 173 + * x86 overrides ftrace_replace_code -- this function will never be used 174 + * in this case. 175 + */ 145 176 WARN_ONCE(1, "invalid use of ftrace_make_nop"); 146 177 return -EINVAL; 147 178 } 148 179 149 180 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 150 181 { 151 - unsigned const char *new, *old; 152 182 unsigned long ip = rec->ip; 183 + const char *new, *old; 153 184 154 185 old = ftrace_nop_replace(); 155 186 new = ftrace_call_replace(ip, addr); ··· 158 187 /* Should only be called when module is loaded */ 159 188 return ftrace_modify_code_direct(rec->ip, old, new); 160 189 } 161 - 162 - /* 163 - * The modifying_ftrace_code is used to tell the breakpoint 164 - * handler to call ftrace_int3_handler(). If it fails to 165 - * call this handler for a breakpoint added by ftrace, then 166 - * the kernel may crash. 167 - * 168 - * As atomic_writes on x86 do not need a barrier, we do not 169 - * need to add smp_mb()s for this to work. It is also considered 170 - * that we can not read the modifying_ftrace_code before 171 - * executing the breakpoint. That would be quite remarkable if 172 - * it could do that. Here's the flow that is required: 173 - * 174 - * CPU-0 CPU-1 175 - * 176 - * atomic_inc(mfc); 177 - * write int3s 178 - * <trap-int3> // implicit (r)mb 179 - * if (atomic_read(mfc)) 180 - * call ftrace_int3_handler() 181 - * 182 - * Then when we are finished: 183 - * 184 - * atomic_dec(mfc); 185 - * 186 - * If we hit a breakpoint that was not set by ftrace, it does not 187 - * matter if ftrace_int3_handler() is called or not. It will 188 - * simply be ignored. But it is crucial that a ftrace nop/caller 189 - * breakpoint is handled. 
No other user should ever place a 190 - * breakpoint on an ftrace nop/caller location. It must only 191 - * be done by this code. 192 - */ 193 - atomic_t modifying_ftrace_code __read_mostly; 194 - 195 - static int 196 - ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 197 - unsigned const char *new_code); 198 190 199 191 /* 200 192 * Should never be called: ··· 171 237 unsigned long addr) 172 238 { 173 239 WARN_ON(1); 174 - ftrace_expected = NULL; 175 240 return -EINVAL; 176 - } 177 - 178 - static unsigned long ftrace_update_func; 179 - static unsigned long ftrace_update_func_call; 180 - 181 - static int update_ftrace_func(unsigned long ip, void *new) 182 - { 183 - unsigned char old[MCOUNT_INSN_SIZE]; 184 - int ret; 185 - 186 - memcpy(old, (void *)ip, MCOUNT_INSN_SIZE); 187 - 188 - ftrace_update_func = ip; 189 - /* Make sure the breakpoints see the ftrace_update_func update */ 190 - smp_wmb(); 191 - 192 - /* See comment above by declaration of modifying_ftrace_code */ 193 - atomic_inc(&modifying_ftrace_code); 194 - 195 - ret = ftrace_modify_code(ip, old, new); 196 - 197 - atomic_dec(&modifying_ftrace_code); 198 - 199 - return ret; 200 241 } 201 242 202 243 int ftrace_update_ftrace_func(ftrace_func_t func) 203 244 { 204 - unsigned long ip = (unsigned long)(&ftrace_call); 205 - unsigned char *new; 206 - int ret; 207 - 208 - ftrace_update_func_call = (unsigned long)func; 209 - 210 - new = ftrace_call_replace(ip, (unsigned long)func); 211 - ret = update_ftrace_func(ip, new); 212 - 213 - /* Also update the regs callback function */ 214 - if (!ret) { 215 - ip = (unsigned long)(&ftrace_regs_call); 216 - new = ftrace_call_replace(ip, (unsigned long)func); 217 - ret = update_ftrace_func(ip, new); 218 - } 219 - 220 - return ret; 221 - } 222 - 223 - static nokprobe_inline int is_ftrace_caller(unsigned long ip) 224 - { 225 - if (ip == ftrace_update_func) 226 - return 1; 227 - 228 - return 0; 229 - } 230 - 231 - /* 232 - * A breakpoint was added to the code 
address we are about to 233 - * modify, and this is the handle that will just skip over it. 234 - * We are either changing a nop into a trace call, or a trace 235 - * call to a nop. While the change is taking place, we treat 236 - * it just like it was a nop. 237 - */ 238 - int ftrace_int3_handler(struct pt_regs *regs) 239 - { 240 245 unsigned long ip; 246 + const char *new; 241 247 242 - if (WARN_ON_ONCE(!regs)) 243 - return 0; 248 + ip = (unsigned long)(&ftrace_call); 249 + new = ftrace_call_replace(ip, (unsigned long)func); 250 + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); 244 251 245 - ip = regs->ip - INT3_INSN_SIZE; 246 - 247 - if (ftrace_location(ip)) { 248 - int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); 249 - return 1; 250 - } else if (is_ftrace_caller(ip)) { 251 - if (!ftrace_update_func_call) { 252 - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); 253 - return 1; 254 - } 255 - int3_emulate_call(regs, ftrace_update_func_call); 256 - return 1; 257 - } 252 + ip = (unsigned long)(&ftrace_regs_call); 253 + new = ftrace_call_replace(ip, (unsigned long)func); 254 + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); 258 255 259 256 return 0; 260 - } 261 - NOKPROBE_SYMBOL(ftrace_int3_handler); 262 - 263 - static int ftrace_write(unsigned long ip, const char *val, int size) 264 - { 265 - ip = text_ip_addr(ip); 266 - 267 - if (probe_kernel_write((void *)ip, val, size)) 268 - return -EPERM; 269 - 270 - return 0; 271 - } 272 - 273 - static int add_break(unsigned long ip, const char *old) 274 - { 275 - unsigned char replaced[MCOUNT_INSN_SIZE]; 276 - unsigned char brk = BREAKPOINT_INSTRUCTION; 277 - 278 - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) 279 - return -EFAULT; 280 - 281 - ftrace_expected = old; 282 - 283 - /* Make sure it is what we expect it to be */ 284 - if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) 285 - return -EINVAL; 286 - 287 - return ftrace_write(ip, &brk, 1); 288 - } 289 - 290 - static int 
add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) 291 - { 292 - unsigned const char *old; 293 - unsigned long ip = rec->ip; 294 - 295 - old = ftrace_call_replace(ip, addr); 296 - 297 - return add_break(rec->ip, old); 298 - } 299 - 300 - 301 - static int add_brk_on_nop(struct dyn_ftrace *rec) 302 - { 303 - unsigned const char *old; 304 - 305 - old = ftrace_nop_replace(); 306 - 307 - return add_break(rec->ip, old); 308 - } 309 - 310 - static int add_breakpoints(struct dyn_ftrace *rec, bool enable) 311 - { 312 - unsigned long ftrace_addr; 313 - int ret; 314 - 315 - ftrace_addr = ftrace_get_addr_curr(rec); 316 - 317 - ret = ftrace_test_record(rec, enable); 318 - 319 - switch (ret) { 320 - case FTRACE_UPDATE_IGNORE: 321 - return 0; 322 - 323 - case FTRACE_UPDATE_MAKE_CALL: 324 - /* converting nop to call */ 325 - return add_brk_on_nop(rec); 326 - 327 - case FTRACE_UPDATE_MODIFY_CALL: 328 - case FTRACE_UPDATE_MAKE_NOP: 329 - /* converting a call to a nop */ 330 - return add_brk_on_call(rec, ftrace_addr); 331 - } 332 - return 0; 333 - } 334 - 335 - /* 336 - * On error, we need to remove breakpoints. This needs to 337 - * be done caefully. If the address does not currently have a 338 - * breakpoint, we know we are done. Otherwise, we look at the 339 - * remaining 4 bytes of the instruction. If it matches a nop 340 - * we replace the breakpoint with the nop. Otherwise we replace 341 - * it with the call instruction. 
342 - */ 343 - static int remove_breakpoint(struct dyn_ftrace *rec) 344 - { 345 - unsigned char ins[MCOUNT_INSN_SIZE]; 346 - unsigned char brk = BREAKPOINT_INSTRUCTION; 347 - const unsigned char *nop; 348 - unsigned long ftrace_addr; 349 - unsigned long ip = rec->ip; 350 - 351 - /* If we fail the read, just give up */ 352 - if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) 353 - return -EFAULT; 354 - 355 - /* If this does not have a breakpoint, we are done */ 356 - if (ins[0] != brk) 357 - return 0; 358 - 359 - nop = ftrace_nop_replace(); 360 - 361 - /* 362 - * If the last 4 bytes of the instruction do not match 363 - * a nop, then we assume that this is a call to ftrace_addr. 364 - */ 365 - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { 366 - /* 367 - * For extra paranoidism, we check if the breakpoint is on 368 - * a call that would actually jump to the ftrace_addr. 369 - * If not, don't touch the breakpoint, we make just create 370 - * a disaster. 371 - */ 372 - ftrace_addr = ftrace_get_addr_new(rec); 373 - nop = ftrace_call_replace(ip, ftrace_addr); 374 - 375 - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) 376 - goto update; 377 - 378 - /* Check both ftrace_addr and ftrace_old_addr */ 379 - ftrace_addr = ftrace_get_addr_curr(rec); 380 - nop = ftrace_call_replace(ip, ftrace_addr); 381 - 382 - ftrace_expected = nop; 383 - 384 - if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) 385 - return -EINVAL; 386 - } 387 - 388 - update: 389 - return ftrace_write(ip, nop, 1); 390 - } 391 - 392 - static int add_update_code(unsigned long ip, unsigned const char *new) 393 - { 394 - /* skip breakpoint */ 395 - ip++; 396 - new++; 397 - return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); 398 - } 399 - 400 - static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) 401 - { 402 - unsigned long ip = rec->ip; 403 - unsigned const char *new; 404 - 405 - new = ftrace_call_replace(ip, addr); 406 - return add_update_code(ip, new); 407 - } 
408 - 409 - static int add_update_nop(struct dyn_ftrace *rec) 410 - { 411 - unsigned long ip = rec->ip; 412 - unsigned const char *new; 413 - 414 - new = ftrace_nop_replace(); 415 - return add_update_code(ip, new); 416 - } 417 - 418 - static int add_update(struct dyn_ftrace *rec, bool enable) 419 - { 420 - unsigned long ftrace_addr; 421 - int ret; 422 - 423 - ret = ftrace_test_record(rec, enable); 424 - 425 - ftrace_addr = ftrace_get_addr_new(rec); 426 - 427 - switch (ret) { 428 - case FTRACE_UPDATE_IGNORE: 429 - return 0; 430 - 431 - case FTRACE_UPDATE_MODIFY_CALL: 432 - case FTRACE_UPDATE_MAKE_CALL: 433 - /* converting nop to call */ 434 - return add_update_call(rec, ftrace_addr); 435 - 436 - case FTRACE_UPDATE_MAKE_NOP: 437 - /* converting a call to a nop */ 438 - return add_update_nop(rec); 439 - } 440 - 441 - return 0; 442 - } 443 - 444 - static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) 445 - { 446 - unsigned long ip = rec->ip; 447 - unsigned const char *new; 448 - 449 - new = ftrace_call_replace(ip, addr); 450 - 451 - return ftrace_write(ip, new, 1); 452 - } 453 - 454 - static int finish_update_nop(struct dyn_ftrace *rec) 455 - { 456 - unsigned long ip = rec->ip; 457 - unsigned const char *new; 458 - 459 - new = ftrace_nop_replace(); 460 - 461 - return ftrace_write(ip, new, 1); 462 - } 463 - 464 - static int finish_update(struct dyn_ftrace *rec, bool enable) 465 - { 466 - unsigned long ftrace_addr; 467 - int ret; 468 - 469 - ret = ftrace_update_record(rec, enable); 470 - 471 - ftrace_addr = ftrace_get_addr_new(rec); 472 - 473 - switch (ret) { 474 - case FTRACE_UPDATE_IGNORE: 475 - return 0; 476 - 477 - case FTRACE_UPDATE_MODIFY_CALL: 478 - case FTRACE_UPDATE_MAKE_CALL: 479 - /* converting nop to call */ 480 - return finish_update_call(rec, ftrace_addr); 481 - 482 - case FTRACE_UPDATE_MAKE_NOP: 483 - /* converting a call to a nop */ 484 - return finish_update_nop(rec); 485 - } 486 - 487 - return 0; 488 - } 489 - 490 - static void 
do_sync_core(void *data) 491 - { 492 - sync_core(); 493 - } 494 - 495 - static void run_sync(void) 496 - { 497 - int enable_irqs; 498 - 499 - /* No need to sync if there's only one CPU */ 500 - if (num_online_cpus() == 1) 501 - return; 502 - 503 - enable_irqs = irqs_disabled(); 504 - 505 - /* We may be called with interrupts disabled (on bootup). */ 506 - if (enable_irqs) 507 - local_irq_enable(); 508 - on_each_cpu(do_sync_core, NULL, 1); 509 - if (enable_irqs) 510 - local_irq_disable(); 511 257 } 512 258 513 259 void ftrace_replace_code(int enable) 514 260 { 515 261 struct ftrace_rec_iter *iter; 516 262 struct dyn_ftrace *rec; 517 - const char *report = "adding breakpoints"; 518 - int count = 0; 263 + const char *new, *old; 519 264 int ret; 520 265 521 266 for_ftrace_rec_iter(iter) { 522 267 rec = ftrace_rec_iter_record(iter); 523 268 524 - ret = add_breakpoints(rec, enable); 525 - if (ret) 526 - goto remove_breakpoints; 527 - count++; 269 + switch (ftrace_test_record(rec, enable)) { 270 + case FTRACE_UPDATE_IGNORE: 271 + default: 272 + continue; 273 + 274 + case FTRACE_UPDATE_MAKE_CALL: 275 + old = ftrace_nop_replace(); 276 + break; 277 + 278 + case FTRACE_UPDATE_MODIFY_CALL: 279 + case FTRACE_UPDATE_MAKE_NOP: 280 + old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec)); 281 + break; 282 + } 283 + 284 + ret = ftrace_verify_code(rec->ip, old); 285 + if (ret) { 286 + ftrace_bug(ret, rec); 287 + return; 288 + } 528 289 } 529 - 530 - run_sync(); 531 - 532 - report = "updating code"; 533 - count = 0; 534 290 535 291 for_ftrace_rec_iter(iter) { 536 292 rec = ftrace_rec_iter_record(iter); 537 293 538 - ret = add_update(rec, enable); 539 - if (ret) 540 - goto remove_breakpoints; 541 - count++; 294 + switch (ftrace_test_record(rec, enable)) { 295 + case FTRACE_UPDATE_IGNORE: 296 + default: 297 + continue; 298 + 299 + case FTRACE_UPDATE_MAKE_CALL: 300 + case FTRACE_UPDATE_MODIFY_CALL: 301 + new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec)); 302 + break; 
303 + 304 + case FTRACE_UPDATE_MAKE_NOP: 305 + new = ftrace_nop_replace(); 306 + break; 307 + } 308 + 309 + text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL); 310 + ftrace_update_record(rec, enable); 542 311 } 543 - 544 - run_sync(); 545 - 546 - report = "removing breakpoints"; 547 - count = 0; 548 - 549 - for_ftrace_rec_iter(iter) { 550 - rec = ftrace_rec_iter_record(iter); 551 - 552 - ret = finish_update(rec, enable); 553 - if (ret) 554 - goto remove_breakpoints; 555 - count++; 556 - } 557 - 558 - run_sync(); 559 - 560 - return; 561 - 562 - remove_breakpoints: 563 - pr_warn("Failed on %s (%d):\n", report, count); 564 - ftrace_bug(ret, rec); 565 - for_ftrace_rec_iter(iter) { 566 - rec = ftrace_rec_iter_record(iter); 567 - /* 568 - * Breakpoints are handled only when this function is in 569 - * progress. The system could not work with them. 570 - */ 571 - if (remove_breakpoint(rec)) 572 - BUG(); 573 - } 574 - run_sync(); 575 - } 576 - 577 - static int 578 - ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 579 - unsigned const char *new_code) 580 - { 581 - int ret; 582 - 583 - ret = add_break(ip, old_code); 584 - if (ret) 585 - goto out; 586 - 587 - run_sync(); 588 - 589 - ret = add_update_code(ip, new_code); 590 - if (ret) 591 - goto fail_update; 592 - 593 - run_sync(); 594 - 595 - ret = ftrace_write(ip, new_code, 1); 596 - /* 597 - * The breakpoint is handled only when this function is in progress. 598 - * The system could not work if we could not remove it. 
599 - */ 600 - BUG_ON(ret); 601 - out: 602 - run_sync(); 603 - return ret; 604 - 605 - fail_update: 606 - /* Also here the system could not work with the breakpoint */ 607 - if (ftrace_write(ip, old_code, 1)) 608 - BUG(); 609 - goto out; 312 + text_poke_finish(); 610 313 } 611 314 612 315 void arch_ftrace_update_code(int command) 613 316 { 614 - /* See comment above by declaration of modifying_ftrace_code */ 615 - atomic_inc(&modifying_ftrace_code); 616 - 617 317 ftrace_modify_all_code(command); 618 - 619 - atomic_dec(&modifying_ftrace_code); 620 318 } 621 319 622 320 int __init ftrace_dyn_arch_init(void) ··· 313 747 unsigned long start_offset; 314 748 unsigned long end_offset; 315 749 unsigned long op_offset; 750 + unsigned long call_offset; 316 751 unsigned long offset; 317 752 unsigned long npages; 318 753 unsigned long size; ··· 330 763 start_offset = (unsigned long)ftrace_regs_caller; 331 764 end_offset = (unsigned long)ftrace_regs_caller_end; 332 765 op_offset = (unsigned long)ftrace_regs_caller_op_ptr; 766 + call_offset = (unsigned long)ftrace_regs_call; 333 767 } else { 334 768 start_offset = (unsigned long)ftrace_caller; 335 769 end_offset = (unsigned long)ftrace_epilogue; 336 770 op_offset = (unsigned long)ftrace_caller_op_ptr; 771 + call_offset = (unsigned long)ftrace_call; 337 772 } 338 773 339 774 size = end_offset - start_offset; ··· 392 823 /* put in the new offset to the ftrace_ops */ 393 824 memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE); 394 825 826 + /* put in the call to the function */ 827 + mutex_lock(&text_mutex); 828 + call_offset -= start_offset; 829 + memcpy(trampoline + call_offset, 830 + text_gen_insn(CALL_INSN_OPCODE, 831 + trampoline + call_offset, 832 + ftrace_ops_get_func(ops)), CALL_INSN_SIZE); 833 + mutex_unlock(&text_mutex); 834 + 395 835 /* ALLOC_TRAMP flags lets us know we created it */ 396 836 ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; 397 837 398 838 set_vm_flush_reset_perms(trampoline); 399 839 400 - /* 401 - * Module 
allocation needs to be completed by making the page 402 - * executable. The page is still writable, which is a security hazard, 403 - * but anyhow ftrace breaks W^X completely. 404 - */ 840 + set_memory_ro((unsigned long)trampoline, npages); 405 841 set_memory_x((unsigned long)trampoline, npages); 406 842 return (unsigned long)trampoline; 407 843 fail: ··· 433 859 void arch_ftrace_update_trampoline(struct ftrace_ops *ops) 434 860 { 435 861 ftrace_func_t func; 436 - unsigned char *new; 437 862 unsigned long offset; 438 863 unsigned long ip; 439 864 unsigned int size; 440 - int ret, npages; 865 + const char *new; 441 866 442 - if (ops->trampoline) { 443 - /* 444 - * The ftrace_ops caller may set up its own trampoline. 445 - * In such a case, this code must not modify it. 446 - */ 447 - if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) 448 - return; 449 - npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT; 450 - set_memory_rw(ops->trampoline, npages); 451 - } else { 867 + if (!ops->trampoline) { 452 868 ops->trampoline = create_trampoline(ops, &size); 453 869 if (!ops->trampoline) 454 870 return; 455 871 ops->trampoline_size = size; 456 - npages = PAGE_ALIGN(size) >> PAGE_SHIFT; 872 + return; 457 873 } 874 + 875 + /* 876 + * The ftrace_ops caller may set up its own trampoline. 877 + * In such a case, this code must not modify it. 
878 + */ 879 + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) 880 + return; 458 881 459 882 offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS); 460 883 ip = ops->trampoline + offset; 461 - 462 884 func = ftrace_ops_get_func(ops); 463 885 464 - ftrace_update_func_call = (unsigned long)func; 465 - 886 + mutex_lock(&text_mutex); 466 887 /* Do a safe modify in case the trampoline is executing */ 467 888 new = ftrace_call_replace(ip, (unsigned long)func); 468 - ret = update_ftrace_func(ip, new); 469 - set_memory_ro(ops->trampoline, npages); 470 - 471 - /* The update should never fail */ 472 - WARN_ON(ret); 889 + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); 890 + mutex_unlock(&text_mutex); 473 891 } 474 892 475 893 /* Return the address of the function the trampoline calls */ 476 894 static void *addr_from_call(void *ptr) 477 895 { 478 - union ftrace_code_union calc; 896 + union text_poke_insn call; 479 897 int ret; 480 898 481 - ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE); 899 + ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); 482 900 if (WARN_ON_ONCE(ret < 0)) 483 901 return NULL; 484 902 485 903 /* Make sure this is a call */ 486 - if (WARN_ON_ONCE(calc.op != 0xe8)) { 487 - pr_warn("Expected e8, got %x\n", calc.op); 904 + if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) { 905 + pr_warn("Expected E8, got %x\n", call.opcode); 488 906 return NULL; 489 907 } 490 908 491 - return ptr + MCOUNT_INSN_SIZE + calc.offset; 909 + return ptr + CALL_INSN_SIZE + call.disp; 492 910 } 493 911 494 912 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, ··· 547 981 #ifdef CONFIG_DYNAMIC_FTRACE 548 982 extern void ftrace_graph_call(void); 549 983 550 - static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) 984 + static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) 551 985 { 552 - return ftrace_text_replace(0xe9, ip, addr); 986 + return text_gen_insn(JMP32_INSN_OPCODE, 
(void *)ip, (void *)addr); 553 987 } 554 988 555 989 static int ftrace_mod_jmp(unsigned long ip, void *func) 556 990 { 557 - unsigned char *new; 991 + const char *new; 558 992 559 - ftrace_update_func_call = 0UL; 560 993 new = ftrace_jmp_replace(ip, (unsigned long)func); 561 - 562 - return update_ftrace_func(ip, new); 994 + text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL); 995 + return 0; 563 996 } 564 997 565 998 int ftrace_enable_ftrace_graph_caller(void) ··· 584 1019 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, 585 1020 unsigned long frame_pointer) 586 1021 { 1022 + unsigned long return_hooker = (unsigned long)&return_to_handler; 587 1023 unsigned long old; 588 1024 int faulted; 589 - unsigned long return_hooker = (unsigned long) 590 - &return_to_handler; 591 1025 592 1026 /* 593 1027 * When resuming from suspend-to-ram, this function can be indirectly
+39 -79
arch/x86/kernel/jump_label.c
··· 16 16 #include <asm/alternative.h> 17 17 #include <asm/text-patching.h> 18 18 19 - union jump_code_union { 20 - char code[JUMP_LABEL_NOP_SIZE]; 21 - struct { 22 - char jump; 23 - int offset; 24 - } __attribute__((packed)); 25 - }; 26 - 27 - static void bug_at(unsigned char *ip, int line) 19 + static void bug_at(const void *ip, int line) 28 20 { 29 21 /* 30 22 * The location is not an op that we were expecting. ··· 27 35 BUG(); 28 36 } 29 37 30 - static void __jump_label_set_jump_code(struct jump_entry *entry, 31 - enum jump_label_type type, 32 - union jump_code_union *code, 33 - int init) 38 + static const void * 39 + __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init) 34 40 { 35 41 const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; 36 42 const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; 37 - const void *expect; 43 + const void *expect, *code; 44 + const void *addr, *dest; 38 45 int line; 39 46 40 - code->jump = 0xe9; 41 - code->offset = jump_entry_target(entry) - 42 - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); 47 + addr = (void *)jump_entry_code(entry); 48 + dest = (void *)jump_entry_target(entry); 49 + 50 + code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); 43 51 44 52 if (init) { 45 53 expect = default_nop; line = __LINE__; 46 54 } else if (type == JUMP_LABEL_JMP) { 47 55 expect = ideal_nop; line = __LINE__; 48 56 } else { 49 - expect = code->code; line = __LINE__; 57 + expect = code; line = __LINE__; 50 58 } 51 59 52 - if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE)) 53 - bug_at((void *)jump_entry_code(entry), line); 60 + if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) 61 + bug_at(addr, line); 54 62 55 63 if (type == JUMP_LABEL_NOP) 56 - memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE); 64 + code = ideal_nop; 65 + 66 + return code; 57 67 } 58 68 59 - static void __ref __jump_label_transform(struct jump_entry *entry, 60 - enum jump_label_type type, 61 - int init) 69 + static 
void inline __jump_label_transform(struct jump_entry *entry, 70 + enum jump_label_type type, 71 + int init) 62 72 { 63 - union jump_code_union code; 64 - 65 - __jump_label_set_jump_code(entry, type, &code, init); 73 + const void *opcode = __jump_label_set_jump_code(entry, type, init); 66 74 67 75 /* 68 76 * As long as only a single processor is running and the code is still ··· 76 84 * always nop being the 'currently valid' instruction 77 85 */ 78 86 if (init || system_state == SYSTEM_BOOTING) { 79 - text_poke_early((void *)jump_entry_code(entry), &code, 87 + text_poke_early((void *)jump_entry_code(entry), opcode, 80 88 JUMP_LABEL_NOP_SIZE); 81 89 return; 82 90 } 83 91 84 - text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL); 92 + text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); 93 + } 94 + 95 + static void __ref jump_label_transform(struct jump_entry *entry, 96 + enum jump_label_type type, 97 + int init) 98 + { 99 + mutex_lock(&text_mutex); 100 + __jump_label_transform(entry, type, init); 101 + mutex_unlock(&text_mutex); 85 102 } 86 103 87 104 void arch_jump_label_transform(struct jump_entry *entry, 88 105 enum jump_label_type type) 89 106 { 90 - mutex_lock(&text_mutex); 91 - __jump_label_transform(entry, type, 0); 92 - mutex_unlock(&text_mutex); 107 + jump_label_transform(entry, type, 0); 93 108 } 94 - 95 - #define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) 96 - static struct text_poke_loc tp_vec[TP_VEC_MAX]; 97 - static int tp_vec_nr; 98 109 99 110 bool arch_jump_label_transform_queue(struct jump_entry *entry, 100 111 enum jump_label_type type) 101 112 { 102 - struct text_poke_loc *tp; 103 - void *entry_code; 113 + const void *opcode; 104 114 105 115 if (system_state == SYSTEM_BOOTING) { 106 116 /* ··· 112 118 return true; 113 119 } 114 120 115 - /* 116 - * No more space in the vector, tell upper layer to apply 117 - * the queue before continuing. 
118 - */ 119 - if (tp_vec_nr == TP_VEC_MAX) 120 - return false; 121 - 122 - tp = &tp_vec[tp_vec_nr]; 123 - 124 - entry_code = (void *)jump_entry_code(entry); 125 - 126 - /* 127 - * The INT3 handler will do a bsearch in the queue, so we need entries 128 - * to be sorted. We can survive an unsorted list by rejecting the entry, 129 - * forcing the generic jump_label code to apply the queue. Warning once, 130 - * to raise the attention to the case of an unsorted entry that is 131 - * better not happen, because, in the worst case we will perform in the 132 - * same way as we do without batching - with some more overhead. 133 - */ 134 - if (tp_vec_nr > 0) { 135 - int prev = tp_vec_nr - 1; 136 - struct text_poke_loc *prev_tp = &tp_vec[prev]; 137 - 138 - if (WARN_ON_ONCE(prev_tp->addr > entry_code)) 139 - return false; 140 - } 141 - 142 - __jump_label_set_jump_code(entry, type, 143 - (union jump_code_union *)&tp->text, 0); 144 - 145 - text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL); 146 - 147 - tp_vec_nr++; 148 - 121 + mutex_lock(&text_mutex); 122 + opcode = __jump_label_set_jump_code(entry, type, 0); 123 + text_poke_queue((void *)jump_entry_code(entry), 124 + opcode, JUMP_LABEL_NOP_SIZE, NULL); 125 + mutex_unlock(&text_mutex); 149 126 return true; 150 127 } 151 128 152 129 void arch_jump_label_transform_apply(void) 153 130 { 154 - if (!tp_vec_nr) 155 - return; 156 - 157 131 mutex_lock(&text_mutex); 158 - text_poke_bp_batch(tp_vec, tp_vec_nr); 132 + text_poke_finish(); 159 133 mutex_unlock(&text_mutex); 160 - 161 - tp_vec_nr = 0; 162 134 } 163 135 164 136 static enum { ··· 153 193 jlstate = JL_STATE_NO_UPDATE; 154 194 } 155 195 if (jlstate == JL_STATE_UPDATE) 156 - __jump_label_transform(entry, type, 1); 196 + jump_label_transform(entry, type, 1); 157 197 }
+11 -9
arch/x86/kernel/kprobes/core.c
··· 119 119 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 120 120 void synthesize_reljump(void *dest, void *from, void *to) 121 121 { 122 - __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); 122 + __synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE); 123 123 } 124 124 NOKPROBE_SYMBOL(synthesize_reljump); 125 125 126 126 /* Insert a call instruction at address 'from', which calls address 'to'.*/ 127 127 void synthesize_relcall(void *dest, void *from, void *to) 128 128 { 129 - __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); 129 + __synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE); 130 130 } 131 131 NOKPROBE_SYMBOL(synthesize_relcall); 132 132 ··· 301 301 * Another debugging subsystem might insert this breakpoint. 302 302 * In that case, we can't recover it. 303 303 */ 304 - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 304 + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) 305 305 return 0; 306 306 addr += insn.length; 307 307 } ··· 356 356 return 0; 357 357 358 358 /* Another subsystem puts a breakpoint, failed to recover */ 359 - if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 359 + if (insn->opcode.bytes[0] == INT3_INSN_OPCODE) 360 360 return 0; 361 361 362 362 /* We should not singlestep on the exception masking instructions */ ··· 400 400 int len = insn->length; 401 401 402 402 if (can_boost(insn, p->addr) && 403 - MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { 403 + MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) { 404 404 /* 405 405 * These instructions can be executed directly if it 406 406 * jumps back to correct address. 
407 407 */ 408 408 synthesize_reljump(buf + len, p->ainsn.insn + len, 409 409 p->addr + insn->length); 410 - len += RELATIVEJUMP_SIZE; 410 + len += JMP32_INSN_SIZE; 411 411 p->ainsn.boostable = true; 412 412 } else { 413 413 p->ainsn.boostable = false; ··· 501 501 502 502 void arch_arm_kprobe(struct kprobe *p) 503 503 { 504 - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); 504 + text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1); 505 + text_poke_sync(); 505 506 } 506 507 507 508 void arch_disarm_kprobe(struct kprobe *p) 508 509 { 509 510 text_poke(p->addr, &p->opcode, 1); 511 + text_poke_sync(); 510 512 } 511 513 512 514 void arch_remove_kprobe(struct kprobe *p) ··· 611 609 regs->flags |= X86_EFLAGS_TF; 612 610 regs->flags &= ~X86_EFLAGS_IF; 613 611 /* single step inline if the instruction is an int3 */ 614 - if (p->opcode == BREAKPOINT_INSTRUCTION) 612 + if (p->opcode == INT3_INSN_OPCODE) 615 613 regs->ip = (unsigned long)p->addr; 616 614 else 617 615 regs->ip = (unsigned long)p->ainsn.insn; ··· 697 695 reset_current_kprobe(); 698 696 return 1; 699 697 } 700 - } else if (*addr != BREAKPOINT_INSTRUCTION) { 698 + } else if (*addr != INT3_INSN_OPCODE) { 701 699 /* 702 700 * The breakpoint instruction was removed right 703 701 * after we hit it. Another cpu has removed
+33 -34
arch/x86/kernel/kprobes/opt.c
··· 38 38 long offs; 39 39 int i; 40 40 41 - for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 41 + for (i = 0; i < JMP32_INSN_SIZE; i++) { 42 42 kp = get_kprobe((void *)addr - i); 43 43 /* This function only handles jump-optimized kprobe */ 44 44 if (kp && kprobe_optimized(kp)) { ··· 62 62 63 63 if (addr == (unsigned long)kp->addr) { 64 64 buf[0] = kp->opcode; 65 - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 65 + memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE); 66 66 } else { 67 67 offs = addr - (unsigned long)kp->addr - 1; 68 - memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 68 + memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs); 69 69 } 70 70 71 71 return (unsigned long)buf; ··· 141 141 #define TMPL_END_IDX \ 142 142 ((long)optprobe_template_end - (long)optprobe_template_entry) 143 143 144 - #define INT3_SIZE sizeof(kprobe_opcode_t) 145 - 146 144 /* Optimized kprobe call back function: called from optinsn */ 147 145 static void 148 146 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) ··· 160 162 regs->cs |= get_kernel_rpl(); 161 163 regs->gs = 0; 162 164 #endif 163 - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 165 + regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE; 164 166 regs->orig_ax = ~0UL; 165 167 166 168 __this_cpu_write(current_kprobe, &op->kp); ··· 177 179 struct insn insn; 178 180 int len = 0, ret; 179 181 180 - while (len < RELATIVEJUMP_SIZE) { 182 + while (len < JMP32_INSN_SIZE) { 181 183 ret = __copy_instruction(dest + len, src + len, real + len, &insn); 182 184 if (!ret || !can_boost(&insn, src + len)) 183 185 return -EINVAL; ··· 269 271 return 0; 270 272 271 273 /* Check there is enough space for a relative jump. 
*/ 272 - if (size - offset < RELATIVEJUMP_SIZE) 274 + if (size - offset < JMP32_INSN_SIZE) 273 275 return 0; 274 276 275 277 /* Decode instructions */ ··· 288 290 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 289 291 insn_get_length(&insn); 290 292 /* Another subsystem puts a breakpoint */ 291 - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 293 + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) 292 294 return 0; 293 295 /* Recover address */ 294 296 insn.kaddr = (void *)addr; 295 297 insn.next_byte = (void *)(addr + insn.length); 296 298 /* Check any instructions don't jump into target */ 297 299 if (insn_is_indirect_jump(&insn) || 298 - insn_jump_into_range(&insn, paddr + INT3_SIZE, 299 - RELATIVE_ADDR_SIZE)) 300 + insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE, 301 + DISP32_SIZE)) 300 302 return 0; 301 303 addr += insn.length; 302 304 } ··· 372 374 * Verify if the address gap is in 2GB range, because this uses 373 375 * a relative jump. 374 376 */ 375 - rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; 377 + rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE; 376 378 if (abs(rel) > 0x7fffffff) { 377 379 ret = -ERANGE; 378 380 goto err; ··· 399 401 /* Set returning jmp instruction at the tail of out-of-line buffer */ 400 402 synthesize_reljump(buf + len, slot + len, 401 403 (u8 *)op->kp.addr + op->optinsn.size); 402 - len += RELATIVEJUMP_SIZE; 404 + len += JMP32_INSN_SIZE; 403 405 404 406 /* We have to use text_poke() for instruction buffer because it is RO */ 405 407 text_poke(slot, buf, len); ··· 414 416 } 415 417 416 418 /* 417 - * Replace breakpoints (int3) with relative jumps. 419 + * Replace breakpoints (INT3) with relative jumps (JMP.d32). 418 420 * Caller must call with locking kprobe_mutex and text_mutex. 
421 + * 422 + * The caller will have installed a regular kprobe and after that issued 423 + * synchronize_rcu_tasks(), this ensures that the instruction(s) that live in 424 + * the 4 bytes after the INT3 are unused and can now be overwritten. 419 425 */ 420 426 void arch_optimize_kprobes(struct list_head *oplist) 421 427 { 422 428 struct optimized_kprobe *op, *tmp; 423 - u8 insn_buff[RELATIVEJUMP_SIZE]; 429 + u8 insn_buff[JMP32_INSN_SIZE]; 424 430 425 431 list_for_each_entry_safe(op, tmp, oplist, list) { 426 432 s32 rel = (s32)((long)op->optinsn.insn - 427 - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 433 + ((long)op->kp.addr + JMP32_INSN_SIZE)); 428 434 429 435 WARN_ON(kprobe_disabled(&op->kp)); 430 436 431 437 /* Backup instructions which will be replaced by jump address */ 432 - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 433 - RELATIVE_ADDR_SIZE); 438 + memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE, 439 + DISP32_SIZE); 434 440 435 - insn_buff[0] = RELATIVEJUMP_OPCODE; 441 + insn_buff[0] = JMP32_INSN_OPCODE; 436 442 *(s32 *)(&insn_buff[1]) = rel; 437 443 438 - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL); 444 + text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL); 439 445 440 446 list_del_init(&op->list); 441 447 } 442 448 } 443 449 444 - /* Replace a relative jump with a breakpoint (int3). */ 450 + /* 451 + * Replace a relative jump (JMP.d32) with a breakpoint (INT3). 452 + * 453 + * After that, we can restore the 4 bytes after the INT3 to undo what 454 + * arch_optimize_kprobes() scribbled. This is safe since those bytes will be 455 + * unused once the INT3 lands. 
456 + */ 445 457 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 446 458 { 447 - u8 insn_buff[RELATIVEJUMP_SIZE]; 448 - u8 emulate_buff[RELATIVEJUMP_SIZE]; 449 - 450 - /* Set int3 to first byte for kprobes */ 451 - insn_buff[0] = BREAKPOINT_INSTRUCTION; 452 - memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 453 - 454 - emulate_buff[0] = RELATIVEJUMP_OPCODE; 455 - *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn - 456 - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 457 - 458 - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, 459 - emulate_buff); 459 + arch_arm_kprobe(&op->kp); 460 + text_poke(op->kp.addr + INT3_INSN_SIZE, 461 + op->optinsn.copied_insn, DISP32_SIZE); 462 + text_poke_sync(); 460 463 } 461 464 462 465 /*
-9
arch/x86/kernel/traps.c
··· 572 572 573 573 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) 574 574 { 575 - #ifdef CONFIG_DYNAMIC_FTRACE 576 - /* 577 - * ftrace must be first, everything else may cause a recursive crash. 578 - * See note by declaration of modifying_ftrace_code in ftrace.c 579 - */ 580 - if (unlikely(atomic_read(&modifying_ftrace_code)) && 581 - ftrace_int3_handler(regs)) 582 - return; 583 - #endif 584 575 if (poke_int3_handler(regs)) 585 576 return; 586 577
-28
arch/x86/mm/init_32.c
··· 874 874 875 875 int kernel_set_to_readonly __read_mostly; 876 876 877 - void set_kernel_text_rw(void) 878 - { 879 - unsigned long start = PFN_ALIGN(_text); 880 - unsigned long size = PFN_ALIGN(_etext) - start; 881 - 882 - if (!kernel_set_to_readonly) 883 - return; 884 - 885 - pr_debug("Set kernel text: %lx - %lx for read write\n", 886 - start, start+size); 887 - 888 - set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); 889 - } 890 - 891 - void set_kernel_text_ro(void) 892 - { 893 - unsigned long start = PFN_ALIGN(_text); 894 - unsigned long size = PFN_ALIGN(_etext) - start; 895 - 896 - if (!kernel_set_to_readonly) 897 - return; 898 - 899 - pr_debug("Set kernel text: %lx - %lx for read only\n", 900 - start, start+size); 901 - 902 - set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 903 - } 904 - 905 877 static void mark_nxdata_nx(void) 906 878 { 907 879 /*
-36
arch/x86/mm/init_64.c
··· 1260 1260 1261 1261 int kernel_set_to_readonly; 1262 1262 1263 - void set_kernel_text_rw(void) 1264 - { 1265 - unsigned long start = PFN_ALIGN(_text); 1266 - unsigned long end = PFN_ALIGN(_etext); 1267 - 1268 - if (!kernel_set_to_readonly) 1269 - return; 1270 - 1271 - pr_debug("Set kernel text: %lx - %lx for read write\n", 1272 - start, end); 1273 - 1274 - /* 1275 - * Make the kernel identity mapping for text RW. Kernel text 1276 - * mapping will always be RO. Refer to the comment in 1277 - * static_protections() in pageattr.c 1278 - */ 1279 - set_memory_rw(start, (end - start) >> PAGE_SHIFT); 1280 - } 1281 - 1282 - void set_kernel_text_ro(void) 1283 - { 1284 - unsigned long start = PFN_ALIGN(_text); 1285 - unsigned long end = PFN_ALIGN(_etext); 1286 - 1287 - if (!kernel_set_to_readonly) 1288 - return; 1289 - 1290 - pr_debug("Set kernel text: %lx - %lx for read only\n", 1291 - start, end); 1292 - 1293 - /* 1294 - * Set the kernel identity mapping for text RO. 1295 - */ 1296 - set_memory_ro(start, (end - start) >> PAGE_SHIFT); 1297 - } 1298 - 1299 1263 void mark_rodata_ro(void) 1300 1264 { 1301 1265 unsigned long start = PFN_ALIGN(_text);
+4 -4
drivers/infiniband/hw/hfi1/trace_tid.h
··· 138 138 TP_ARGS(dd, index, type, pa, order), 139 139 TP_STRUCT__entry(/* entry */ 140 140 DD_DEV_ENTRY(dd) 141 - __field(unsigned long, pa); 142 - __field(u32, index); 143 - __field(u32, type); 144 - __field(u16, order); 141 + __field(unsigned long, pa) 142 + __field(u32, index) 143 + __field(u32, type) 144 + __field(u16, order) 145 145 ), 146 146 TP_fast_assign(/* assign */ 147 147 DD_DEV_ASSIGN(dd);
+1 -1
drivers/infiniband/hw/hfi1/trace_tx.h
··· 588 588 TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), 589 589 TP_ARGS(dd, ctxt, subctxt, i), 590 590 TP_STRUCT__entry( 591 - DD_DEV_ENTRY(dd); 591 + DD_DEV_ENTRY(dd) 592 592 __field(u16, ctxt) 593 593 __field(u8, subctxt) 594 594 __field(u8, ver_opcode)
+4 -4
drivers/lightnvm/pblk-trace.h
··· 46 46 TP_STRUCT__entry( 47 47 __string(name, name) 48 48 __field(u64, ppa) 49 - __field(int, state); 49 + __field(int, state) 50 50 ), 51 51 52 52 TP_fast_assign( ··· 72 72 TP_STRUCT__entry( 73 73 __string(name, name) 74 74 __field(u64, ppa) 75 - __field(int, state); 75 + __field(int, state) 76 76 ), 77 77 78 78 TP_fast_assign( ··· 98 98 TP_STRUCT__entry( 99 99 __string(name, name) 100 100 __field(int, line) 101 - __field(int, state); 101 + __field(int, state) 102 102 ), 103 103 104 104 TP_fast_assign( ··· 121 121 122 122 TP_STRUCT__entry( 123 123 __string(name, name) 124 - __field(int, state); 124 + __field(int, state) 125 125 ), 126 126 127 127 TP_fast_assign(
+1 -1
drivers/net/fjes/fjes_trace.h
··· 28 28 __field(u8, cs_busy) 29 29 __field(u8, cs_complete) 30 30 __field(int, timeout) 31 - __field(int, ret); 31 + __field(int, ret) 32 32 ), 33 33 TP_fast_assign( 34 34 __entry->cr_req = cr->bits.req_code;
+3 -3
drivers/net/wireless/ath/ath10k/trace.h
··· 239 239 TP_STRUCT__entry( 240 240 __string(device, dev_name(ar->dev)) 241 241 __string(driver, dev_driver_string(ar->dev)) 242 - __field(u8, hw_type); 242 + __field(u8, hw_type) 243 243 __field(size_t, buf_len) 244 244 __dynamic_array(u8, buf, buf_len) 245 245 ), ··· 269 269 TP_STRUCT__entry( 270 270 __string(device, dev_name(ar->dev)) 271 271 __string(driver, dev_driver_string(ar->dev)) 272 - __field(u8, hw_type); 272 + __field(u8, hw_type) 273 273 __field(u16, buf_len) 274 274 __dynamic_array(u8, pktlog, buf_len) 275 275 ), ··· 435 435 TP_STRUCT__entry( 436 436 __string(device, dev_name(ar->dev)) 437 437 __string(driver, dev_driver_string(ar->dev)) 438 - __field(u8, hw_type); 438 + __field(u8, hw_type) 439 439 __field(u16, len) 440 440 __dynamic_array(u8, rxdesc, len) 441 441 ),
+3 -3
fs/xfs/scrub/trace.h
··· 329 329 __field(int, level) 330 330 __field(xfs_agnumber_t, agno) 331 331 __field(xfs_agblock_t, bno) 332 - __field(int, ptr); 332 + __field(int, ptr) 333 333 __field(int, error) 334 334 __field(void *, ret_ip) 335 335 ), ··· 414 414 __field(int, level) 415 415 __field(xfs_agnumber_t, agno) 416 416 __field(xfs_agblock_t, bno) 417 - __field(int, ptr); 417 + __field(int, ptr) 418 418 __field(void *, ret_ip) 419 419 ), 420 420 TP_fast_assign( ··· 452 452 __field(int, level) 453 453 __field(xfs_agnumber_t, agno) 454 454 __field(xfs_agblock_t, bno) 455 - __field(int, ptr); 455 + __field(int, ptr) 456 456 __field(void *, ret_ip) 457 457 ), 458 458 TP_fast_assign(
+2 -2
fs/xfs/xfs_trace.h
··· 218 218 TP_STRUCT__entry( 219 219 __field(dev_t, dev) 220 220 __field(xfs_ino_t, ino) 221 - __field(void *, leaf); 222 - __field(int, pos); 221 + __field(void *, leaf) 222 + __field(int, pos) 223 223 __field(xfs_fileoff_t, startoff) 224 224 __field(xfs_fsblock_t, startblock) 225 225 __field(xfs_filblks_t, blockcount)
-4
include/linux/module.h
··· 849 849 #define __MODULE_STRING(x) __stringify(x) 850 850 851 851 #ifdef CONFIG_STRICT_MODULE_RWX 852 - extern void set_all_modules_text_rw(void); 853 - extern void set_all_modules_text_ro(void); 854 852 extern void module_enable_ro(const struct module *mod, bool after_init); 855 853 extern void module_disable_ro(const struct module *mod); 856 854 #else 857 - static inline void set_all_modules_text_rw(void) { } 858 - static inline void set_all_modules_text_ro(void) { } 859 855 static inline void module_enable_ro(const struct module *mod, bool after_init) { } 860 856 static inline void module_disable_ro(const struct module *mod) { } 861 857 #endif
+17 -1
include/linux/trace_events.h
··· 192 192 193 193 struct trace_event_call; 194 194 195 + #define TRACE_FUNCTION_TYPE ((const char *)~0UL) 196 + 197 + struct trace_event_fields { 198 + const char *type; 199 + union { 200 + struct { 201 + const char *name; 202 + const int size; 203 + const int align; 204 + const int is_signed; 205 + const int filter_type; 206 + }; 207 + int (*define_fields)(struct trace_event_call *); 208 + }; 209 + }; 210 + 195 211 struct trace_event_class { 196 212 const char *system; 197 213 void *probe; ··· 216 200 #endif 217 201 int (*reg)(struct trace_event_call *event, 218 202 enum trace_reg type, void *data); 219 - int (*define_fields)(struct trace_event_call *); 203 + struct trace_event_fields *fields_array; 220 204 struct list_head *(*get_fields)(struct trace_event_call *); 221 205 struct list_head fields; 222 206 int (*raw_init)(struct trace_event_call *);
+1 -1
include/trace/events/filemap.h
··· 85 85 TP_ARGS(file, old), 86 86 87 87 TP_STRUCT__entry( 88 - __field(struct file *, file); 88 + __field(struct file *, file) 89 89 __field(unsigned long, i_ino) 90 90 __field(dev_t, s_dev) 91 91 __field(errseq_t, old)
+21 -43
include/trace/trace_events.h
··· 400 400 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 401 401 402 402 #undef __field_ext 403 - #define __field_ext(type, item, filter_type) \ 404 - ret = trace_define_field(event_call, #type, #item, \ 405 - offsetof(typeof(field), item), \ 406 - sizeof(field.item), \ 407 - is_signed_type(type), filter_type); \ 408 - if (ret) \ 409 - return ret; 403 + #define __field_ext(_type, _item, _filter_type) { \ 404 + .type = #_type, .name = #_item, \ 405 + .size = sizeof(_type), .align = __alignof__(_type), \ 406 + .is_signed = is_signed_type(_type), .filter_type = _filter_type }, 410 407 411 408 #undef __field_struct_ext 412 - #define __field_struct_ext(type, item, filter_type) \ 413 - ret = trace_define_field(event_call, #type, #item, \ 414 - offsetof(typeof(field), item), \ 415 - sizeof(field.item), \ 416 - 0, filter_type); \ 417 - if (ret) \ 418 - return ret; 409 + #define __field_struct_ext(_type, _item, _filter_type) { \ 410 + .type = #_type, .name = #_item, \ 411 + .size = sizeof(_type), .align = __alignof__(_type), \ 412 + 0, .filter_type = _filter_type }, 419 413 420 414 #undef __field 421 415 #define __field(type, item) __field_ext(type, item, FILTER_OTHER) ··· 418 424 #define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER) 419 425 420 426 #undef __array 421 - #define __array(type, item, len) \ 422 - do { \ 423 - char *type_str = #type"["__stringify(len)"]"; \ 424 - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 425 - BUILD_BUG_ON(len <= 0); \ 426 - ret = trace_define_field(event_call, type_str, #item, \ 427 - offsetof(typeof(field), item), \ 428 - sizeof(field.item), \ 429 - is_signed_type(type), FILTER_OTHER); \ 430 - if (ret) \ 431 - return ret; \ 432 - } while (0); 427 + #define __array(_type, _item, _len) { \ 428 + .type = #_type"["__stringify(_len)"]", .name = #_item, \ 429 + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ 430 + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, 433 431 434 432 #undef 
__dynamic_array 435 - #define __dynamic_array(type, item, len) \ 436 - ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ 437 - offsetof(typeof(field), __data_loc_##item), \ 438 - sizeof(field.__data_loc_##item), \ 439 - is_signed_type(type), FILTER_OTHER); 433 + #define __dynamic_array(_type, _item, _len) { \ 434 + .type = "__data_loc " #_type "[]", .name = #_item, \ 435 + .size = 4, .align = 4, \ 436 + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, 440 437 441 438 #undef __string 442 439 #define __string(item, src) __dynamic_array(char, item, -1) ··· 437 452 438 453 #undef DECLARE_EVENT_CLASS 439 454 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ 440 - static int notrace __init \ 441 - trace_event_define_fields_##call(struct trace_event_call *event_call) \ 442 - { \ 443 - struct trace_event_raw_##call field; \ 444 - int ret; \ 445 - \ 446 - tstruct; \ 447 - \ 448 - return ret; \ 449 - } 455 + static struct trace_event_fields trace_event_fields_##call[] = { \ 456 + tstruct \ 457 + {} }; 450 458 451 459 #undef DEFINE_EVENT 452 460 #define DEFINE_EVENT(template, name, proto, args) ··· 597 619 * 598 620 * static struct trace_event_class __used event_class_<template> = { 599 621 * .system = "<system>", 600 - * .define_fields = trace_event_define_fields_<call>, 622 + * .fields_array = trace_event_fields_<call>, 601 623 * .fields = LIST_HEAD_INIT(event_class_##call.fields), 602 624 * .raw_init = trace_event_raw_init, 603 625 * .probe = trace_event_raw_event_##call, ··· 746 768 static char print_fmt_##call[] = print; \ 747 769 static struct trace_event_class __used __refdata event_class_##call = { \ 748 770 .system = TRACE_SYSTEM_STRING, \ 749 - .define_fields = trace_event_define_fields_##call, \ 771 + .fields_array = trace_event_fields_##call, \ 750 772 .fields = LIST_HEAD_INIT(event_class_##call.fields),\ 751 773 .raw_init = trace_event_raw_init, \ 752 774 .probe = trace_event_raw_event_##call, \
+3 -1
kernel/kprobes.c
··· 510 510 arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); 511 511 /* Loop free_list for disarming */ 512 512 list_for_each_entry_safe(op, tmp, &freeing_list, list) { 513 + /* Switching from detour code to origin */ 514 + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 513 515 /* Disarm probes if marked disabled */ 514 516 if (kprobe_disabled(&op->kp)) 515 517 arch_disarm_kprobe(&op->kp); ··· 651 649 { 652 650 lockdep_assert_cpus_held(); 653 651 arch_unoptimize_kprobe(op); 652 + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 654 653 if (kprobe_disabled(&op->kp)) 655 654 arch_disarm_kprobe(&op->kp); 656 655 } ··· 679 676 return; 680 677 } 681 678 682 - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 683 679 if (!list_empty(&op->list)) { 684 680 /* Dequeue from the optimization queue */ 685 681 list_del_init(&op->list);
-43
kernel/module.c
··· 2031 2031 frob_writable_data(&mod->init_layout, set_memory_nx); 2032 2032 } 2033 2033 2034 - /* Iterate through all modules and set each module's text as RW */ 2035 - void set_all_modules_text_rw(void) 2036 - { 2037 - struct module *mod; 2038 - 2039 - if (!rodata_enabled) 2040 - return; 2041 - 2042 - mutex_lock(&module_mutex); 2043 - list_for_each_entry_rcu(mod, &modules, list) { 2044 - if (mod->state == MODULE_STATE_UNFORMED) 2045 - continue; 2046 - 2047 - frob_text(&mod->core_layout, set_memory_rw); 2048 - frob_text(&mod->init_layout, set_memory_rw); 2049 - } 2050 - mutex_unlock(&module_mutex); 2051 - } 2052 - 2053 - /* Iterate through all modules and set each module's text as RO */ 2054 - void set_all_modules_text_ro(void) 2055 - { 2056 - struct module *mod; 2057 - 2058 - if (!rodata_enabled) 2059 - return; 2060 - 2061 - mutex_lock(&module_mutex); 2062 - list_for_each_entry_rcu(mod, &modules, list) { 2063 - /* 2064 - * Ignore going modules since it's possible that ro 2065 - * protection has already been disabled, otherwise we'll 2066 - * run into protection faults at module deallocation. 2067 - */ 2068 - if (mod->state == MODULE_STATE_UNFORMED || 2069 - mod->state == MODULE_STATE_GOING) 2070 - continue; 2071 - 2072 - frob_text(&mod->core_layout, set_memory_ro); 2073 - frob_text(&mod->init_layout, set_memory_ro); 2074 - } 2075 - mutex_unlock(&module_mutex); 2076 - } 2077 2034 #else /* !CONFIG_STRICT_MODULE_RWX */ 2078 2035 static void module_enable_nx(const struct module *mod) { } 2079 2036 #endif /* CONFIG_STRICT_MODULE_RWX */
+14 -17
kernel/trace/trace.h
··· 52 52 #undef __field 53 53 #define __field(type, item) type item; 54 54 55 + #undef __field_fn 56 + #define __field_fn(type, item) type item; 57 + 55 58 #undef __field_struct 56 59 #define __field_struct(type, item) __field(type, item) 57 60 ··· 74 71 #define F_STRUCT(args...) args 75 72 76 73 #undef FTRACE_ENTRY 77 - #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ 74 + #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 78 75 struct struct_name { \ 79 76 struct trace_entry ent; \ 80 77 tstruct \ 81 78 } 82 79 83 80 #undef FTRACE_ENTRY_DUP 84 - #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) 81 + #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) 85 82 86 83 #undef FTRACE_ENTRY_REG 87 - #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ 88 - filter, regfn) \ 89 - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 90 - filter) 84 + #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ 85 + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) 91 86 92 87 #undef FTRACE_ENTRY_PACKED 93 - #define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print, \ 94 - filter) \ 95 - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 96 - filter) __packed 88 + #define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print) \ 89 + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) __packed 97 90 98 91 #include "trace_entries.h" 99 92 ··· 1916 1917 #define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str)) 1917 1918 1918 1919 #undef FTRACE_ENTRY 1919 - #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ 1920 + #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 1920 1921 extern struct trace_event_call \ 1921 1922 __aligned(4) event_##call; 1922 1923 #undef FTRACE_ENTRY_DUP 1923 - #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ 1924 
- FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 1925 - filter) 1924 + #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 1925 + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 1926 1926 #undef FTRACE_ENTRY_PACKED 1927 - #define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \ 1928 - FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 1929 - filter) 1927 + #define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print) \ 1928 + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 1930 1929 1931 1930 #include "trace_entries.h" 1932 1931
+18 -48
kernel/trace/trace_entries.h
··· 61 61 TRACE_FN, 62 62 63 63 F_STRUCT( 64 - __field( unsigned long, ip ) 65 - __field( unsigned long, parent_ip ) 64 + __field_fn( unsigned long, ip ) 65 + __field_fn( unsigned long, parent_ip ) 66 66 ), 67 67 68 68 F_printk(" %ps <-- %ps", 69 69 (void *)__entry->ip, (void *)__entry->parent_ip), 70 - 71 - FILTER_TRACE_FN, 72 70 73 71 perf_ftrace_event_register 74 72 ); ··· 82 84 __field_desc( int, graph_ent, depth ) 83 85 ), 84 86 85 - F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth), 86 - 87 - FILTER_OTHER 87 + F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) 88 88 ); 89 89 90 90 /* Function return entry */ ··· 93 97 F_STRUCT( 94 98 __field_struct( struct ftrace_graph_ret, ret ) 95 99 __field_desc( unsigned long, ret, func ) 100 + __field_desc( unsigned long, ret, overrun ) 96 101 __field_desc( unsigned long long, ret, calltime) 97 102 __field_desc( unsigned long long, ret, rettime ) 98 - __field_desc( unsigned long, ret, overrun ) 99 103 __field_desc( int, ret, depth ) 100 104 ), 101 105 102 106 F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d", 103 107 (void *)__entry->func, __entry->depth, 104 108 __entry->calltime, __entry->rettime, 105 - __entry->depth), 106 - 107 - FILTER_OTHER 109 + __entry->depth) 108 110 ); 109 111 110 112 /* ··· 131 137 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", 132 138 __entry->prev_pid, __entry->prev_prio, __entry->prev_state, 133 139 __entry->next_pid, __entry->next_prio, __entry->next_state, 134 - __entry->next_cpu), 135 - 136 - FILTER_OTHER 140 + __entry->next_cpu) 137 141 ); 138 142 139 143 /* ··· 149 157 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", 150 158 __entry->prev_pid, __entry->prev_prio, __entry->prev_state, 151 159 __entry->next_pid, __entry->next_prio, __entry->next_state, 152 - __entry->next_cpu), 153 - 154 - FILTER_OTHER 160 + __entry->next_cpu) 155 161 ); 156 162 157 163 /* ··· 173 183 (void *)__entry->caller[0], (void *)__entry->caller[1], 174 184 (void *)__entry->caller[2], (void 
*)__entry->caller[3], 175 185 (void *)__entry->caller[4], (void *)__entry->caller[5], 176 - (void *)__entry->caller[6], (void *)__entry->caller[7]), 177 - 178 - FILTER_OTHER 186 + (void *)__entry->caller[6], (void *)__entry->caller[7]) 179 187 ); 180 188 181 189 FTRACE_ENTRY(user_stack, userstack_entry, ··· 191 203 (void *)__entry->caller[0], (void *)__entry->caller[1], 192 204 (void *)__entry->caller[2], (void *)__entry->caller[3], 193 205 (void *)__entry->caller[4], (void *)__entry->caller[5], 194 - (void *)__entry->caller[6], (void *)__entry->caller[7]), 195 - 196 - FILTER_OTHER 206 + (void *)__entry->caller[6], (void *)__entry->caller[7]) 197 207 ); 198 208 199 209 /* ··· 208 222 ), 209 223 210 224 F_printk("%ps: %s", 211 - (void *)__entry->ip, __entry->fmt), 212 - 213 - FILTER_OTHER 225 + (void *)__entry->ip, __entry->fmt) 214 226 ); 215 227 216 228 FTRACE_ENTRY_REG(print, print_entry, ··· 223 239 F_printk("%ps: %s", 224 240 (void *)__entry->ip, __entry->buf), 225 241 226 - FILTER_OTHER, 227 - 228 242 ftrace_event_register 229 243 ); 230 244 ··· 236 254 ), 237 255 238 256 F_printk("id:%04x %08x", 239 - __entry->id, (int)__entry->buf[0]), 240 - 241 - FILTER_OTHER 257 + __entry->id, (int)__entry->buf[0]) 242 258 ); 243 259 244 260 FTRACE_ENTRY(bputs, bputs_entry, ··· 249 269 ), 250 270 251 271 F_printk("%ps: %s", 252 - (void *)__entry->ip, __entry->str), 253 - 254 - FILTER_OTHER 272 + (void *)__entry->ip, __entry->str) 255 273 ); 256 274 257 275 FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, ··· 261 283 __field_desc( resource_size_t, rw, phys ) 262 284 __field_desc( unsigned long, rw, value ) 263 285 __field_desc( unsigned long, rw, pc ) 264 - __field_desc( int, rw, map_id ) 286 + __field_desc( int, rw, map_id ) 265 287 __field_desc( unsigned char, rw, opcode ) 266 288 __field_desc( unsigned char, rw, width ) 267 289 ), 268 290 269 291 F_printk("%lx %lx %lx %d %x %x", 270 292 (unsigned long)__entry->phys, __entry->value, __entry->pc, 271 - __entry->map_id, 
__entry->opcode, __entry->width), 272 - 273 - FILTER_OTHER 293 + __entry->map_id, __entry->opcode, __entry->width) 274 294 ); 275 295 276 296 FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, ··· 280 304 __field_desc( resource_size_t, map, phys ) 281 305 __field_desc( unsigned long, map, virt ) 282 306 __field_desc( unsigned long, map, len ) 283 - __field_desc( int, map, map_id ) 307 + __field_desc( int, map, map_id ) 284 308 __field_desc( unsigned char, map, opcode ) 285 309 ), 286 310 287 311 F_printk("%lx %lx %lx %d %x", 288 312 (unsigned long)__entry->phys, __entry->virt, __entry->len, 289 - __entry->map_id, __entry->opcode), 290 - 291 - FILTER_OTHER 313 + __entry->map_id, __entry->opcode) 292 314 ); 293 315 294 316 ··· 308 334 F_printk("%u:%s:%s (%u)%s", 309 335 __entry->line, 310 336 __entry->func, __entry->file, __entry->correct, 311 - __entry->constant ? " CONSTANT" : ""), 312 - 313 - FILTER_OTHER 337 + __entry->constant ? " CONSTANT" : "") 314 338 ); 315 339 316 340 ··· 334 362 __entry->duration, 335 363 __entry->outer_duration, 336 364 __entry->nmi_total_ts, 337 - __entry->nmi_count), 338 - 339 - FILTER_OTHER 365 + __entry->nmi_count) 340 366 );
+19 -1
kernel/trace/trace_events.c
··· 24 24 #include <linux/delay.h> 25 25 26 26 #include <trace/events/sched.h> 27 + #include <trace/syscall.h> 27 28 28 29 #include <asm/setup.h> 29 30 ··· 2018 2017 */ 2019 2018 head = trace_get_fields(call); 2020 2019 if (list_empty(head)) { 2021 - ret = call->class->define_fields(call); 2020 + struct trace_event_fields *field = call->class->fields_array; 2021 + unsigned int offset = sizeof(struct trace_entry); 2022 + 2023 + for (; field->type; field++) { 2024 + if (field->type == TRACE_FUNCTION_TYPE) { 2025 + ret = field->define_fields(call); 2026 + break; 2027 + } 2028 + 2029 + offset = ALIGN(offset, field->align); 2030 + ret = trace_define_field(call, field->type, field->name, 2031 + offset, field->size, 2032 + field->is_signed, field->filter_type); 2033 + if (ret) 2034 + break; 2035 + 2036 + offset += field->size; 2037 + } 2022 2038 if (ret < 0) { 2023 2039 pr_warn("Could not initialize trace point events/%s\n", 2024 2040 name);
+7 -1
kernel/trace/trace_events_hist.c
··· 1154 1154 return NULL; 1155 1155 } 1156 1156 1157 + static struct trace_event_fields synth_event_fields_array[] = { 1158 + { .type = TRACE_FUNCTION_TYPE, 1159 + .define_fields = synth_event_define_fields }, 1160 + {} 1161 + }; 1162 + 1157 1163 static int register_synth_event(struct synth_event *event) 1158 1164 { 1159 1165 struct trace_event_call *call = &event->call; ··· 1181 1175 1182 1176 INIT_LIST_HEAD(&call->class->fields); 1183 1177 call->event.funcs = &synth_event_funcs; 1184 - call->class->define_fields = synth_event_define_fields; 1178 + call->class->fields_array = synth_event_fields_array; 1185 1179 1186 1180 ret = register_trace_event(&call->event); 1187 1181 if (!ret) {
+39 -67
kernel/trace/trace_export.c
··· 29 29 * function and thus become accesible via perf. 30 30 */ 31 31 #undef FTRACE_ENTRY_REG 32 - #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ 33 - filter, regfn) \ 34 - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 35 - filter) 32 + #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \ 33 + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) 36 34 37 35 /* not needed for this file */ 38 36 #undef __field_struct ··· 38 40 39 41 #undef __field 40 42 #define __field(type, item) type item; 43 + 44 + #undef __field_fn 45 + #define __field_fn(type, item) type item; 41 46 42 47 #undef __field_desc 43 48 #define __field_desc(type, container, item) type item; ··· 61 60 #define F_printk(fmt, args...) fmt, args 62 61 63 62 #undef FTRACE_ENTRY 64 - #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ 63 + #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 65 64 struct ____ftrace_##name { \ 66 65 tstruct \ 67 66 }; \ ··· 74 73 } 75 74 76 75 #undef FTRACE_ENTRY_DUP 77 - #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ 78 - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ 79 - filter) 76 + #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ 77 + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) 80 78 81 79 #include "trace_entries.h" 82 80 81 + #undef __field_ext 82 + #define __field_ext(_type, _item, _filter_type) { \ 83 + .type = #_type, .name = #_item, \ 84 + .size = sizeof(_type), .align = __alignof__(_type), \ 85 + is_signed_type(_type), .filter_type = _filter_type }, 86 + 83 87 #undef __field 84 - #define __field(type, item) \ 85 - ret = trace_define_field(event_call, #type, #item, \ 86 - offsetof(typeof(field), item), \ 87 - sizeof(field.item), \ 88 - is_signed_type(type), filter_type); \ 89 - if (ret) \ 90 - return ret; 88 + #define __field(_type, _item) __field_ext(_type, _item, 
FILTER_OTHER) 89 + 90 + #undef __field_fn 91 + #define __field_fn(_type, _item) __field_ext(_type, _item, FILTER_TRACE_FN) 91 92 92 93 #undef __field_desc 93 - #define __field_desc(type, container, item) \ 94 - ret = trace_define_field(event_call, #type, #item, \ 95 - offsetof(typeof(field), \ 96 - container.item), \ 97 - sizeof(field.container.item), \ 98 - is_signed_type(type), filter_type); \ 99 - if (ret) \ 100 - return ret; 94 + #define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER) 101 95 102 96 #undef __array 103 - #define __array(type, item, len) \ 104 - do { \ 105 - char *type_str = #type"["__stringify(len)"]"; \ 106 - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 107 - ret = trace_define_field(event_call, type_str, #item, \ 108 - offsetof(typeof(field), item), \ 109 - sizeof(field.item), \ 110 - is_signed_type(type), filter_type); \ 111 - if (ret) \ 112 - return ret; \ 113 - } while (0); 97 + #define __array(_type, _item, _len) { \ 98 + .type = #_type"["__stringify(_len)"]", .name = #_item, \ 99 + .size = sizeof(_type[_len]), .align = __alignof__(_type), \ 100 + is_signed_type(_type), .filter_type = FILTER_OTHER }, 114 101 115 102 #undef __array_desc 116 - #define __array_desc(type, container, item, len) \ 117 - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 118 - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 119 - offsetof(typeof(field), \ 120 - container.item), \ 121 - sizeof(field.container.item), \ 122 - is_signed_type(type), filter_type); \ 123 - if (ret) \ 124 - return ret; 103 + #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) 125 104 126 105 #undef __dynamic_array 127 - #define __dynamic_array(type, item) \ 128 - ret = trace_define_field(event_call, #type "[]", #item, \ 129 - offsetof(typeof(field), item), \ 130 - 0, is_signed_type(type), filter_type);\ 131 - if (ret) \ 132 - return ret; 106 + #define __dynamic_array(_type, _item) { \ 107 + .type = #_type "[]", .name = 
#_item, \ 108 + .size = 0, .align = __alignof__(_type), \ 109 + is_signed_type(_type), .filter_type = FILTER_OTHER }, 133 110 134 111 #undef FTRACE_ENTRY 135 - #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ 136 - static int __init \ 137 - ftrace_define_fields_##name(struct trace_event_call *event_call) \ 138 - { \ 139 - struct struct_name field; \ 140 - int ret; \ 141 - int filter_type = filter; \ 142 - \ 143 - tstruct; \ 144 - \ 145 - return ret; \ 146 - } 112 + #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 113 + static struct trace_event_fields ftrace_event_fields_##name[] = { \ 114 + tstruct \ 115 + {} }; 147 116 148 117 #include "trace_entries.h" 149 118 ··· 122 151 123 152 #undef __field 124 153 #define __field(type, item) 154 + 155 + #undef __field_fn 156 + #define __field_fn(type, item) 125 157 126 158 #undef __field_desc 127 159 #define __field_desc(type, container, item) ··· 142 168 #define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) 143 169 144 170 #undef FTRACE_ENTRY_REG 145 - #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ 146 - regfn) \ 147 - \ 171 + #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, regfn) \ 148 172 static struct trace_event_class __refdata event_class_ftrace_##call = { \ 149 173 .system = __stringify(TRACE_SYSTEM), \ 150 - .define_fields = ftrace_define_fields_##call, \ 174 + .fields_array = ftrace_event_fields_##call, \ 151 175 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ 152 176 .reg = regfn, \ 153 177 }; \ ··· 163 191 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; 164 192 165 193 #undef FTRACE_ENTRY 166 - #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \ 194 + #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ 167 195 FTRACE_ENTRY_REG(call, struct_name, etype, \ 168 - PARAMS(tstruct), PARAMS(print), filter, NULL) 196 + PARAMS(tstruct), PARAMS(print), 
NULL) 169 197 170 198 bool ftrace_event_is_function(struct trace_event_call *call) 171 199 {
+14 -2
kernel/trace/trace_kprobe.c
··· 1555 1555 .trace = print_kprobe_event 1556 1556 }; 1557 1557 1558 + static struct trace_event_fields kretprobe_fields_array[] = { 1559 + { .type = TRACE_FUNCTION_TYPE, 1560 + .define_fields = kretprobe_event_define_fields }, 1561 + {} 1562 + }; 1563 + 1564 + static struct trace_event_fields kprobe_fields_array[] = { 1565 + { .type = TRACE_FUNCTION_TYPE, 1566 + .define_fields = kprobe_event_define_fields }, 1567 + {} 1568 + }; 1569 + 1558 1570 static inline void init_trace_event_call(struct trace_kprobe *tk) 1559 1571 { 1560 1572 struct trace_event_call *call = trace_probe_event_call(&tk->tp); 1561 1573 1562 1574 if (trace_kprobe_is_return(tk)) { 1563 1575 call->event.funcs = &kretprobe_funcs; 1564 - call->class->define_fields = kretprobe_event_define_fields; 1576 + call->class->fields_array = kretprobe_fields_array; 1565 1577 } else { 1566 1578 call->event.funcs = &kprobe_funcs; 1567 - call->class->define_fields = kprobe_event_define_fields; 1579 + call->class->fields_array = kprobe_fields_array; 1568 1580 } 1569 1581 1570 1582 call->flags = TRACE_EVENT_FL_KPROBE;
+20 -30
kernel/trace/trace_syscalls.c
··· 203 203 204 204 extern char *__bad_type_size(void); 205 205 206 - #define SYSCALL_FIELD(type, field, name) \ 207 - sizeof(type) != sizeof(trace.field) ? \ 208 - __bad_type_size() : \ 209 - #type, #name, offsetof(typeof(trace), field), \ 210 - sizeof(trace.field), is_signed_type(type) 206 + #define SYSCALL_FIELD(_type, _name) { \ 207 + .type = #_type, .name = #_name, \ 208 + .size = sizeof(_type), .align = __alignof__(_type), \ 209 + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER } 211 210 212 211 static int __init 213 212 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) ··· 273 274 { 274 275 struct syscall_trace_enter trace; 275 276 struct syscall_metadata *meta = call->data; 276 - int ret; 277 - int i; 278 277 int offset = offsetof(typeof(trace), args); 279 - 280 - ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), 281 - FILTER_OTHER); 282 - if (ret) 283 - return ret; 278 + int ret, i; 284 279 285 280 for (i = 0; i < meta->nb_args; i++) { 286 281 ret = trace_define_field(call, meta->types[i], 287 282 meta->args[i], offset, 288 283 sizeof(unsigned long), 0, 289 284 FILTER_OTHER); 285 + if (ret) 286 + break; 290 287 offset += sizeof(unsigned long); 291 288 } 292 - 293 - return ret; 294 - } 295 - 296 - static int __init syscall_exit_define_fields(struct trace_event_call *call) 297 - { 298 - struct syscall_trace_exit trace; 299 - int ret; 300 - 301 - ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), 302 - FILTER_OTHER); 303 - if (ret) 304 - return ret; 305 - 306 - ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret), 307 - FILTER_OTHER); 308 289 309 290 return ret; 310 291 } ··· 486 507 return id; 487 508 } 488 509 510 + static struct trace_event_fields __refdata syscall_enter_fields_array[] = { 511 + SYSCALL_FIELD(int, __syscall_nr), 512 + { .type = TRACE_FUNCTION_TYPE, 513 + .define_fields = syscall_enter_define_fields }, 514 + {} 515 + }; 516 + 489 517 struct 
trace_event_functions enter_syscall_print_funcs = { 490 518 .trace = print_syscall_enter, 491 519 }; ··· 504 518 struct trace_event_class __refdata event_class_syscall_enter = { 505 519 .system = "syscalls", 506 520 .reg = syscall_enter_register, 507 - .define_fields = syscall_enter_define_fields, 521 + .fields_array = syscall_enter_fields_array, 508 522 .get_fields = syscall_get_enter_fields, 509 523 .raw_init = init_syscall_trace, 510 524 }; ··· 512 526 struct trace_event_class __refdata event_class_syscall_exit = { 513 527 .system = "syscalls", 514 528 .reg = syscall_exit_register, 515 - .define_fields = syscall_exit_define_fields, 529 + .fields_array = (struct trace_event_fields[]){ 530 + SYSCALL_FIELD(int, __syscall_nr), 531 + SYSCALL_FIELD(long, ret), 532 + {} 533 + }, 516 534 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), 517 535 .raw_init = init_syscall_trace, 518 536 };
+7 -2
kernel/trace/trace_uprobe.c
··· 1507 1507 .trace = print_uprobe_event 1508 1508 }; 1509 1509 1510 + static struct trace_event_fields uprobe_fields_array[] = { 1511 + { .type = TRACE_FUNCTION_TYPE, 1512 + .define_fields = uprobe_event_define_fields }, 1513 + {} 1514 + }; 1515 + 1510 1516 static inline void init_trace_event_call(struct trace_uprobe *tu) 1511 1517 { 1512 1518 struct trace_event_call *call = trace_probe_event_call(&tu->tp); 1513 - 1514 1519 call->event.funcs = &uprobe_funcs; 1515 - call->class->define_fields = uprobe_event_define_fields; 1520 + call->class->fields_array = uprobe_fields_array; 1516 1521 1517 1522 call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; 1518 1523 call->class->reg = trace_uprobe_register;
+14 -14
net/mac80211/trace.h
··· 408 408 __field(u32, basic_rates) 409 409 __array(int, mcast_rate, NUM_NL80211_BANDS) 410 410 __field(u16, ht_operation_mode) 411 - __field(s32, cqm_rssi_thold); 412 - __field(s32, cqm_rssi_hyst); 413 - __field(u32, channel_width); 414 - __field(u32, channel_cfreq1); 411 + __field(s32, cqm_rssi_thold) 412 + __field(s32, cqm_rssi_hyst) 413 + __field(u32, channel_width) 414 + __field(u32, channel_cfreq1) 415 415 __dynamic_array(u32, arp_addr_list, 416 416 info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? 417 417 IEEE80211_BSS_ARP_ADDR_LIST_LEN : 418 - info->arp_addr_cnt); 419 - __field(int, arp_addr_cnt); 420 - __field(bool, qos); 421 - __field(bool, idle); 422 - __field(bool, ps); 423 - __dynamic_array(u8, ssid, info->ssid_len); 424 - __field(bool, hidden_ssid); 418 + info->arp_addr_cnt) 419 + __field(int, arp_addr_cnt) 420 + __field(bool, qos) 421 + __field(bool, idle) 422 + __field(bool, ps) 423 + __dynamic_array(u8, ssid, info->ssid_len) 424 + __field(bool, hidden_ssid) 425 425 __field(int, txpower) 426 426 __field(u8, p2p_oppps_ctwindow) 427 427 ), ··· 1672 1672 VIF_ENTRY 1673 1673 __field(u8, dtimper) 1674 1674 __field(u16, bcnint) 1675 - __dynamic_array(u8, ssid, info->ssid_len); 1676 - __field(bool, hidden_ssid); 1675 + __dynamic_array(u8, ssid, info->ssid_len) 1676 + __field(bool, hidden_ssid) 1677 1677 ), 1678 1678 1679 1679 TP_fast_assign( ··· 1739 1739 VIF_ENTRY 1740 1740 __field(u8, dtimper) 1741 1741 __field(u16, bcnint) 1742 - __dynamic_array(u8, ssid, info->ssid_len); 1742 + __dynamic_array(u8, ssid, info->ssid_len) 1743 1743 ), 1744 1744 1745 1745 TP_fast_assign(
+3 -3
net/wireless/trace.h
··· 2009 2009 WIPHY_ENTRY 2010 2010 WDEV_ENTRY 2011 2011 __field(u8, master_pref) 2012 - __field(u8, bands); 2012 + __field(u8, bands) 2013 2013 ), 2014 2014 TP_fast_assign( 2015 2015 WIPHY_ASSIGN; ··· 2031 2031 WIPHY_ENTRY 2032 2032 WDEV_ENTRY 2033 2033 __field(u8, master_pref) 2034 - __field(u8, bands); 2035 - __field(u32, changes); 2034 + __field(u8, bands) 2035 + __field(u32, changes) 2036 2036 ), 2037 2037 TP_fast_assign( 2038 2038 WIPHY_ASSIGN;