···5353obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o5454obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o5555obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o5656-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o5757-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o5656+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o5757+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o5858obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o5959obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o6060# Main staffs in KPROBES are in arch/arm/probes/ .
+2-8
arch/arm/kernel/ftrace.c
···2222#include <asm/ftrace.h>2323#include <asm/insn.h>2424#include <asm/set_memory.h>2525+#include <asm/patch.h>25262627#ifdef CONFIG_THUMB2_KERNEL2728#define NOP 0xf85deb04 /* pop.w {lr} */···3635{3736 int *command = data;38373939- set_kernel_text_rw();4038 ftrace_modify_all_code(*command);4141- set_kernel_text_ro();42394340 return 0;4441}···58595960int ftrace_arch_code_modify_prepare(void)6061{6161- set_all_modules_text_rw();6262 return 0;6363}64646565int ftrace_arch_code_modify_post_process(void)6666{6767- set_all_modules_text_ro();6867 /* Make sure any TLB misses during machine stop are cleared. */6968 flush_tlb_all();7069 return 0;···9497 return -EINVAL;9598 }96999797- if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))9898- return -EPERM;9999-100100- flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);100100+ __patch_text((void *)pc, new);101101102102 return 0;103103}
-12
arch/nds32/kernel/ftrace.c
···8989 return 0;9090}91919292-int ftrace_arch_code_modify_prepare(void)9393-{9494- set_all_modules_text_rw();9595- return 0;9696-}9797-9898-int ftrace_arch_code_modify_post_process(void)9999-{100100- set_all_modules_text_ro();101101- return 0;102102-}103103-10492static unsigned long gen_sethi_insn(unsigned long addr)10593{10694 unsigned long opcode = 0x46000000;
-2
arch/x86/include/asm/ftrace.h
···4747 /* No extra data needed for x86 */4848};49495050-int ftrace_int3_handler(struct pt_regs *regs);5151-5250#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR53515452#endif /* CONFIG_DYNAMIC_FTRACE */
···8181int set_direct_map_default_noflush(struct page *page);82828383extern int kernel_set_to_readonly;8484-void set_kernel_text_rw(void);8585-void set_kernel_text_ro(void);86848785#ifdef CONFIG_X86_648886static inline int set_mce_nospec(unsigned long pfn)
+67-19
arch/x86/include/asm/text-patching.h
···2525 */2626#define POKE_MAX_OPCODE_SIZE 527272828-struct text_poke_loc {2929- void *addr;3030- int len;3131- s32 rel32;3232- u8 opcode;3333- const u8 text[POKE_MAX_OPCODE_SIZE];3434-};3535-3628extern void text_poke_early(void *addr, const void *opcode, size_t len);37293830/*···4250 * an inconsistent instruction while you patch.4351 */4452extern void *text_poke(void *addr, const void *opcode, size_t len);5353+extern void text_poke_sync(void);4554extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);4655extern int poke_int3_handler(struct pt_regs *regs);4756extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);4848-extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);4949-extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,5050- const void *opcode, size_t len, const void *emulate);5151-extern int after_bootmem;5252-extern __ro_after_init struct mm_struct *poking_mm;5353-extern __ro_after_init unsigned long poking_addr;54575555-#ifndef CONFIG_UML_X865656-static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)5757-{5858- regs->ip = ip;5959-}5858+extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);5959+extern void text_poke_finish(void);60606161#define INT3_INSN_SIZE 16262#define INT3_INSN_OPCODE 0xCC···6278#define JMP8_INSN_SIZE 26379#define JMP8_INSN_OPCODE 0xEB64808181+#define DISP32_SIZE 48282+8383+static inline int text_opcode_size(u8 opcode)8484+{8585+ int size = 0;8686+8787+#define __CASE(insn) \8888+ case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break8989+9090+ switch(opcode) {9191+ __CASE(INT3);9292+ __CASE(CALL);9393+ __CASE(JMP32);9494+ __CASE(JMP8);9595+ }9696+9797+#undef __CASE9898+9999+ return size;100100+}101101+102102+union text_poke_insn {103103+ u8 text[POKE_MAX_OPCODE_SIZE];104104+ struct {105105+ u8 opcode;106106+ s32 disp;107107+ } __attribute__((packed));108108+};109109+110110+static __always_inline111111+void *text_gen_insn(u8 opcode, const void *addr, const void *dest)112112+{113113+ static union text_poke_insn insn; /* per instance */114114+ int size = text_opcode_size(opcode);115115+116116+ insn.opcode = opcode;117117+118118+ if (size > 1) {119119+ insn.disp = (long)dest - (long)(addr + size);120120+ if (size == 2) {121121+ /*122122+ * Ensure that for JMP9 the displacement123123+ * actually fits the signed byte.124124+ */125125+ BUG_ON((insn.disp >> 31) != (insn.disp >> 7));126126+ }127127+ }128128+129129+ return &insn.text;130130+}131131+132132+extern int after_bootmem;133133+extern __ro_after_init struct mm_struct *poking_mm;134134+extern __ro_after_init unsigned long poking_addr;135135+136136+#ifndef CONFIG_UML_X86137137+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)138138+{139139+ regs->ip = ip;140140+}141141+65142static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)66143{67144 /*···13085 * stack where the break point happened, and the saving of13186 * pt_regs. We can extend the original stack because of13287 * this gap. See the idtentry macro's create_gap option.8888+ *8989+ * Similarly entry_32.S will have a gap on the stack for (any) hardware9090+ * exception and pt_regs; see FIXUP_FRAME.13391 */13492 regs->sp -= sizeof(unsigned long);13593 *(unsigned long *)regs->sp = val;
+105-25
arch/x86/kernel/alternative.c
···936936 sync_core();937937}938938939939+void text_poke_sync(void)940940+{941941+ on_each_cpu(do_sync_core, NULL, 1);942942+}943943+944944+struct text_poke_loc {945945+ s32 rel_addr; /* addr := _stext + rel_addr */946946+ s32 rel32;947947+ u8 opcode;948948+ const u8 text[POKE_MAX_OPCODE_SIZE];949949+};950950+939951static struct bp_patching_desc {940952 struct text_poke_loc *vec;941953 int nr_entries;942954} bp_patching;943955944944-static int patch_cmp(const void *key, const void *elt)956956+static inline void *text_poke_addr(struct text_poke_loc *tp)957957+{958958+ return _stext + tp->rel_addr;959959+}960960+961961+static int notrace patch_cmp(const void *key, const void *elt)945962{946963 struct text_poke_loc *tp = (struct text_poke_loc *) elt;947964948948- if (key < tp->addr)965965+ if (key < text_poke_addr(tp))949966 return -1;950950- if (key > tp->addr)967967+ if (key > text_poke_addr(tp))951968 return 1;952969 return 0;953970}954971NOKPROBE_SYMBOL(patch_cmp);955972956956-int poke_int3_handler(struct pt_regs *regs)973973+int notrace poke_int3_handler(struct pt_regs *regs)957974{958975 struct text_poke_loc *tp;959976 void *ip;977977+ int len;960978961979 /*962980 * Having observed our INT3 instruction, we now must observe···1010992 return 0;1011993 } else {1012994 tp = bp_patching.vec;10131013- if (tp->addr != ip)995995+ if (text_poke_addr(tp) != ip)1014996 return 0;1015997 }101699810171017- ip += tp->len;999999+ len = text_opcode_size(tp->opcode);10001000+ ip += len;1018100110191002 switch (tp->opcode) {10201003 case INT3_INSN_OPCODE:···10421023}10431024NOKPROBE_SYMBOL(poke_int3_handler);1044102510261026+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))10271027+static struct text_poke_loc tp_vec[TP_VEC_MAX];10281028+static int tp_vec_nr;10291029+10451030/**10461031 * text_poke_bp_batch() -- update instructions on live kernel on SMP10471032 * @tp: vector of instructions to patch···10671044 * replacing opcode10681045 * - sync cores10691046 */10701070-void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)10471047+static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)10711048{10721049 unsigned char int3 = INT3_INSN_OPCODE;10731050 unsigned int i;···10881065 * First step: add a int3 trap to the address that will be patched.10891066 */10901067 for (i = 0; i < nr_entries; i++)10911091- text_poke(tp[i].addr, &int3, sizeof(int3));10681068+ text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);1092106910931093- on_each_cpu(do_sync_core, NULL, 1);10701070+ text_poke_sync();1094107110951072 /*10961073 * Second step: update all but the first byte of the patched range.10971074 */10981075 for (do_sync = 0, i = 0; i < nr_entries; i++) {10991099- if (tp[i].len - sizeof(int3) > 0) {11001100- text_poke((char *)tp[i].addr + sizeof(int3),11011101- (const char *)tp[i].text + sizeof(int3),11021102- tp[i].len - sizeof(int3));10761076+ int len = text_opcode_size(tp[i].opcode);10771077+10781078+ if (len - INT3_INSN_SIZE > 0) {10791079+ text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,10801080+ (const char *)tp[i].text + INT3_INSN_SIZE,10811081+ len - INT3_INSN_SIZE);11031082 do_sync++;11041083 }11051084 }···11121087 * not necessary and we'd be safe even without it. But11131088 * better safe than sorry (plus there's not only Intel).11141089 */11151115- on_each_cpu(do_sync_core, NULL, 1);10901090+ text_poke_sync();11161091 }1117109211181093 /*···11231098 if (tp[i].text[0] == INT3_INSN_OPCODE)11241099 continue;1125110011261126- text_poke(tp[i].addr, tp[i].text, sizeof(int3));11011101+ text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);11271102 do_sync++;11281103 }1129110411301105 if (do_sync)11311131- on_each_cpu(do_sync_core, NULL, 1);11061106+ text_poke_sync();1132110711331108 /*11341109 * sync_core() implies an smp_mb() and orders this store against11351110 * the writing of the new instruction.11361111 */11371137- bp_patching.vec = NULL;11381112 bp_patching.nr_entries = 0;11131113+ /*11141114+ * This sync_core () call ensures that all INT3 handlers in progress11151115+ * have finished. This allows poke_int3_handler() after this to11161116+ * avoid touching bp_paching.vec by checking nr_entries == 0.11171117+ */11181118+ text_poke_sync();11191119+ bp_patching.vec = NULL;11391120}1140112111411122void text_poke_loc_init(struct text_poke_loc *tp, void *addr,···11491118{11501119 struct insn insn;1151112011521152- if (!opcode)11531153- opcode = (void *)tp->text;11541154- else11551155- memcpy((void *)tp->text, opcode, len);11561156-11211121+ memcpy((void *)tp->text, opcode, len);11571122 if (!emulate)11581123 emulate = opcode;11591124···11591132 BUG_ON(!insn_complete(&insn));11601133 BUG_ON(len != insn.length);1161113411621162- tp->addr = addr;11631163- tp->len = len;11351135+ tp->rel_addr = addr - (void *)_stext;11641136 tp->opcode = insn.opcode.bytes[0];1165113711661138 switch (tp->opcode) {···11931167 }11941168}1195116911701170+/*11711171+ * We hard rely on the tp_vec being ordered; ensure this is so by flushing11721172+ * early if needed.11731173+ */11741174+static bool tp_order_fail(void *addr)11751175+{11761176+ struct text_poke_loc *tp;11771177+11781178+ if (!tp_vec_nr)11791179+ return false;11801180+11811181+ if (!addr) /* force */11821182+ return true;11831183+11841184+ tp = &tp_vec[tp_vec_nr - 1];11851185+ if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)11861186+ return true;11871187+11881188+ return false;11891189+}11901190+11911191+static void text_poke_flush(void *addr)11921192+{11931193+ if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {11941194+ text_poke_bp_batch(tp_vec, tp_vec_nr);11951195+ tp_vec_nr = 0;11961196+ }11971197+}11981198+11991199+void text_poke_finish(void)12001200+{12011201+ text_poke_flush(NULL);12021202+}12031203+12041204+void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)12051205+{12061206+ struct text_poke_loc *tp;12071207+12081208+ if (unlikely(system_state == SYSTEM_BOOTING)) {12091209+ text_poke_early(addr, opcode, len);12101210+ return;12111211+ }12121212+12131213+ text_poke_flush(addr);12141214+12151215+ tp = &tp_vec[tp_vec_nr++];12161216+ text_poke_loc_init(tp, addr, opcode, len, emulate);12171217+}12181218+11961219/**11971220 * text_poke_bp() -- update instructions on live kernel on SMP11981221 * @addr: address to patch···12531178 * dynamically allocated memory. This function should be used when it is12541179 * not possible to allocate memory.12551180 */12561256-void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)11811181+void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)12571182{12581183 struct text_poke_loc tp;11841184+11851185+ if (unlikely(system_state == SYSTEM_BOOTING)) {11861186+ text_poke_early(addr, opcode, len);11871187+ return;11881188+ }1259118912601190 text_poke_loc_init(&tp, addr, opcode, len, emulate);12611191 text_poke_bp_batch(&tp, 1);
+135-571
arch/x86/kernel/ftrace.c
···34343535#ifdef CONFIG_DYNAMIC_FTRACE36363737+static int ftrace_poke_late = 0;3838+3739int ftrace_arch_code_modify_prepare(void)3840 __acquires(&text_mutex)3941{···4543 * ftrace has it set to "read/write".4644 */4745 mutex_lock(&text_mutex);4848- set_kernel_text_rw();4949- set_all_modules_text_rw();4646+ ftrace_poke_late = 1;5047 return 0;5148}52495350int ftrace_arch_code_modify_post_process(void)5451 __releases(&text_mutex)5552{5656- set_all_modules_text_ro();5757- set_kernel_text_ro();5353+ /*5454+ * ftrace_make_{call,nop}() may be called during5555+ * module load, and we need to finish the text_poke_queue()5656+ * that they do, here.5757+ */5858+ text_poke_finish();5959+ ftrace_poke_late = 0;5860 mutex_unlock(&text_mutex);5961 return 0;6062}61636262-union ftrace_code_union {6363- char code[MCOUNT_INSN_SIZE];6464- struct {6565- unsigned char op;6666- int offset;6767- } __attribute__((packed));6868-};6969-7070-static int ftrace_calc_offset(long ip, long addr)7171-{7272- return (int)(addr - ip);7373-}7474-7575-static unsigned char *7676-ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)7777-{7878- static union ftrace_code_union calc;7979-8080- calc.op = op;8181- calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);8282-8383- return calc.code;8484-}8585-8686-static unsigned char *8787-ftrace_call_replace(unsigned long ip, unsigned long addr)8888-{8989- return ftrace_text_replace(0xe8, ip, addr);9090-}9191-9292-static inline int9393-within(unsigned long addr, unsigned long start, unsigned long end)9494-{9595- return addr >= start && addr < end;9696-}9797-9898-static unsigned long text_ip_addr(unsigned long ip)9999-{100100- /*101101- * On x86_64, kernel text mappings are mapped read-only, so we use102102- * the kernel identity mapping instead of the kernel text mapping103103- * to modify the kernel text.104104- *105105- * For 32bit kernels, these mappings are same and we can use106106- * kernel identity mapping to modify code.107107- */108108- if (within(ip, (unsigned long)_text, (unsigned long)_etext))109109- ip = (unsigned long)__va(__pa_symbol(ip));110110-111111- return ip;112112-}113113-114114-static const unsigned char *ftrace_nop_replace(void)6464+static const char *ftrace_nop_replace(void)11565{11666 return ideal_nops[NOP_ATOMIC5];11767}11868119119-static int120120-ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,121121- unsigned const char *new_code)6969+static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)12270{123123- unsigned char replaced[MCOUNT_INSN_SIZE];7171+ return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);7272+}12473125125- ftrace_expected = old_code;7474+static int ftrace_verify_code(unsigned long ip, const char *old_code)7575+{7676+ char cur_code[MCOUNT_INSN_SIZE];1267712778 /*12879 * Note:···84129 * Carefully read and modify the code with probe_kernel_*(), and make85130 * sure what we read is what we expected it to be before modifying it.86131 */8787-88132 /* read the text we want to modify */8989- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))133133+ if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {134134+ WARN_ON(1);90135 return -EFAULT;136136+ }9113792138 /* Make sure it is what we expect it to be */9393- if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)139139+ if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {140140+ WARN_ON(1);94141 return -EINVAL;9595-9696- ip = text_ip_addr(ip);9797-9898- /* replace the text with the new text */9999- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))100100- return -EPERM;101101-102102- sync_core();142142+ }103143104144 return 0;105145}106146107107-int ftrace_make_nop(struct module *mod,108108- struct dyn_ftrace *rec, unsigned long addr)147147+/*148148+ * Marked __ref because it calls text_poke_early() which is .init.text. That is149149+ * ok because that call will happen early, during boot, when .init sections are150150+ * still present.151151+ */152152+static int __ref153153+ftrace_modify_code_direct(unsigned long ip, const char *old_code,154154+ const char *new_code)109155{110110- unsigned const char *new, *old;156156+ int ret = ftrace_verify_code(ip, old_code);157157+ if (ret)158158+ return ret;159159+160160+ /* replace the text with the new text */161161+ if (ftrace_poke_late)162162+ text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);163163+ else164164+ text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);165165+ return 0;166166+}167167+168168+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)169169+{111170 unsigned long ip = rec->ip;171171+ const char *new, *old;112172113173 old = ftrace_call_replace(ip, addr);114174 new = ftrace_nop_replace();···137167 * just modify the code directly.138168 */139169 if (addr == MCOUNT_ADDR)140140- return ftrace_modify_code_direct(rec->ip, old, new);170170+ return ftrace_modify_code_direct(ip, old, new);141171142142- ftrace_expected = NULL;143143-144144- /* Normal cases use add_brk_on_nop */172172+ /*173173+ * x86 overrides ftrace_replace_code -- this function will never be used174174+ * in this case.175175+ */145176 WARN_ONCE(1, "invalid use of ftrace_make_nop");146177 return -EINVAL;147178}148179149180int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)150181{151151- unsigned const char *new, *old;152182 unsigned long ip = rec->ip;183183+ const char *new, *old;153184154185 old = ftrace_nop_replace();155186 new = ftrace_call_replace(ip, addr);···158187 /* Should only be called when module is loaded */159188 return ftrace_modify_code_direct(rec->ip, old, new);160189}161161-162162-/*163163- * The modifying_ftrace_code is used to tell the breakpoint164164- * handler to call ftrace_int3_handler(). If it fails to165165- * call this handler for a breakpoint added by ftrace, then166166- * the kernel may crash.167167- *168168- * As atomic_writes on x86 do not need a barrier, we do not169169- * need to add smp_mb()s for this to work. It is also considered170170- * that we can not read the modifying_ftrace_code before171171- * executing the breakpoint. That would be quite remarkable if172172- * it could do that. Here's the flow that is required:173173- *174174- * CPU-0 CPU-1175175- *176176- * atomic_inc(mfc);177177- * write int3s178178- * <trap-int3> // implicit (r)mb179179- * if (atomic_read(mfc))180180- * call ftrace_int3_handler()181181- *182182- * Then when we are finished:183183- *184184- * atomic_dec(mfc);185185- *186186- * If we hit a breakpoint that was not set by ftrace, it does not187187- * matter if ftrace_int3_handler() is called or not. It will188188- * simply be ignored. But it is crucial that a ftrace nop/caller189189- * breakpoint is handled. No other user should ever place a190190- * breakpoint on an ftrace nop/caller location. It must only191191- * be done by this code.192192- */193193-atomic_t modifying_ftrace_code __read_mostly;194194-195195-static int196196-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,197197- unsigned const char *new_code);198190199191/*200192 * Should never be called:···171237 unsigned long addr)172238{173239 WARN_ON(1);174174- ftrace_expected = NULL;175240 return -EINVAL;176176-}177177-178178-static unsigned long ftrace_update_func;179179-static unsigned long ftrace_update_func_call;180180-181181-static int update_ftrace_func(unsigned long ip, void *new)182182-{183183- unsigned char old[MCOUNT_INSN_SIZE];184184- int ret;185185-186186- memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);187187-188188- ftrace_update_func = ip;189189- /* Make sure the breakpoints see the ftrace_update_func update */190190- smp_wmb();191191-192192- /* See comment above by declaration of modifying_ftrace_code */193193- atomic_inc(&modifying_ftrace_code);194194-195195- ret = ftrace_modify_code(ip, old, new);196196-197197- atomic_dec(&modifying_ftrace_code);198198-199199- return ret;200241}201242202243int ftrace_update_ftrace_func(ftrace_func_t func)203244{204204- unsigned long ip = (unsigned long)(&ftrace_call);205205- unsigned char *new;206206- int ret;207207-208208- ftrace_update_func_call = (unsigned long)func;209209-210210- new = ftrace_call_replace(ip, (unsigned long)func);211211- ret = update_ftrace_func(ip, new);212212-213213- /* Also update the regs callback function */214214- if (!ret) {215215- ip = (unsigned long)(&ftrace_regs_call);216216- new = ftrace_call_replace(ip, (unsigned long)func);217217- ret = update_ftrace_func(ip, new);218218- }219219-220220- return ret;221221-}222222-223223-static nokprobe_inline int is_ftrace_caller(unsigned long ip)224224-{225225- if (ip == ftrace_update_func)226226- return 1;227227-228228- return 0;229229-}230230-231231-/*232232- * A breakpoint was added to the code address we are about to233233- * modify, and this is the handle that will just skip over it.234234- * We are either changing a nop into a trace call, or a trace235235- * call to a nop. While the change is taking place, we treat236236- * it just like it was a nop.237237- */238238-int ftrace_int3_handler(struct pt_regs *regs)239239-{240245 unsigned long ip;246246+ const char *new;241247242242- if (WARN_ON_ONCE(!regs))243243- return 0;248248+ ip = (unsigned long)(&ftrace_call);249249+ new = ftrace_call_replace(ip, (unsigned long)func);250250+ text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);244251245245- ip = regs->ip - INT3_INSN_SIZE;246246-247247- if (ftrace_location(ip)) {248248- int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);249249- return 1;250250- } else if (is_ftrace_caller(ip)) {251251- if (!ftrace_update_func_call) {252252- int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);253253- return 1;254254- }255255- int3_emulate_call(regs, ftrace_update_func_call);256256- return 1;257257- }252252+ ip = (unsigned long)(&ftrace_regs_call);253253+ new = ftrace_call_replace(ip, (unsigned long)func);254254+ text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);258255259256 return 0;260260-}261261-NOKPROBE_SYMBOL(ftrace_int3_handler);262262-263263-static int ftrace_write(unsigned long ip, const char *val, int size)264264-{265265- ip = text_ip_addr(ip);266266-267267- if (probe_kernel_write((void *)ip, val, size))268268- return -EPERM;269269-270270- return 0;271271-}272272-273273-static int add_break(unsigned long ip, const char *old)274274-{275275- unsigned char replaced[MCOUNT_INSN_SIZE];276276- unsigned char brk = BREAKPOINT_INSTRUCTION;277277-278278- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))279279- return -EFAULT;280280-281281- ftrace_expected = old;282282-283283- /* Make sure it is what we expect it to be */284284- if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)285285- return -EINVAL;286286-287287- return ftrace_write(ip, &brk, 1);288288-}289289-290290-static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)291291-{292292- unsigned const char *old;293293- unsigned long ip = rec->ip;294294-295295- old = ftrace_call_replace(ip, addr);296296-297297- return add_break(rec->ip, old);298298-}299299-300300-301301-static int add_brk_on_nop(struct dyn_ftrace *rec)302302-{303303- unsigned const char *old;304304-305305- old = ftrace_nop_replace();306306-307307- return add_break(rec->ip, old);308308-}309309-310310-static int add_breakpoints(struct dyn_ftrace *rec, bool enable)311311-{312312- unsigned long ftrace_addr;313313- int ret;314314-315315- ftrace_addr = ftrace_get_addr_curr(rec);316316-317317- ret = ftrace_test_record(rec, enable);318318-319319- switch (ret) {320320- case FTRACE_UPDATE_IGNORE:321321- return 0;322322-323323- case FTRACE_UPDATE_MAKE_CALL:324324- /* converting nop to call */325325- return add_brk_on_nop(rec);326326-327327- case FTRACE_UPDATE_MODIFY_CALL:328328- case FTRACE_UPDATE_MAKE_NOP:329329- /* converting a call to a nop */330330- return add_brk_on_call(rec, ftrace_addr);331331- }332332- return 0;333333-}334334-335335-/*336336- * On error, we need to remove breakpoints. This needs to337337- * be done caefully. If the address does not currently have a338338- * breakpoint, we know we are done. Otherwise, we look at the339339- * remaining 4 bytes of the instruction. If it matches a nop340340- * we replace the breakpoint with the nop. Otherwise we replace341341- * it with the call instruction.342342- */343343-static int remove_breakpoint(struct dyn_ftrace *rec)344344-{345345- unsigned char ins[MCOUNT_INSN_SIZE];346346- unsigned char brk = BREAKPOINT_INSTRUCTION;347347- const unsigned char *nop;348348- unsigned long ftrace_addr;349349- unsigned long ip = rec->ip;350350-351351- /* If we fail the read, just give up */352352- if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))353353- return -EFAULT;354354-355355- /* If this does not have a breakpoint, we are done */356356- if (ins[0] != brk)357357- return 0;358358-359359- nop = ftrace_nop_replace();360360-361361- /*362362- * If the last 4 bytes of the instruction do not match363363- * a nop, then we assume that this is a call to ftrace_addr.364364- */365365- if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {366366- /*367367- * For extra paranoidism, we check if the breakpoint is on368368- * a call that would actually jump to the ftrace_addr.369369- * If not, don't touch the breakpoint, we make just create370370- * a disaster.371371- */372372- ftrace_addr = ftrace_get_addr_new(rec);373373- nop = ftrace_call_replace(ip, ftrace_addr);374374-375375- if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)376376- goto update;377377-378378- /* Check both ftrace_addr and ftrace_old_addr */379379- ftrace_addr = ftrace_get_addr_curr(rec);380380- nop = ftrace_call_replace(ip, ftrace_addr);381381-382382- ftrace_expected = nop;383383-384384- if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)385385- return -EINVAL;386386- }387387-388388- update:389389- return ftrace_write(ip, nop, 1);390390-}391391-392392-static int add_update_code(unsigned long ip, unsigned const char *new)393393-{394394- /* skip breakpoint */395395- ip++;396396- new++;397397- return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);398398-}399399-400400-static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)401401-{402402- unsigned long ip = rec->ip;403403- unsigned const char *new;404404-405405- new = ftrace_call_replace(ip, addr);406406- return add_update_code(ip, new);407407-}408408-409409-static int add_update_nop(struct dyn_ftrace *rec)410410-{411411- unsigned long ip = rec->ip;412412- unsigned const char *new;413413-414414- new = ftrace_nop_replace();415415- return add_update_code(ip, new);416416-}417417-418418-static int add_update(struct dyn_ftrace *rec, bool enable)419419-{420420- unsigned long ftrace_addr;421421- int ret;422422-423423- ret = ftrace_test_record(rec, enable);424424-425425- ftrace_addr = ftrace_get_addr_new(rec);426426-427427- switch (ret) {428428- case FTRACE_UPDATE_IGNORE:429429- return 0;430430-431431- case FTRACE_UPDATE_MODIFY_CALL:432432- case FTRACE_UPDATE_MAKE_CALL:433433- /* converting nop to call */434434- return add_update_call(rec, ftrace_addr);435435-436436- case FTRACE_UPDATE_MAKE_NOP:437437- /* converting a call to a nop */438438- return add_update_nop(rec);439439- }440440-441441- return 0;442442-}443443-444444-static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)445445-{446446- unsigned long ip = rec->ip;447447- unsigned const char *new;448448-449449- new = ftrace_call_replace(ip, addr);450450-451451- return ftrace_write(ip, new, 1);452452-}453453-454454-static int finish_update_nop(struct dyn_ftrace *rec)455455-{456456- unsigned long ip = rec->ip;457457- unsigned const char *new;458458-459459- new = ftrace_nop_replace();460460-461461- return ftrace_write(ip, new, 1);462462-}463463-464464-static int finish_update(struct dyn_ftrace *rec, bool enable)465465-{466466- unsigned long ftrace_addr;467467- int ret;468468-469469- ret = ftrace_update_record(rec, enable);470470-471471- ftrace_addr = ftrace_get_addr_new(rec);472472-473473- switch (ret) {474474- case FTRACE_UPDATE_IGNORE:475475- return 0;476476-477477- case FTRACE_UPDATE_MODIFY_CALL:478478- case FTRACE_UPDATE_MAKE_CALL:479479- /* converting nop to call */480480- return finish_update_call(rec, ftrace_addr);481481-482482- case FTRACE_UPDATE_MAKE_NOP:483483- /* converting a call to a nop */484484- return finish_update_nop(rec);485485- }486486-487487- return 0;488488-}489489-490490-static void do_sync_core(void *data)491491-{492492- sync_core();493493-}494494-495495-static void run_sync(void)496496-{497497- int enable_irqs;498498-499499- /* No need to sync if there's only one CPU */500500- if (num_online_cpus() == 1)501501- return;502502-503503- enable_irqs = irqs_disabled();504504-505505- /* We may be called with interrupts disabled (on bootup). */506506- if (enable_irqs)507507- local_irq_enable();508508- on_each_cpu(do_sync_core, NULL, 1);509509- if (enable_irqs)510510- local_irq_disable();511257}512258513259void ftrace_replace_code(int enable)514260{515261 struct ftrace_rec_iter *iter;516262 struct dyn_ftrace *rec;517517- const char *report = "adding breakpoints";518518- int count = 0;263263+ const char *new, *old;519264 int ret;520265521266 for_ftrace_rec_iter(iter) {522267 rec = ftrace_rec_iter_record(iter);523268524524- ret = add_breakpoints(rec, enable);525525- if (ret)526526- goto remove_breakpoints;527527- count++;269269+ switch (ftrace_test_record(rec, enable)) {270270+ case FTRACE_UPDATE_IGNORE:271271+ default:272272+ continue;273273+274274+ case FTRACE_UPDATE_MAKE_CALL:275275+ old = ftrace_nop_replace();276276+ break;277277+278278+ case FTRACE_UPDATE_MODIFY_CALL:279279+ case FTRACE_UPDATE_MAKE_NOP:280280+ old = ftrace_call_replace(rec->ip, ftrace_get_addr_curr(rec));281281+ break;282282+ }283283+284284+ ret = ftrace_verify_code(rec->ip, old);285285+ if (ret) {286286+ ftrace_bug(ret, rec);287287+ return;288288+ }528289 }529529-530530- run_sync();531531-532532- report = "updating code";533533- count = 0;534290535291 for_ftrace_rec_iter(iter) {536292 rec = ftrace_rec_iter_record(iter);537293538538- ret = add_update(rec, enable);539539- if (ret)540540- goto remove_breakpoints;541541- count++;294294+ switch (ftrace_test_record(rec, enable)) {295295+ case FTRACE_UPDATE_IGNORE:296296+ default:297297+ continue;298298+299299+ case FTRACE_UPDATE_MAKE_CALL:300300+ case FTRACE_UPDATE_MODIFY_CALL:301301+ new = ftrace_call_replace(rec->ip, ftrace_get_addr_new(rec));302302+ break;303303+304304+ case FTRACE_UPDATE_MAKE_NOP:305305+ new = ftrace_nop_replace();306306+ break;307307+ }308308+309309+ text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);310310+ ftrace_update_record(rec, enable);542311 }543543-544544- run_sync();545545-546546- report = "removing breakpoints";547547- count = 0;548548-549549- for_ftrace_rec_iter(iter) {550550- rec = ftrace_rec_iter_record(iter);551551-552552- ret = finish_update(rec, enable);553553- if (ret)554554- goto remove_breakpoints;555555- count++;556556- }557557-558558- run_sync();559559-560560- return;561561-562562- remove_breakpoints:563563- pr_warn("Failed on %s (%d):\n", report, count);564564- ftrace_bug(ret, rec);565565- for_ftrace_rec_iter(iter) {566566- rec = ftrace_rec_iter_record(iter);567567- /*568568- * Breakpoints are handled only when this function is in569569- * progress. The system could not work with them.570570- */571571- if (remove_breakpoint(rec))572572- BUG();573573- }574574- run_sync();575575-}576576-577577-static int578578-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,579579- unsigned const char *new_code)580580-{581581- int ret;582582-583583- ret = add_break(ip, old_code);584584- if (ret)585585- goto out;586586-587587- run_sync();588588-589589- ret = add_update_code(ip, new_code);590590- if (ret)591591- goto fail_update;592592-593593- run_sync();594594-595595- ret = ftrace_write(ip, new_code, 1);596596- /*597597- * The breakpoint is handled only when this function is in progress.598598- * The system could not work if we could not remove it.599599- */600600- BUG_ON(ret);601601- out:602602- run_sync();603603- return ret;604604-605605- fail_update:606606- /* Also here the system could not work with the breakpoint */607607- if (ftrace_write(ip, old_code, 1))608608- BUG();609609- goto out;312312+ text_poke_finish();610313}611314612315void arch_ftrace_update_code(int command)613316{614614- /* See comment above by declaration of modifying_ftrace_code */615615- atomic_inc(&modifying_ftrace_code);616616-617317 ftrace_modify_all_code(command);618618-619619- atomic_dec(&modifying_ftrace_code);620318}621319622320int __init ftrace_dyn_arch_init(void)···313747 unsigned long start_offset;314748 unsigned long end_offset;315749 unsigned long op_offset;750750+ unsigned long call_offset;316751 unsigned long offset;317752 unsigned long npages;318753 unsigned long size;···330763 start_offset = (unsigned long)ftrace_regs_caller;331764 end_offset = (unsigned long)ftrace_regs_caller_end;332765 op_offset = (unsigned long)ftrace_regs_caller_op_ptr;766766+ call_offset = (unsigned long)ftrace_regs_call;333767 } else {334768 start_offset = (unsigned long)ftrace_caller;335769 end_offset = (unsigned long)ftrace_epilogue;336770 op_offset = (unsigned long)ftrace_caller_op_ptr;771771+ call_offset = (unsigned long)ftrace_call;337772 }338773339774 size = end_offset - start_offset;···392823 /* put in the new offset to the ftrace_ops */393824 memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);394825826826+ /* put in the call to the function */827827+ mutex_lock(&text_mutex);828828+ call_offset -= start_offset;829829+ memcpy(trampoline + call_offset,830830+ text_gen_insn(CALL_INSN_OPCODE,831831+ trampoline + call_offset,832832+ ftrace_ops_get_func(ops)), CALL_INSN_SIZE);833833+ mutex_unlock(&text_mutex);834834+395835 /* ALLOC_TRAMP flags lets us know we created it */396836 ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;397837398838 set_vm_flush_reset_perms(trampoline);399839400400- /*401401- * Module allocation needs to be completed by making the page402402- * executable. The page is still writable, which is a security hazard,403403- * but anyhow ftrace breaks W^X completely.404404- */840840+ set_memory_ro((unsigned long)trampoline, npages);405841 set_memory_x((unsigned long)trampoline, npages);406842 return (unsigned long)trampoline;407843fail:···433859void arch_ftrace_update_trampoline(struct ftrace_ops *ops)434860{435861 ftrace_func_t func;436436- unsigned char *new;437862 unsigned long offset;438863 unsigned long ip;439864 unsigned int size;440440- int ret, npages;865865+ const char *new;441866442442- if (ops->trampoline) {443443- /*444444- * The ftrace_ops caller may set up its own trampoline.445445- * In such a case, this code must not modify it.446446- */447447- if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))448448- return;449449- npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;450450- set_memory_rw(ops->trampoline, npages);451451- } else {867867+ if (!ops->trampoline) {452868 ops->trampoline = create_trampoline(ops, &size);453869 if (!ops->trampoline)454870 return;455871 ops->trampoline_size = size;456456- npages = PAGE_ALIGN(size) >> PAGE_SHIFT;872872+ return;457873 }874874+875875+ /*876876+ * The ftrace_ops caller may set up its own trampoline.877877+ * In such a case, this code must not modify it.878878+ */879879+ if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))880880+ return;458881459882 offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);460883 ip = ops->trampoline + offset;461461-462884 func = ftrace_ops_get_func(ops);463885464464- ftrace_update_func_call = (unsigned long)func;465465-886886+ mutex_lock(&text_mutex);466887 /* Do a safe modify in case the trampoline is executing */467888 new = ftrace_call_replace(ip, (unsigned long)func);468468- ret = update_ftrace_func(ip, new);469469- set_memory_ro(ops->trampoline, npages);470470-471471- /* The update should never fail */472472- WARN_ON(ret);889889+ text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);890890+ mutex_unlock(&text_mutex);473891}474892475893/* Return the address of the function the trampoline calls */476894static void *addr_from_call(void *ptr)477895{478478- union ftrace_code_union calc;896896+ union text_poke_insn call;479897 int ret;480898481481- ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);899899+ ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE);482900 if (WARN_ON_ONCE(ret < 0))483901 return NULL;484902485903 /* Make sure this is a call */486486- if (WARN_ON_ONCE(calc.op != 0xe8)) {487487- pr_warn("Expected e8, got %x\n", calc.op);904904+ if (WARN_ON_ONCE(call.opcode != CALL_INSN_OPCODE)) {905905+ pr_warn("Expected E8, got %x\n", call.opcode);488906 return NULL;489907 }490908491491- return ptr + MCOUNT_INSN_SIZE + calc.offset;909909+ return ptr + CALL_INSN_SIZE + call.disp;492910}493911494912void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,···547981#ifdef CONFIG_DYNAMIC_FTRACE548982extern void ftrace_graph_call(void);549983550550-static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)984984+static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)551985{552552- return ftrace_text_replace(0xe9, ip, addr);986986+ return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);553987}554988555989static int ftrace_mod_jmp(unsigned long ip, void *func)556990{557557- unsigned char *new;991991+ const char *new;558992559559- ftrace_update_func_call = 0UL;560993 new = ftrace_jmp_replace(ip, (unsigned long)func);561561-562562- return update_ftrace_func(ip, new);994994+ text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);995995+ return 0;563996}564997565998int ftrace_enable_ftrace_graph_caller(void)···5841019void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,5851020 unsigned long frame_pointer)5861021{10221022+ unsigned long return_hooker = (unsigned long)&return_to_handler;5871023 unsigned long old;5881024 int faulted;589589- unsigned long return_hooker = (unsigned long)590590- &return_to_handler;59110255921026 /*5931027 * When resuming from suspend-to-ram, this function can be indirectly
+39-79
arch/x86/kernel/jump_label.c
···1616#include <asm/alternative.h>1717#include <asm/text-patching.h>18181919-union jump_code_union {2020- char code[JUMP_LABEL_NOP_SIZE];2121- struct {2222- char jump;2323- int offset;2424- } __attribute__((packed));2525-};2626-2727-static void bug_at(unsigned char *ip, int line)1919+static void bug_at(const void *ip, int line)2820{2921 /*3022 * The location is not an op that we were expecting.···2735 BUG();2836}29373030-static void __jump_label_set_jump_code(struct jump_entry *entry,3131- enum jump_label_type type,3232- union jump_code_union *code,3333- int init)3838+static const void *3939+__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)3440{3541 const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };3642 const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];3737- const void *expect;4343+ const void *expect, *code;4444+ const void *addr, *dest;3845 int line;39464040- code->jump = 0xe9;4141- code->offset = jump_entry_target(entry) -4242- (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);4747+ addr = (void *)jump_entry_code(entry);4848+ dest = (void *)jump_entry_target(entry);4949+5050+ code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);43514452 if (init) {4553 expect = default_nop; line = __LINE__;4654 } else if (type == JUMP_LABEL_JMP) {4755 expect = ideal_nop; line = __LINE__;4856 } else {4949- expect = code->code; line = __LINE__;5757+ expect = code; line = __LINE__;5058 }51595252- if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))5353- bug_at((void *)jump_entry_code(entry), line);6060+ if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))6161+ bug_at(addr, line);54625563 if (type == JUMP_LABEL_NOP)5656- memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);6464+ code = ideal_nop;6565+6666+ return code;5767}58685959-static void __ref __jump_label_transform(struct jump_entry *entry,6060- enum jump_label_type type,6161- int init)6969+static void inline __jump_label_transform(struct jump_entry *entry,7070+ enum jump_label_type type,7171+ int init)6272{6363- union jump_code_union code;6464-6565- __jump_label_set_jump_code(entry, type, &code, init);7373+ const void *opcode = __jump_label_set_jump_code(entry, type, init);66746775 /*6876 * As long as only a single processor is running and the code is still···7684 * always nop being the 'currently valid' instruction7785 */7886 if (init || system_state == SYSTEM_BOOTING) {7979- text_poke_early((void *)jump_entry_code(entry), &code,8787+ text_poke_early((void *)jump_entry_code(entry), opcode,8088 JUMP_LABEL_NOP_SIZE);8189 return;8290 }83918484- text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);9292+ text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);9393+}9494+9595+static void __ref jump_label_transform(struct jump_entry *entry,9696+ enum jump_label_type type,9797+ int init)9898+{9999+ mutex_lock(&text_mutex);100100+ __jump_label_transform(entry, type, init);101101+ mutex_unlock(&text_mutex);85102}8610387104void arch_jump_label_transform(struct jump_entry *entry,88105 enum jump_label_type type)89106{9090- mutex_lock(&text_mutex);9191- __jump_label_transform(entry, type, 0);9292- mutex_unlock(&text_mutex);107107+ jump_label_transform(entry, type, 0);93108}9494-9595-#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))9696-static struct text_poke_loc tp_vec[TP_VEC_MAX];9797-static int tp_vec_nr;9810999110bool arch_jump_label_transform_queue(struct jump_entry *entry,100111 enum jump_label_type type)101112{102102- struct text_poke_loc *tp;103103- void *entry_code;113113+ const void *opcode;104114105115 if (system_state == SYSTEM_BOOTING) {106116 /*···112118 return true;113119 }114120115115- /*116116- * No more space in the vector, tell upper layer to apply117117- * the queue before continuing.118118- */119119- if (tp_vec_nr == TP_VEC_MAX)120120- return false;121121-122122- tp = &tp_vec[tp_vec_nr];123123-124124- entry_code = (void *)jump_entry_code(entry);125125-126126- /*127127- * The INT3 handler will do a bsearch in the queue, so we need entries128128- * to be sorted. We can survive an unsorted list by rejecting the entry,129129- * forcing the generic jump_label code to apply the queue. Warning once,130130- * to raise the attention to the case of an unsorted entry that is131131- * better not happen, because, in the worst case we will perform in the132132- * same way as we do without batching - with some more overhead.133133- */134134- if (tp_vec_nr > 0) {135135- int prev = tp_vec_nr - 1;136136- struct text_poke_loc *prev_tp = &tp_vec[prev];137137-138138- if (WARN_ON_ONCE(prev_tp->addr > entry_code))139139- return false;140140- }141141-142142- __jump_label_set_jump_code(entry, type,143143- (union jump_code_union *)&tp->text, 0);144144-145145- text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);146146-147147- tp_vec_nr++;148148-121121+ mutex_lock(&text_mutex);122122+ opcode = __jump_label_set_jump_code(entry, type, 0);123123+ text_poke_queue((void *)jump_entry_code(entry),124124+ opcode, JUMP_LABEL_NOP_SIZE, NULL);125125+ mutex_unlock(&text_mutex);149126 return true;150127}151128152129void arch_jump_label_transform_apply(void)153130{154154- if (!tp_vec_nr)155155- return;156156-157131 mutex_lock(&text_mutex);158158- text_poke_bp_batch(tp_vec, tp_vec_nr);132132+ text_poke_finish();159133 mutex_unlock(&text_mutex);160160-161161- tp_vec_nr = 0;162134}163135164136static enum {···153193 jlstate = JL_STATE_NO_UPDATE;154194 }155195 if (jlstate == JL_STATE_UPDATE)156156- __jump_label_transform(entry, type, 1);196196+ jump_label_transform(entry, type, 1);157197}
+11-9
arch/x86/kernel/kprobes/core.c
···119119/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/120120void synthesize_reljump(void *dest, void *from, void *to)121121{122122- __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);122122+ __synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);123123}124124NOKPROBE_SYMBOL(synthesize_reljump);125125126126/* Insert a call instruction at address 'from', which calls address 'to'.*/127127void synthesize_relcall(void *dest, void *from, void *to)128128{129129- __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);129129+ __synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);130130}131131NOKPROBE_SYMBOL(synthesize_relcall);132132···301301 * Another debugging subsystem might insert this breakpoint.302302 * In that case, we can't recover it.303303 */304304- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)304304+ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)305305 return 0;306306 addr += insn.length;307307 }···356356 return 0;357357358358 /* Another subsystem puts a breakpoint, failed to recover */359359- if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)359359+ if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)360360 return 0;361361362362 /* We should not singlestep on the exception masking instructions */···400400 int len = insn->length;401401402402 if (can_boost(insn, p->addr) &&403403- MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {403403+ MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {404404 /*405405 * These instructions can be executed directly if it406406 * jumps back to correct address.407407 */408408 synthesize_reljump(buf + len, p->ainsn.insn + len,409409 p->addr + insn->length);410410- len += RELATIVEJUMP_SIZE;410410+ len += JMP32_INSN_SIZE;411411 p->ainsn.boostable = true;412412 } else {413413 p->ainsn.boostable = false;···501501502502void arch_arm_kprobe(struct kprobe *p)503503{504504- text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);504504+ text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);505505+ text_poke_sync();505506}506507507508void arch_disarm_kprobe(struct kprobe *p)508509{509510 text_poke(p->addr, &p->opcode, 1);511511+ text_poke_sync();510512}511513512514void arch_remove_kprobe(struct kprobe *p)···611609 regs->flags |= X86_EFLAGS_TF;612610 regs->flags &= ~X86_EFLAGS_IF;613611 /* single step inline if the instruction is an int3 */614614- if (p->opcode == BREAKPOINT_INSTRUCTION)612612+ if (p->opcode == INT3_INSN_OPCODE)615613 regs->ip = (unsigned long)p->addr;616614 else617615 regs->ip = (unsigned long)p->ainsn.insn;···697695 reset_current_kprobe();698696 return 1;699697 }700700- } else if (*addr != BREAKPOINT_INSTRUCTION) {698698+ } else if (*addr != INT3_INSN_OPCODE) {701699 /*702700 * The breakpoint instruction was removed right703701 * after we hit it. Another cpu has removed
+33-34
arch/x86/kernel/kprobes/opt.c
···3838 long offs;3939 int i;40404141- for (i = 0; i < RELATIVEJUMP_SIZE; i++) {4141+ for (i = 0; i < JMP32_INSN_SIZE; i++) {4242 kp = get_kprobe((void *)addr - i);4343 /* This function only handles jump-optimized kprobe */4444 if (kp && kprobe_optimized(kp)) {···62626363 if (addr == (unsigned long)kp->addr) {6464 buf[0] = kp->opcode;6565- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);6565+ memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);6666 } else {6767 offs = addr - (unsigned long)kp->addr - 1;6868- memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);6868+ memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);6969 }70707171 return (unsigned long)buf;···141141#define TMPL_END_IDX \142142 ((long)optprobe_template_end - (long)optprobe_template_entry)143143144144-#define INT3_SIZE sizeof(kprobe_opcode_t)145145-146144/* Optimized kprobe call back function: called from optinsn */147145static void148146optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)···160162 regs->cs |= get_kernel_rpl();161163 regs->gs = 0;162164#endif163163- regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;165165+ regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;164166 regs->orig_ax = ~0UL;165167166168 __this_cpu_write(current_kprobe, &op->kp);···177179 struct insn insn;178180 int len = 0, ret;179181180180- while (len < RELATIVEJUMP_SIZE) {182182+ while (len < JMP32_INSN_SIZE) {181183 ret = __copy_instruction(dest + len, src + len, real + len, &insn);182184 if (!ret || !can_boost(&insn, src + len))183185 return -EINVAL;···269271 return 0;270272271273 /* Check there is enough space for a relative jump. */272272- if (size - offset < RELATIVEJUMP_SIZE)274274+ if (size - offset < JMP32_INSN_SIZE)273275 return 0;274276275277 /* Decode instructions */···288290 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);289291 insn_get_length(&insn);290292 /* Another subsystem puts a breakpoint */291291- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)293293+ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)292294 return 0;293295 /* Recover address */294296 insn.kaddr = (void *)addr;295297 insn.next_byte = (void *)(addr + insn.length);296298 /* Check any instructions don't jump into target */297299 if (insn_is_indirect_jump(&insn) ||298298- insn_jump_into_range(&insn, paddr + INT3_SIZE,299299- RELATIVE_ADDR_SIZE))300300+ insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,301301+ DISP32_SIZE))300302 return 0;301303 addr += insn.length;302304 }···372374 * Verify if the address gap is in 2GB range, because this uses373375 * a relative jump.374376 */375375- rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;377377+ rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;376378 if (abs(rel) > 0x7fffffff) {377379 ret = -ERANGE;378380 goto err;···399401 /* Set returning jmp instruction at the tail of out-of-line buffer */400402 synthesize_reljump(buf + len, slot + len,401403 (u8 *)op->kp.addr + op->optinsn.size);402402- len += RELATIVEJUMP_SIZE;404404+ len += JMP32_INSN_SIZE;403405404406 /* We have to use text_poke() for instruction buffer because it is RO */405407 text_poke(slot, buf, len);···414416}415417416418/*417417- * Replace breakpoints (int3) with relative jumps.419419+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).418420 * Caller must call with locking kprobe_mutex and text_mutex.421421+ *422422+ * The caller will have installed a regular kprobe and after that issued423423+ * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in424424+ * the 4 bytes after the INT3 are unused and can now be overwritten.419425 */420426void arch_optimize_kprobes(struct list_head *oplist)421427{422428 struct optimized_kprobe *op, *tmp;423423- u8 insn_buff[RELATIVEJUMP_SIZE];429429+ u8 insn_buff[JMP32_INSN_SIZE];424430425431 list_for_each_entry_safe(op, tmp, oplist, list) {426432 s32 rel = (s32)((long)op->optinsn.insn -427427- ((long)op->kp.addr + RELATIVEJUMP_SIZE));433433+ ((long)op->kp.addr + JMP32_INSN_SIZE));428434429435 WARN_ON(kprobe_disabled(&op->kp));430436431437 /* Backup instructions which will be replaced by jump address */432432- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,433433- RELATIVE_ADDR_SIZE);438438+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,439439+ DISP32_SIZE);434440435435- insn_buff[0] = RELATIVEJUMP_OPCODE;441441+ insn_buff[0] = JMP32_INSN_OPCODE;436442 *(s32 *)(&insn_buff[1]) = rel;437443438438- text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);444444+ text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);439445440446 list_del_init(&op->list);441447 }442448}443449444444-/* Replace a relative jump with a breakpoint (int3). */450450+/*451451+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).452452+ *453453+ * After that, we can restore the 4 bytes after the INT3 to undo what454454+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be455455+ * unused once the INT3 lands.456456+ */445457void arch_unoptimize_kprobe(struct optimized_kprobe *op)446458{447447- u8 insn_buff[RELATIVEJUMP_SIZE];448448- u8 emulate_buff[RELATIVEJUMP_SIZE];449449-450450- /* Set int3 to first byte for kprobes */451451- insn_buff[0] = BREAKPOINT_INSTRUCTION;452452- memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);453453-454454- emulate_buff[0] = RELATIVEJUMP_OPCODE;455455- *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -456456- ((long)op->kp.addr + RELATIVEJUMP_SIZE));457457-458458- text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,459459- emulate_buff);459459+ arch_arm_kprobe(&op->kp);460460+ text_poke(op->kp.addr + INT3_INSN_SIZE,461461+ op->optinsn.copied_insn, DISP32_SIZE);462462+ text_poke_sync();460463}461464462465/*
-9
arch/x86/kernel/traps.c
···572572573573dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)574574{575575-#ifdef CONFIG_DYNAMIC_FTRACE576576- /*577577- * ftrace must be first, everything else may cause a recursive crash.578578- * See note by declaration of modifying_ftrace_code in ftrace.c579579- */580580- if (unlikely(atomic_read(&modifying_ftrace_code)) &&581581- ftrace_int3_handler(regs))582582- return;583583-#endif584575 if (poke_int3_handler(regs))585576 return;586577
-28
arch/x86/mm/init_32.c
···874874875875int kernel_set_to_readonly __read_mostly;876876877877-void set_kernel_text_rw(void)878878-{879879- unsigned long start = PFN_ALIGN(_text);880880- unsigned long size = PFN_ALIGN(_etext) - start;881881-882882- if (!kernel_set_to_readonly)883883- return;884884-885885- pr_debug("Set kernel text: %lx - %lx for read write\n",886886- start, start+size);887887-888888- set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);889889-}890890-891891-void set_kernel_text_ro(void)892892-{893893- unsigned long start = PFN_ALIGN(_text);894894- unsigned long size = PFN_ALIGN(_etext) - start;895895-896896- if (!kernel_set_to_readonly)897897- return;898898-899899- pr_debug("Set kernel text: %lx - %lx for read only\n",900900- start, start+size);901901-902902- set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);903903-}904904-905877static void mark_nxdata_nx(void)906878{907879 /*
-36
arch/x86/mm/init_64.c
···1260126012611261int kernel_set_to_readonly;1262126212631263-void set_kernel_text_rw(void)12641264-{12651265- unsigned long start = PFN_ALIGN(_text);12661266- unsigned long end = PFN_ALIGN(_etext);12671267-12681268- if (!kernel_set_to_readonly)12691269- return;12701270-12711271- pr_debug("Set kernel text: %lx - %lx for read write\n",12721272- start, end);12731273-12741274- /*12751275- * Make the kernel identity mapping for text RW. Kernel text12761276- * mapping will always be RO. Refer to the comment in12771277- * static_protections() in pageattr.c12781278- */12791279- set_memory_rw(start, (end - start) >> PAGE_SHIFT);12801280-}12811281-12821282-void set_kernel_text_ro(void)12831283-{12841284- unsigned long start = PFN_ALIGN(_text);12851285- unsigned long end = PFN_ALIGN(_etext);12861286-12871287- if (!kernel_set_to_readonly)12881288- return;12891289-12901290- pr_debug("Set kernel text: %lx - %lx for read only\n",12911291- start, end);12921292-12931293- /*12941294- * Set the kernel identity mapping for text RO.12951295- */12961296- set_memory_ro(start, (end - start) >> PAGE_SHIFT);12971297-}12981298-12991263void mark_rodata_ro(void)13001264{13011265 unsigned long start = PFN_ALIGN(_text);
···510510 arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);511511 /* Loop free_list for disarming */512512 list_for_each_entry_safe(op, tmp, &freeing_list, list) {513513+ /* Switching from detour code to origin */514514+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;513515 /* Disarm probes if marked disabled */514516 if (kprobe_disabled(&op->kp))515517 arch_disarm_kprobe(&op->kp);···651649{652650 lockdep_assert_cpus_held();653651 arch_unoptimize_kprobe(op);652652+ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;654653 if (kprobe_disabled(&op->kp))655654 arch_disarm_kprobe(&op->kp);656655}···679676 return;680677 }681678682682- op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;683679 if (!list_empty(&op->list)) {684680 /* Dequeue from the optimization queue */685681 list_del_init(&op->list);
-43
kernel/module.c
···20312031 frob_writable_data(&mod->init_layout, set_memory_nx);20322032}2033203320342034-/* Iterate through all modules and set each module's text as RW */20352035-void set_all_modules_text_rw(void)20362036-{20372037- struct module *mod;20382038-20392039- if (!rodata_enabled)20402040- return;20412041-20422042- mutex_lock(&module_mutex);20432043- list_for_each_entry_rcu(mod, &modules, list) {20442044- if (mod->state == MODULE_STATE_UNFORMED)20452045- continue;20462046-20472047- frob_text(&mod->core_layout, set_memory_rw);20482048- frob_text(&mod->init_layout, set_memory_rw);20492049- }20502050- mutex_unlock(&module_mutex);20512051-}20522052-20532053-/* Iterate through all modules and set each module's text as RO */20542054-void set_all_modules_text_ro(void)20552055-{20562056- struct module *mod;20572057-20582058- if (!rodata_enabled)20592059- return;20602060-20612061- mutex_lock(&module_mutex);20622062- list_for_each_entry_rcu(mod, &modules, list) {20632063- /*20642064- * Ignore going modules since it's possible that ro20652065- * protection has already been disabled, otherwise we'll20662066- * run into protection faults at module deallocation.20672067- */20682068- if (mod->state == MODULE_STATE_UNFORMED ||20692069- mod->state == MODULE_STATE_GOING)20702070- continue;20712071-20722072- frob_text(&mod->core_layout, set_memory_ro);20732073- frob_text(&mod->init_layout, set_memory_ro);20742074- }20752075- mutex_unlock(&module_mutex);20762076-}20772034#else /* !CONFIG_STRICT_MODULE_RWX */20782035static void module_enable_nx(const struct module *mod) { }20792036#endif /* CONFIG_STRICT_MODULE_RWX */