[PATCH] kprobes: fix single-step out of line - take2

Now that PPC64 has no-execute support, here is a second attempt at fixing
single-stepping out of line during kprobe execution. Kprobes on x86_64
already solved this problem by allocating an executable page and using it
as the scratch area for stepping out of line; this patch moves that
allocator into generic code (kernel/kprobes.c) so ppc64 can reuse it.
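For readers outside the kernel tree, the mechanism is easy to demonstrate in
user space: keep a pool of executable pages, hand out fixed-size instruction
slots from them, and point the PC at a slot for the single step. The sketch
below mirrors the allocator this patch moves into kernel/kprobes.c, with
mmap(PROT_EXEC) standing in for module_alloc(); all names here (insn_page,
get_slot, SLOT_BYTES) are illustrative, not kernel API.

    /* Minimal user-space analogue of the patch's slot allocator.
     * Build: cc -o slots slots.c */
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>

    #define PAGE_BYTES     4096
    #define SLOT_BYTES     16   /* stand-in for MAX_INSN_SIZE * sizeof(opcode) */
    #define SLOTS_PER_PAGE (PAGE_BYTES / SLOT_BYTES)

    struct insn_page {
            struct insn_page *next;
            unsigned char *insns;           /* one executable page */
            char slot_used[SLOTS_PER_PAGE];
            int nused;
    };

    static struct insn_page *pool;

    /* Hand out a free slot, mapping a fresh executable page if needed. */
    static unsigned char *get_slot(void)
    {
            struct insn_page *kip;
            int i;

            for (kip = pool; kip; kip = kip->next) {
                    if (kip->nused == SLOTS_PER_PAGE)
                            continue;
                    for (i = 0; i < SLOTS_PER_PAGE; i++) {
                            if (!kip->slot_used[i]) {
                                    kip->slot_used[i] = 1;
                                    kip->nused++;
                                    return kip->insns + i * SLOT_BYTES;
                            }
                    }
            }

            kip = calloc(1, sizeof(*kip));
            if (!kip)
                    return NULL;
            /* PROT_EXEC is the whole point: single-stepping a copy held in
             * ordinary heap memory faults once no-execute is enforced. */
            kip->insns = mmap(NULL, PAGE_BYTES,
                              PROT_READ | PROT_WRITE | PROT_EXEC,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (kip->insns == MAP_FAILED) {
                    free(kip);
                    return NULL;
            }
            kip->slot_used[0] = 1;
            kip->nused = 1;
            kip->next = pool;
            pool = kip;
            return kip->insns;
    }

    int main(void)
    {
            unsigned char *slot = get_slot();

            printf("slot at %p (%d slots per page)\n",
                   (void *)slot, SLOTS_PER_PAGE);
            return 0;
    }

The matching free-side bookkeeping (clear slot_used, decrement nused) is
omitted for brevity; a hardened system that forbids writable-and-executable
mappings would need a two-step mprotect(), but the plain RWX mapping matches
what module_alloc() gave the kernel at the time.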

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Ananth N Mavinakayanahalli and committed by Linus Torvalds (9ec4b1f3, d3b8a1a8)

6 files changed, 128 insertions(+), 117 deletions(-)
arch/ppc64/kernel/kprobes.c | +22 -4

···
 #include <asm/kdebug.h>
 #include <asm/sstep.h>
 
+static DECLARE_MUTEX(kprobe_mutex);
+
 static struct kprobe *current_kprobe;
 static unsigned long kprobe_status, kprobe_saved_msr;
 static struct kprobe *kprobe_prev;
···
         } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
                 printk("Cannot register a kprobe on rfid or mtmsrd\n");
                 ret = -EINVAL;
+        }
+
+        /* insn must be on a special executable page on ppc64 */
+        if (!ret) {
+                up(&kprobe_mutex);
+                p->ainsn.insn = get_insn_slot();
+                down(&kprobe_mutex);
+                if (!p->ainsn.insn)
+                        ret = -ENOMEM;
         }
         return ret;
 }
···
 
 void arch_remove_kprobe(struct kprobe *p)
 {
+        up(&kprobe_mutex);
+        free_insn_slot(p->ainsn.insn);
+        down(&kprobe_mutex);
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
+        kprobe_opcode_t insn = *p->ainsn.insn;
+
         regs->msr |= MSR_SE;
-        /*single step inline if it a breakpoint instruction*/
-        if (p->opcode == BREAKPOINT_INSTRUCTION)
+
+        /* single step inline if it is a trap variant */
+        if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn))
                 regs->nip = (unsigned long)p->addr;
         else
-                regs->nip = (unsigned long)&p->ainsn.insn;
+                regs->nip = (unsigned long)p->ainsn.insn;
 }
 
 static inline void save_previous_kprobe(void)
···
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
         int ret;
+        unsigned int insn = *p->ainsn.insn;
 
         regs->nip = (unsigned long)p->addr;
-        ret = emulate_step(regs, p->ainsn.insn[0]);
+        ret = emulate_step(regs, insn);
         if (ret == 0)
                 regs->nip = (unsigned long)p->addr + 4;
 }
arch/x86_64/kernel/kprobes.c | +1 -112

···
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/moduleloader.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
···
 static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
-static kprobe_opcode_t *get_insn_slot(void);
-static void free_insn_slot(kprobe_opcode_t *slot);
 void jprobe_return_end(void);
 
 /* copy of the kernel stack at the probe fire time */
···
                 return 1;
         }
         return 0;
-}
-
-/*
- * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
- * By default on x86_64, pages we get from kmalloc or vmalloc are not
- * executable. Single-stepping an instruction on such a page yields an
- * oops. So instead of storing the instruction copies in their respective
- * kprobe objects, we allocate a page, map it executable, and store all the
- * instruction copies there. (We can allocate additional pages if somebody
- * inserts a huge number of probes.) Each page can hold up to INSNS_PER_PAGE
- * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
- * bytes.
- */
-#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
-struct kprobe_insn_page {
-        struct hlist_node hlist;
-        kprobe_opcode_t *insns;        /* page of instruction slots */
-        char slot_used[INSNS_PER_PAGE];
-        int nused;
-};
-
-static struct hlist_head kprobe_insn_pages;
-
-/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
- * We allocate an executable page if there's no room on existing ones.
- */
-static kprobe_opcode_t *get_insn_slot(void)
-{
-        struct kprobe_insn_page *kip;
-        struct hlist_node *pos;
-
-        hlist_for_each(pos, &kprobe_insn_pages) {
-                kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-                if (kip->nused < INSNS_PER_PAGE) {
-                        int i;
-                        for (i = 0; i < INSNS_PER_PAGE; i++) {
-                                if (!kip->slot_used[i]) {
-                                        kip->slot_used[i] = 1;
-                                        kip->nused++;
-                                        return kip->insns + (i*MAX_INSN_SIZE);
-                                }
-                        }
-                        /* Surprise! No unused slots. Fix kip->nused. */
-                        kip->nused = INSNS_PER_PAGE;
-                }
-        }
-
-        /* All out of space. Need to allocate a new page. Use slot 0.*/
-        kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
-        if (!kip) {
-                return NULL;
-        }
-
-        /*
-         * For the %rip-relative displacement fixups to be doable, we
-         * need our instruction copy to be within +/- 2GB of any data it
-         * might access via %rip. That is, within 2GB of where the
-         * kernel image and loaded module images reside. So we allocate
-         * a page in the module loading area.
-         */
-        kip->insns = module_alloc(PAGE_SIZE);
-        if (!kip->insns) {
-                kfree(kip);
-                return NULL;
-        }
-        INIT_HLIST_NODE(&kip->hlist);
-        hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-        memset(kip->slot_used, 0, INSNS_PER_PAGE);
-        kip->slot_used[0] = 1;
-        kip->nused = 1;
-        return kip->insns;
-}
-
-/**
- * free_insn_slot() - Free instruction slot obtained from get_insn_slot().
- */
-static void free_insn_slot(kprobe_opcode_t *slot)
-{
-        struct kprobe_insn_page *kip;
-        struct hlist_node *pos;
-
-        hlist_for_each(pos, &kprobe_insn_pages) {
-                kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-                if (kip->insns <= slot
-                    && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
-                        int i = (slot - kip->insns) / MAX_INSN_SIZE;
-                        kip->slot_used[i] = 0;
-                        kip->nused--;
-                        if (kip->nused == 0) {
-                                /*
-                                 * Page is no longer in use. Free it unless
-                                 * it's the last one. We keep the last one
-                                 * so as not to have to set it up again the
-                                 * next time somebody inserts a probe.
-                                 */
-                                hlist_del(&kip->hlist);
-                                if (hlist_empty(&kprobe_insn_pages)) {
-                                        INIT_HLIST_NODE(&kip->hlist);
-                                        hlist_add_head(&kip->hlist,
-                                                       &kprobe_insn_pages);
-                                } else {
-                                        module_free(NULL, kip->insns);
-                                        kfree(kip);
-                                }
-                        }
-                        return;
-                }
-        }
 }
include/asm-ia64/kprobes.h | +1

···
 #include <linux/ptrace.h>
 #include <asm/break.h>
 
+#define MAX_INSN_SIZE   16
 #define BREAK_INST      (long)(__IA64_BREAK_KPROBE << 6)
 
 typedef union cmp_inst {
include/asm-ppc64/kprobes.h | +1 -1

···
 /* Architecture specific copy of original instruction */
 struct arch_specific_insn {
         /* copy of original instruction */
-        kprobe_opcode_t insn[MAX_INSN_SIZE];
+        kprobe_opcode_t *insn;
 };
 
 #ifdef CONFIG_KPROBES
include/linux/kprobes.h | +2

···
 extern void arch_disarm_kprobe(struct kprobe *p);
 extern void arch_remove_kprobe(struct kprobe *p);
 extern void show_registers(struct pt_regs *regs);
+extern kprobe_opcode_t *get_insn_slot(void);
+extern void free_insn_slot(kprobe_opcode_t *slot);
 
 /* Get the kprobe at this addr (if any). Must have called lock_kprobes */
 struct kprobe *get_kprobe(void *addr);
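With these declarations in the generic header, an architecture with no-exec
support adopts the allocator in two places, as the ppc64 hunks above show.
Condensed into a self-contained sketch (the stub types are stand-ins so the
unit compiles outside the kernel tree; in-tree, the externs resolve against
the new kernel/kprobes.c):

    #include <errno.h>

    typedef unsigned int kprobe_opcode_t;      /* as on ppc64 */

    struct kprobe {
            kprobe_opcode_t *addr;             /* probed address */
            struct { kprobe_opcode_t *insn; } ainsn;  /* out-of-line copy */
    };

    extern kprobe_opcode_t *get_insn_slot(void);   /* new generic API */
    extern void free_insn_slot(kprobe_opcode_t *slot);

    int arch_prepare_kprobe(struct kprobe *p)
    {
            int ret = 0;

            /* ... architecture-specific opcode checks would set ret ... */
            if (!ret) {
                    p->ainsn.insn = get_insn_slot(); /* executable slot */
                    if (!p->ainsn.insn)
                            ret = -ENOMEM;
            }
            return ret;
    }

    void arch_remove_kprobe(struct kprobe *p)
    {
            free_insn_slot(p->ainsn.insn);     /* return slot to the pool */
    }

Copying the probed opcode into the slot stays in the arch's existing copy
routine (not shown in the hunks here), which is why arch_specific_insn.insn
becomes a pointer into the shared executable page rather than an inline
array.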
kernel/kprobes.c | +101

···
 #include <linux/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/moduleloader.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/kdebug.h>
···
 unsigned int kprobe_cpu = NR_CPUS;
 static DEFINE_SPINLOCK(kprobe_lock);
 static struct kprobe *curr_kprobe;
+
+/*
+ * kprobe->ainsn.insn points to the copy of the instruction to be
+ * single-stepped. x86_64, POWER4 and above have no-exec support and
+ * stepping on the instruction on a vmalloced/kmalloced/data page
+ * is a recipe for disaster
+ */
+#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe_insn_page {
+        struct hlist_node hlist;
+        kprobe_opcode_t *insns;        /* Page of instruction slots */
+        char slot_used[INSNS_PER_PAGE];
+        int nused;
+};
+
+static struct hlist_head kprobe_insn_pages;
+
+/**
+ * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * We allocate an executable page if there's no room on existing ones.
+ */
+kprobe_opcode_t *get_insn_slot(void)
+{
+        struct kprobe_insn_page *kip;
+        struct hlist_node *pos;
+
+        hlist_for_each(pos, &kprobe_insn_pages) {
+                kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+                if (kip->nused < INSNS_PER_PAGE) {
+                        int i;
+                        for (i = 0; i < INSNS_PER_PAGE; i++) {
+                                if (!kip->slot_used[i]) {
+                                        kip->slot_used[i] = 1;
+                                        kip->nused++;
+                                        return kip->insns + (i * MAX_INSN_SIZE);
+                                }
+                        }
+                        /* Surprise! No unused slots. Fix kip->nused. */
+                        kip->nused = INSNS_PER_PAGE;
+                }
+        }
+
+        /* All out of space. Need to allocate a new page. Use slot 0.*/
+        kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
+        if (!kip) {
+                return NULL;
+        }
+
+        /*
+         * Use module_alloc so this page is within +/- 2GB of where the
+         * kernel image and loaded module images reside. This is required
+         * so x86_64 can correctly handle the %rip-relative fixups.
+         */
+        kip->insns = module_alloc(PAGE_SIZE);
+        if (!kip->insns) {
+                kfree(kip);
+                return NULL;
+        }
+        INIT_HLIST_NODE(&kip->hlist);
+        hlist_add_head(&kip->hlist, &kprobe_insn_pages);
+        memset(kip->slot_used, 0, INSNS_PER_PAGE);
+        kip->slot_used[0] = 1;
+        kip->nused = 1;
+        return kip->insns;
+}
+
+void free_insn_slot(kprobe_opcode_t *slot)
+{
+        struct kprobe_insn_page *kip;
+        struct hlist_node *pos;
+
+        hlist_for_each(pos, &kprobe_insn_pages) {
+                kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+                if (kip->insns <= slot &&
+                    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
+                        int i = (slot - kip->insns) / MAX_INSN_SIZE;
+                        kip->slot_used[i] = 0;
+                        kip->nused--;
+                        if (kip->nused == 0) {
+                                /*
+                                 * Page is no longer in use. Free it unless
+                                 * it's the last one. We keep the last one
+                                 * so as not to have to set it up again the
+                                 * next time somebody inserts a probe.
+                                 */
+                                hlist_del(&kip->hlist);
+                                if (hlist_empty(&kprobe_insn_pages)) {
+                                        INIT_HLIST_NODE(&kip->hlist);
+                                        hlist_add_head(&kip->hlist,
+                                                       &kprobe_insn_pages);
+                                } else {
+                                        module_free(NULL, kip->insns);
+                                        kfree(kip);
+                                }
+                        }
+                        return;
+                }
+        }
+}
 
 /* Locks kprobe: irqs must be disabled */
 void lock_kprobes(void)
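To put INSNS_PER_PAGE in concrete terms, here is the arithmetic with 4 KB
pages and the per-architecture constants of the time (values taken from the
contemporaneous headers; treat them as illustrative if your tree differs):

    /* INSNS_PER_PAGE = PAGE_SIZE / (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))
     *
     * x86_64: kprobe_opcode_t is u8,  MAX_INSN_SIZE is 15
     *         4096 / (15 * 1) = 273 slots per executable page
     * ppc64:  kprobe_opcode_t is u32, MAX_INSN_SIZE is 1
     *         4096 / (1 * 4)  = 1024 slots per executable page
     */

So a single module_alloc() page covers hundreds of simultaneous probes
before a second page is ever needed, which is why free_insn_slot() bothers
to keep the last page around rather than freeing it.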