Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] Kprobes: Track kprobe on a per_cpu basis - x86_64 changes

x86_64 changes to track kprobe execution on a per-cpu basis. We now track the
kprobe state machine independently on each CPU using an arch-specific kprobe
control block.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Ananth N Mavinakayanahalli and committed by
Linus Torvalds
e7a510f9 f215d985

+89 -59
+70 -59
arch/x86_64/kernel/kprobes.c
··· 44 44 #include <asm/kdebug.h> 45 45 46 46 static DECLARE_MUTEX(kprobe_mutex); 47 - 48 - static struct kprobe *current_kprobe; 49 - static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags; 50 - static struct kprobe *kprobe_prev; 51 - static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; 52 - static struct pt_regs jprobe_saved_regs; 53 - static long *jprobe_saved_rsp; 54 47 void jprobe_return_end(void); 55 48 56 - /* copy of the kernel stack at the probe fire time */ 57 - static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; 49 + DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 50 + DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 58 51 59 52 /* 60 53 * returns non-zero if opcode modifies the interrupt flag. ··· 229 236 up(&kprobe_mutex); 230 237 } 231 238 232 - static inline void save_previous_kprobe(void) 239 + static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) 233 240 { 234 - kprobe_prev = current_kprobe; 235 - kprobe_status_prev = kprobe_status; 236 - kprobe_old_rflags_prev = kprobe_old_rflags; 237 - kprobe_saved_rflags_prev = kprobe_saved_rflags; 241 + kcb->prev_kprobe.kp = kprobe_running(); 242 + kcb->prev_kprobe.status = kcb->kprobe_status; 243 + kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; 244 + kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; 238 245 } 239 246 240 - static inline void restore_previous_kprobe(void) 247 + static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) 241 248 { 242 - current_kprobe = kprobe_prev; 243 - kprobe_status = kprobe_status_prev; 244 - kprobe_old_rflags = kprobe_old_rflags_prev; 245 - kprobe_saved_rflags = kprobe_saved_rflags_prev; 249 + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 250 + kcb->kprobe_status = kcb->prev_kprobe.status; 251 + kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; 252 + kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; 246 253 } 247 254 248 - static inline void 
set_current_kprobe(struct kprobe *p, struct pt_regs *regs) 255 + static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 256 + struct kprobe_ctlblk *kcb) 249 257 { 250 - current_kprobe = p; 251 - kprobe_saved_rflags = kprobe_old_rflags 258 + __get_cpu_var(current_kprobe) = p; 259 + kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags 252 260 = (regs->eflags & (TF_MASK | IF_MASK)); 253 261 if (is_IF_modifier(p->ainsn.insn)) 254 - kprobe_saved_rflags &= ~IF_MASK; 262 + kcb->kprobe_saved_rflags &= ~IF_MASK; 255 263 } 256 264 257 265 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) ··· 295 301 struct kprobe *p; 296 302 int ret = 0; 297 303 kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); 304 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 298 305 299 306 /* Check we're not actually recursing */ 300 307 if (kprobe_running()) { ··· 303 308 Disarm the probe we just hit, and ignore it. */ 304 309 p = get_kprobe(addr); 305 310 if (p) { 306 - if (kprobe_status == KPROBE_HIT_SS && 311 + if (kcb->kprobe_status == KPROBE_HIT_SS && 307 312 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { 308 313 regs->eflags &= ~TF_MASK; 309 - regs->eflags |= kprobe_saved_rflags; 314 + regs->eflags |= kcb->kprobe_saved_rflags; 310 315 unlock_kprobes(); 311 316 goto no_kprobe; 312 - } else if (kprobe_status == KPROBE_HIT_SSDONE) { 317 + } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { 313 318 /* TODO: Provide re-entrancy from 314 319 * post_kprobes_handler() and avoid exception 315 320 * stack corruption while single-stepping on ··· 317 322 */ 318 323 arch_disarm_kprobe(p); 319 324 regs->rip = (unsigned long)p->addr; 325 + reset_current_kprobe(); 320 326 ret = 1; 321 327 } else { 322 328 /* We have reentered the kprobe_handler(), since ··· 327 331 * of the new probe without calling any user 328 332 * handlers. 
329 333 */ 330 - save_previous_kprobe(); 331 - set_current_kprobe(p, regs); 334 + save_previous_kprobe(kcb); 335 + set_current_kprobe(p, regs, kcb); 332 336 p->nmissed++; 333 337 prepare_singlestep(p, regs); 334 - kprobe_status = KPROBE_REENTER; 338 + kcb->kprobe_status = KPROBE_REENTER; 335 339 return 1; 336 340 } 337 341 } else { 338 - p = current_kprobe; 342 + p = __get_cpu_var(current_kprobe); 339 343 if (p->break_handler && p->break_handler(p, regs)) { 340 344 goto ss_probe; 341 345 } ··· 370 374 * in post_kprobe_handler() 371 375 */ 372 376 preempt_disable(); 373 - kprobe_status = KPROBE_HIT_ACTIVE; 374 - set_current_kprobe(p, regs); 377 + set_current_kprobe(p, regs, kcb); 378 + kcb->kprobe_status = KPROBE_HIT_ACTIVE; 375 379 376 380 if (p->pre_handler && p->pre_handler(p, regs)) 377 381 /* handler has already set things up, so skip ss setup */ ··· 379 383 380 384 ss_probe: 381 385 prepare_singlestep(p, regs); 382 - kprobe_status = KPROBE_HIT_SS; 386 + kcb->kprobe_status = KPROBE_HIT_SS; 383 387 return 1; 384 388 385 389 no_kprobe: ··· 447 451 BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); 448 452 regs->rip = orig_ret_address; 449 453 454 + reset_current_kprobe(); 450 455 unlock_kprobes(); 451 456 preempt_enable_no_resched(); 452 457 ··· 481 484 * that is atop the stack is the address following the copied instruction. 482 485 * We need to make it the address following the original instruction. 
483 486 */ 484 - static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) 487 + static void __kprobes resume_execution(struct kprobe *p, 488 + struct pt_regs *regs, struct kprobe_ctlblk *kcb) 485 489 { 486 490 unsigned long *tos = (unsigned long *)regs->rsp; 487 491 unsigned long next_rip = 0; ··· 497 499 switch (*insn) { 498 500 case 0x9c: /* pushfl */ 499 501 *tos &= ~(TF_MASK | IF_MASK); 500 - *tos |= kprobe_old_rflags; 502 + *tos |= kcb->kprobe_old_rflags; 501 503 break; 502 504 case 0xc3: /* ret/lret */ 503 505 case 0xcb: ··· 542 544 */ 543 545 int __kprobes post_kprobe_handler(struct pt_regs *regs) 544 546 { 545 - if (!kprobe_running()) 547 + struct kprobe *cur = kprobe_running(); 548 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 549 + 550 + if (!cur) 546 551 return 0; 547 552 548 - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { 549 - kprobe_status = KPROBE_HIT_SSDONE; 550 - current_kprobe->post_handler(current_kprobe, regs, 0); 553 + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { 554 + kcb->kprobe_status = KPROBE_HIT_SSDONE; 555 + cur->post_handler(cur, regs, 0); 551 556 } 552 557 553 - resume_execution(current_kprobe, regs); 554 - regs->eflags |= kprobe_saved_rflags; 558 + resume_execution(cur, regs, kcb); 559 + regs->eflags |= kcb->kprobe_saved_rflags; 555 560 556 561 /* Restore the original saved kprobes variables and continue. */ 557 - if (kprobe_status == KPROBE_REENTER) { 558 - restore_previous_kprobe(); 562 + if (kcb->kprobe_status == KPROBE_REENTER) { 563 + restore_previous_kprobe(kcb); 559 564 goto out; 560 565 } else { 561 566 unlock_kprobes(); 562 567 } 568 + reset_current_kprobe(); 563 569 out: 564 570 preempt_enable_no_resched(); 565 571 ··· 581 579 /* Interrupts disabled, kprobe_lock held. 
*/ 582 580 int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) 583 581 { 584 - if (current_kprobe->fault_handler 585 - && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) 582 + struct kprobe *cur = kprobe_running(); 583 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 584 + 585 + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) 586 586 return 1; 587 587 588 - if (kprobe_status & KPROBE_HIT_SS) { 589 - resume_execution(current_kprobe, regs); 590 - regs->eflags |= kprobe_old_rflags; 588 + if (kcb->kprobe_status & KPROBE_HIT_SS) { 589 + resume_execution(cur, regs, kcb); 590 + regs->eflags |= kcb->kprobe_old_rflags; 591 591 592 + reset_current_kprobe(); 592 593 unlock_kprobes(); 593 594 preempt_enable_no_resched(); 594 595 } ··· 634 629 { 635 630 struct jprobe *jp = container_of(p, struct jprobe, kp); 636 631 unsigned long addr; 632 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 637 633 638 - jprobe_saved_regs = *regs; 639 - jprobe_saved_rsp = (long *) regs->rsp; 640 - addr = (unsigned long)jprobe_saved_rsp; 634 + kcb->jprobe_saved_regs = *regs; 635 + kcb->jprobe_saved_rsp = (long *) regs->rsp; 636 + addr = (unsigned long)(kcb->jprobe_saved_rsp); 641 637 /* 642 638 * As Linus pointed out, gcc assumes that the callee 643 639 * owns the argument space and could overwrite it, e.g. ··· 646 640 * we also save and restore enough stack bytes to cover 647 641 * the argument area. 
648 642 */ 649 - memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); 643 + memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, 644 + MIN_STACK_SIZE(addr)); 650 645 regs->eflags &= ~IF_MASK; 651 646 regs->rip = (unsigned long)(jp->entry); 652 647 return 1; ··· 655 648 656 649 void __kprobes jprobe_return(void) 657 650 { 651 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 652 + 658 653 asm volatile (" xchg %%rbx,%%rsp \n" 659 654 " int3 \n" 660 655 " .globl jprobe_return_end \n" 661 656 " jprobe_return_end: \n" 662 657 " nop \n"::"b" 663 - (jprobe_saved_rsp):"memory"); 658 + (kcb->jprobe_saved_rsp):"memory"); 664 659 } 665 660 666 661 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 667 662 { 663 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 668 664 u8 *addr = (u8 *) (regs->rip - 1); 669 - unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; 665 + unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); 670 666 struct jprobe *jp = container_of(p, struct jprobe, kp); 671 667 672 668 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { 673 - if ((long *)regs->rsp != jprobe_saved_rsp) { 669 + if ((long *)regs->rsp != kcb->jprobe_saved_rsp) { 674 670 struct pt_regs *saved_regs = 675 - container_of(jprobe_saved_rsp, struct pt_regs, rsp); 671 + container_of(kcb->jprobe_saved_rsp, 672 + struct pt_regs, rsp); 676 673 printk("current rsp %p does not match saved rsp %p\n", 677 - (long *)regs->rsp, jprobe_saved_rsp); 674 + (long *)regs->rsp, kcb->jprobe_saved_rsp); 678 675 printk("Saved registers for jprobe %p\n", jp); 679 676 show_registers(saved_regs); 680 677 printk("Current registers\n"); 681 678 show_registers(regs); 682 679 BUG(); 683 680 } 684 - *regs = jprobe_saved_regs; 685 - memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, 681 + *regs = kcb->jprobe_saved_regs; 682 + memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, 686 683 MIN_STACK_SIZE(stack_addr)); 687 684 
return 1; 688 685 }
+19
include/asm-x86_64/kprobes.h
··· 25 25 */ 26 26 #include <linux/types.h> 27 27 #include <linux/ptrace.h> 28 + #include <linux/percpu.h> 28 29 29 30 struct pt_regs; 30 31 ··· 47 46 struct arch_specific_insn { 48 47 /* copy of the original instruction */ 49 48 kprobe_opcode_t *insn; 49 + }; 50 + 51 + struct prev_kprobe { 52 + struct kprobe *kp; 53 + unsigned long status; 54 + unsigned long old_rflags; 55 + unsigned long saved_rflags; 56 + }; 57 + 58 + /* per-cpu kprobe control block */ 59 + struct kprobe_ctlblk { 60 + unsigned long kprobe_status; 61 + unsigned long kprobe_old_rflags; 62 + unsigned long kprobe_saved_rflags; 63 + long *jprobe_saved_rsp; 64 + struct pt_regs jprobe_saved_regs; 65 + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; 66 + struct prev_kprobe prev_kprobe; 50 67 }; 51 68 52 69 /* trap3/1 are intr gates for kprobes. So, restore the status of IF,