Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] Kprobes: prevent possible race conditions generic

There are possible race conditions if probes are placed on routines within the
kprobes files and routines used by kprobes. For example, if you put a probe
on the get_kprobe() routine, the system can hang while inserting probes on any
routine such as do_fork(). This is because, while inserting probes on do_fork(),
the register_kprobe() routine grabs the kprobes spin lock and executes the
get_kprobe() routine; to handle the probe of get_kprobe(), kprobes_handler()
gets executed and tries to grab the kprobes spin lock, and spins forever. This
patch avoids such possible race conditions by preventing probes on routines
within the kprobes file and routines used by kprobes.

I have modified the patches as per Andi Kleen's suggestion to move kprobes
routines and other routines used by kprobes to a separate section,
.kprobes.text.

Also moved the page fault handlers, exception handlers, and the general
protection fault handler to the .kprobes.text section.

These patches have been tested on i386, x86_64 and ppc64 architectures, also
compiled on ia64 and sparc64 architectures.

Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Prasanna S Panchamukhi and committed by
Linus Torvalds
d0aaff97 505db036

+60 -29
+1
include/asm-generic/sections.h
··· 12 12 extern char _eextratext[] __attribute__((weak)); 13 13 extern char _end[]; 14 14 extern char __per_cpu_start[], __per_cpu_end[]; 15 + extern char __kprobes_text_start[], __kprobes_text_end[]; 15 16 16 17 #endif /* _ASM_GENERIC_SECTIONS_H_ */
+6
include/asm-generic/vmlinux.lds.h
··· 97 97 VMLINUX_SYMBOL(__lock_text_start) = .; \ 98 98 *(.spinlock.text) \ 99 99 VMLINUX_SYMBOL(__lock_text_end) = .; 100 + 101 + #define KPROBES_TEXT \ 102 + ALIGN_FUNCTION(); \ 103 + VMLINUX_SYMBOL(__kprobes_text_start) = .; \ 104 + *(.kprobes.text) \ 105 + VMLINUX_SYMBOL(__kprobes_text_end) = .;
+3
include/linux/kprobes.h
··· 42 42 #define KPROBE_REENTER 0x00000004 43 43 #define KPROBE_HIT_SSDONE 0x00000008 44 44 45 + /* Attach to insert probes on any functions which should be ignored*/ 46 + #define __kprobes __attribute__((__section__(".kprobes.text"))) 47 + 45 48 struct kprobe; 46 49 struct pt_regs; 47 50 struct kretprobe;
+7
include/linux/linkage.h
··· 33 33 ALIGN; \ 34 34 name: 35 35 36 + #define KPROBE_ENTRY(name) \ 37 + .section .kprobes.text, "ax"; \ 38 + .globl name; \ 39 + ALIGN; \ 40 + name: 41 + 42 + 36 43 #endif 37 44 38 45 #define NORET_TYPE /**/
+43 -29
kernel/kprobes.c
··· 37 37 #include <linux/init.h> 38 38 #include <linux/module.h> 39 39 #include <linux/moduleloader.h> 40 + #include <asm-generic/sections.h> 40 41 #include <asm/cacheflush.h> 41 42 #include <asm/errno.h> 42 43 #include <asm/kdebug.h> ··· 73 72 * get_insn_slot() - Find a slot on an executable page for an instruction. 74 73 * We allocate an executable page if there's no room on existing ones. 75 74 */ 76 - kprobe_opcode_t *get_insn_slot(void) 75 + kprobe_opcode_t __kprobes *get_insn_slot(void) 77 76 { 78 77 struct kprobe_insn_page *kip; 79 78 struct hlist_node *pos; ··· 118 117 return kip->insns; 119 118 } 120 119 121 - void free_insn_slot(kprobe_opcode_t *slot) 120 + void __kprobes free_insn_slot(kprobe_opcode_t *slot) 122 121 { 123 122 struct kprobe_insn_page *kip; 124 123 struct hlist_node *pos; ··· 153 152 } 154 153 155 154 /* Locks kprobe: irqs must be disabled */ 156 - void lock_kprobes(void) 155 + void __kprobes lock_kprobes(void) 157 156 { 158 157 spin_lock(&kprobe_lock); 159 158 kprobe_cpu = smp_processor_id(); 160 159 } 161 160 162 - void unlock_kprobes(void) 161 + void __kprobes unlock_kprobes(void) 163 162 { 164 163 kprobe_cpu = NR_CPUS; 165 164 spin_unlock(&kprobe_lock); 166 165 } 167 166 168 167 /* You have to be holding the kprobe_lock */ 169 - struct kprobe *get_kprobe(void *addr) 168 + struct kprobe __kprobes *get_kprobe(void *addr) 170 169 { 171 170 struct hlist_head *head; 172 171 struct hlist_node *node; ··· 184 183 * Aggregate handlers for multiple kprobes support - these handlers 185 184 * take care of invoking the individual kprobe handlers on p->list 186 185 */ 187 - static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 186 + static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 188 187 { 189 188 struct kprobe *kp; 190 189 ··· 199 198 return 0; 200 199 } 201 200 202 - static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 203 - unsigned long flags) 201 + static void __kprobes 
aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 202 + unsigned long flags) 204 203 { 205 204 struct kprobe *kp; 206 205 ··· 214 213 return; 215 214 } 216 215 217 - static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 218 - int trapnr) 216 + static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 217 + int trapnr) 219 218 { 220 219 /* 221 220 * if we faulted "during" the execution of a user specified ··· 228 227 return 0; 229 228 } 230 229 231 - static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 230 + static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 232 231 { 233 232 struct kprobe *kp = curr_kprobe; 234 233 if (curr_kprobe && kp->break_handler) { ··· 241 240 return 0; 242 241 } 243 242 244 - struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) 243 + struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) 245 244 { 246 245 struct hlist_node *node; 247 246 struct kretprobe_instance *ri; ··· 250 249 return NULL; 251 250 } 252 251 253 - static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) 252 + static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe 253 + *rp) 254 254 { 255 255 struct hlist_node *node; 256 256 struct kretprobe_instance *ri; ··· 260 258 return NULL; 261 259 } 262 260 263 - void add_rp_inst(struct kretprobe_instance *ri) 261 + void __kprobes add_rp_inst(struct kretprobe_instance *ri) 264 262 { 265 263 /* 266 264 * Remove rp inst off the free list - ··· 278 276 hlist_add_head(&ri->uflist, &ri->rp->used_instances); 279 277 } 280 278 281 - void recycle_rp_inst(struct kretprobe_instance *ri) 279 + void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) 282 280 { 283 281 /* remove rp inst off the rprobe_inst_table */ 284 282 hlist_del(&ri->hlist); ··· 293 291 kfree(ri); 294 292 } 295 293 296 - struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) 294 + struct 
hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) 297 295 { 298 296 return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; 299 297 } ··· 304 302 * instances associated with this task. These left over instances represent 305 303 * probed functions that have been called but will never return. 306 304 */ 307 - void kprobe_flush_task(struct task_struct *tk) 305 + void __kprobes kprobe_flush_task(struct task_struct *tk) 308 306 { 309 307 struct kretprobe_instance *ri; 310 308 struct hlist_head *head; ··· 324 322 * This kprobe pre_handler is registered with every kretprobe. When probe 325 323 * hits it will set up the return probe. 326 324 */ 327 - static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 325 + static int __kprobes pre_handler_kretprobe(struct kprobe *p, 326 + struct pt_regs *regs) 328 327 { 329 328 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 330 329 ··· 356 353 * Add the new probe to old_p->list. Fail if this is the 357 354 * second jprobe at the address - two jprobes can't coexist 358 355 */ 359 - static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 356 + static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 360 357 { 361 358 struct kprobe *kp; 362 359 ··· 398 395 * the intricacies 399 396 * TODO: Move kcalloc outside the spinlock 400 397 */ 401 - static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) 398 + static int __kprobes register_aggr_kprobe(struct kprobe *old_p, 399 + struct kprobe *p) 402 400 { 403 401 int ret = 0; 404 402 struct kprobe *ap; ··· 438 434 spin_unlock_irqrestore(&kprobe_lock, flags); 439 435 } 440 436 441 - int register_kprobe(struct kprobe *p) 437 + static int __kprobes in_kprobes_functions(unsigned long addr) 438 + { 439 + if (addr >= (unsigned long)__kprobes_text_start 440 + && addr < (unsigned long)__kprobes_text_end) 441 + return -EINVAL; 442 + return 0; 443 + } 444 + 445 + int __kprobes register_kprobe(struct 
kprobe *p) 442 446 { 443 447 int ret = 0; 444 448 unsigned long flags = 0; 445 449 struct kprobe *old_p; 446 450 447 - if ((ret = arch_prepare_kprobe(p)) != 0) { 451 + if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0) 452 + return ret; 453 + if ((ret = arch_prepare_kprobe(p)) != 0) 448 454 goto rm_kprobe; 449 - } 455 + 450 456 spin_lock_irqsave(&kprobe_lock, flags); 451 457 old_p = get_kprobe(p->addr); 452 458 p->nmissed = 0; ··· 480 466 return ret; 481 467 } 482 468 483 - void unregister_kprobe(struct kprobe *p) 469 + void __kprobes unregister_kprobe(struct kprobe *p) 484 470 { 485 471 unsigned long flags; 486 472 struct kprobe *old_p; ··· 501 487 .priority = 0x7fffffff /* we need to notified first */ 502 488 }; 503 489 504 - int register_jprobe(struct jprobe *jp) 490 + int __kprobes register_jprobe(struct jprobe *jp) 505 491 { 506 492 /* Todo: Verify probepoint is a function entry point */ 507 493 jp->kp.pre_handler = setjmp_pre_handler; ··· 510 496 return register_kprobe(&jp->kp); 511 497 } 512 498 513 - void unregister_jprobe(struct jprobe *jp) 499 + void __kprobes unregister_jprobe(struct jprobe *jp) 514 500 { 515 501 unregister_kprobe(&jp->kp); 516 502 } 517 503 518 504 #ifdef ARCH_SUPPORTS_KRETPROBES 519 505 520 - int register_kretprobe(struct kretprobe *rp) 506 + int __kprobes register_kretprobe(struct kretprobe *rp) 521 507 { 522 508 int ret = 0; 523 509 struct kretprobe_instance *inst; ··· 554 540 555 541 #else /* ARCH_SUPPORTS_KRETPROBES */ 556 542 557 - int register_kretprobe(struct kretprobe *rp) 543 + int __kprobes register_kretprobe(struct kretprobe *rp) 558 544 { 559 545 return -ENOSYS; 560 546 } 561 547 562 548 #endif /* ARCH_SUPPORTS_KRETPROBES */ 563 549 564 - void unregister_kretprobe(struct kretprobe *rp) 550 + void __kprobes unregister_kretprobe(struct kretprobe *rp) 565 551 { 566 552 unsigned long flags; 567 553 struct kretprobe_instance *ri;