Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kprobes: Introduce NOKPROBE_SYMBOL() macro to maintain kprobes blacklist

Introduce NOKPROBE_SYMBOL() macro which builds a kprobes
blacklist at kernel build time.

The usage of this macro is similar to EXPORT_SYMBOL(),
placed after the function definition:

NOKPROBE_SYMBOL(function);

Since this macro will inhibit inlining of static/inline
functions, this patch also introduces a nokprobe_inline macro
for static/inline functions. In this case, we must use
NOKPROBE_SYMBOL() for the inline function caller.

When CONFIG_KPROBES=y, the macro stores the given function
address in the "_kprobe_blacklist" section.

Since the data structures are not fully initialized by the
macro (because there is no "size" information), those
are re-initialized at boot time by using kallsyms.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Link: http://lkml.kernel.org/r/20140417081705.26341.96719.stgit@ltc230.yrl.intra.hitachi.co.jp
Cc: Alok Kataria <akataria@vmware.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jan-Simon Möller <dl9pf@gmx.de>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-sparse@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Masami Hiramatsu and committed by
Ingo Molnar
376e2424 be8f2743

+107 -52
+15 -1
Documentation/kprobes.txt
··· 22 22 23 23 Kprobes enables you to dynamically break into any kernel routine and 24 24 collect debugging and performance information non-disruptively. You 25 - can trap at almost any kernel code address, specifying a handler 25 + can trap at almost any kernel code address(*), specifying a handler 26 26 routine to be invoked when the breakpoint is hit. 27 + (*: some parts of the kernel code can not be trapped, see 1.5 Blacklist) 27 28 28 29 There are currently three types of probes: kprobes, jprobes, and 29 30 kretprobes (also called return probes). A kprobe can be inserted ··· 273 272 - Specify an empty function for the kprobe's post_handler or break_handler. 274 273 or 275 274 - Execute 'sysctl -w debug.kprobes_optimization=n' 275 + 276 + 1.5 Blacklist 277 + 278 + Kprobes can probe most of the kernel except itself. This means 279 + that there are some functions where kprobes cannot probe. Probing 280 + (trapping) such functions can cause a recursive trap (e.g. double 281 + fault) or the nested probe handler may never be called. 282 + Kprobes manages such functions as a blacklist. 283 + If you want to add a function into the blacklist, you just need 284 + to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro 285 + to specify a blacklisted function. 286 + Kprobes checks the given probe address against the blacklist and 287 + rejects registering it, if the given address is in the blacklist. 276 288 277 289 2. Architectures Supported 278 290
+7
arch/x86/include/asm/asm.h
··· 57 57 .long (from) - . ; \ 58 58 .long (to) - . + 0x7ffffff0 ; \ 59 59 .popsection 60 + 61 + # define _ASM_NOKPROBE(entry) \ 62 + .pushsection "_kprobe_blacklist","aw" ; \ 63 + _ASM_ALIGN ; \ 64 + _ASM_PTR (entry); \ 65 + .popsection 60 66 #else 61 67 # define _ASM_EXTABLE(from,to) \ 62 68 " .pushsection \"__ex_table\",\"a\"\n" \ ··· 77 71 " .long (" #from ") - .\n" \ 78 72 " .long (" #to ") - . + 0x7ffffff0\n" \ 79 73 " .popsection\n" 74 + /* For C file, we already have NOKPROBE_SYMBOL macro */ 80 75 #endif 81 76 82 77 #endif /* _ASM_X86_ASM_H */
+4
arch/x86/kernel/paravirt.c
··· 23 23 #include <linux/efi.h> 24 24 #include <linux/bcd.h> 25 25 #include <linux/highmem.h> 26 + #include <linux/kprobes.h> 26 27 27 28 #include <asm/bug.h> 28 29 #include <asm/paravirt.h> ··· 389 388 .start_context_switch = paravirt_nop, 390 389 .end_context_switch = paravirt_nop, 391 390 }; 391 + 392 + /* At this point, native_get_debugreg has a real function entry */ 393 + NOKPROBE_SYMBOL(native_get_debugreg); 392 394 393 395 struct pv_apic_ops pv_apic_ops = { 394 396 #ifdef CONFIG_X86_LOCAL_APIC
+9
include/asm-generic/vmlinux.lds.h
··· 109 109 #define BRANCH_PROFILE() 110 110 #endif 111 111 112 + #ifdef CONFIG_KPROBES 113 + #define KPROBE_BLACKLIST() VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ 114 + *(_kprobe_blacklist) \ 115 + VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; 116 + #else 117 + #define KPROBE_BLACKLIST() 118 + #endif 119 + 112 120 #ifdef CONFIG_EVENT_TRACING 113 121 #define FTRACE_EVENTS() . = ALIGN(8); \ 114 122 VMLINUX_SYMBOL(__start_ftrace_events) = .; \ ··· 515 507 *(.init.rodata) \ 516 508 FTRACE_EVENTS() \ 517 509 TRACE_SYSCALLS() \ 510 + KPROBE_BLACKLIST() \ 518 511 MEM_DISCARD(init.rodata) \ 519 512 CLK_OF_TABLES() \ 520 513 RESERVEDMEM_OF_TABLES() \
+2
include/linux/compiler.h
··· 374 374 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ 375 375 #ifdef CONFIG_KPROBES 376 376 # define __kprobes __attribute__((__section__(".kprobes.text"))) 377 + # define nokprobe_inline __always_inline 377 378 #else 378 379 # define __kprobes 380 + # define nokprobe_inline inline 379 381 #endif 380 382 #endif /* __LINUX_COMPILER_H */
+17 -3
include/linux/kprobes.h
··· 205 205 void *addr; 206 206 }; 207 207 208 - struct kprobe_blackpoint { 209 - const char *name; 208 + struct kprobe_blacklist_entry { 209 + struct list_head list; 210 210 unsigned long start_addr; 211 - unsigned long range; 211 + unsigned long end_addr; 212 212 }; 213 213 214 214 #ifdef CONFIG_KPROBES ··· 476 476 { 477 477 return enable_kprobe(&jp->kp); 478 478 } 479 + 480 + #ifdef CONFIG_KPROBES 481 + /* 482 + * Blacklist generating macro. Specify functions which are not probed 483 + * by using this macro. 484 + */ 485 + #define __NOKPROBE_SYMBOL(fname) \ 486 + static unsigned long __used \ 487 + __attribute__((section("_kprobe_blacklist"))) \ 488 + _kbl_addr_##fname = (unsigned long)fname; 489 + #define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) 490 + #else 491 + #define NOKPROBE_SYMBOL(fname) 492 + #endif 479 493 480 494 #endif /* _LINUX_KPROBES_H */
+52 -48
kernel/kprobes.c
··· 86 86 return &(kretprobe_table_locks[hash].lock); 87 87 } 88 88 89 - /* 90 - * Normally, functions that we'd want to prohibit kprobes in, are marked 91 - * __kprobes. But, there are cases where such functions already belong to 92 - * a different section (__sched for preempt_schedule) 93 - * 94 - * For such cases, we now have a blacklist 95 - */ 96 - static struct kprobe_blackpoint kprobe_blacklist[] = { 97 - {"preempt_schedule",}, 98 - {"native_get_debugreg",}, 99 - {NULL} /* Terminator */ 100 - }; 89 + /* Blacklist -- list of struct kprobe_blacklist_entry */ 90 + static LIST_HEAD(kprobe_blacklist); 101 91 102 92 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT 103 93 /* ··· 1318 1328 addr < (unsigned long)__kprobes_text_end; 1319 1329 } 1320 1330 1321 - static int __kprobes in_kprobes_functions(unsigned long addr) 1331 + static bool __kprobes within_kprobe_blacklist(unsigned long addr) 1322 1332 { 1323 - struct kprobe_blackpoint *kb; 1333 + struct kprobe_blacklist_entry *ent; 1324 1334 1325 1335 if (arch_within_kprobe_blacklist(addr)) 1326 - return -EINVAL; 1336 + return true; 1327 1337 /* 1328 1338 * If there exists a kprobe_blacklist, verify and 1329 1339 * fail any probe registration in the prohibited area 1330 1340 */ 1331 - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 1332 - if (kb->start_addr) { 1333 - if (addr >= kb->start_addr && 1334 - addr < (kb->start_addr + kb->range)) 1335 - return -EINVAL; 1336 - } 1341 + list_for_each_entry(ent, &kprobe_blacklist, list) { 1342 + if (addr >= ent->start_addr && addr < ent->end_addr) 1343 + return true; 1337 1344 } 1338 - return 0; 1345 + 1346 + return false; 1339 1347 } 1340 1348 1341 1349 /* ··· 1424 1436 1425 1437 /* Ensure it is not in reserved area nor out of text */ 1426 1438 if (!kernel_text_address((unsigned long) p->addr) || 1427 - in_kprobes_functions((unsigned long) p->addr) || 1439 + within_kprobe_blacklist((unsigned long) p->addr) || 1428 1440 jump_label_text_reserved(p->addr, p->addr)) { 1429 1441 ret = 
-EINVAL; 1430 1442 goto out; ··· 2010 2022 kp->symbol_name, kp->addr, kp->offset); 2011 2023 } 2012 2024 2025 + /* 2026 + * Lookup and populate the kprobe_blacklist. 2027 + * 2028 + * Unlike the kretprobe blacklist, we'll need to determine 2029 + * the range of addresses that belong to the said functions, 2030 + * since a kprobe need not necessarily be at the beginning 2031 + * of a function. 2032 + */ 2033 + static int __init populate_kprobe_blacklist(unsigned long *start, 2034 + unsigned long *end) 2035 + { 2036 + unsigned long *iter; 2037 + struct kprobe_blacklist_entry *ent; 2038 + unsigned long offset = 0, size = 0; 2039 + 2040 + for (iter = start; iter < end; iter++) { 2041 + if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { 2042 + pr_err("Failed to find blacklist %p\n", (void *)*iter); 2043 + continue; 2044 + } 2045 + 2046 + ent = kmalloc(sizeof(*ent), GFP_KERNEL); 2047 + if (!ent) 2048 + return -ENOMEM; 2049 + ent->start_addr = *iter; 2050 + ent->end_addr = *iter + size; 2051 + INIT_LIST_HEAD(&ent->list); 2052 + list_add_tail(&ent->list, &kprobe_blacklist); 2053 + } 2054 + return 0; 2055 + } 2056 + 2013 2057 /* Module notifier call back, checking kprobes on the module */ 2014 2058 static int __kprobes kprobes_module_callback(struct notifier_block *nb, 2015 2059 unsigned long val, void *data) ··· 2085 2065 .priority = 0 2086 2066 }; 2087 2067 2068 + /* Markers of _kprobe_blacklist section */ 2069 + extern unsigned long __start_kprobe_blacklist[]; 2070 + extern unsigned long __stop_kprobe_blacklist[]; 2071 + 2088 2072 static int __init init_kprobes(void) 2089 2073 { 2090 2074 int i, err = 0; 2091 - unsigned long offset = 0, size = 0; 2092 - char *modname, namebuf[KSYM_NAME_LEN]; 2093 - const char *symbol_name; 2094 - void *addr; 2095 - struct kprobe_blackpoint *kb; 2096 2075 2097 2076 /* FIXME allocate the probe table, currently defined statically */ 2098 2077 /* initialize all list heads */ ··· 2101 2082 
raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); 2102 2083 } 2103 2084 2104 - /* 2105 - * Lookup and populate the kprobe_blacklist. 2106 - * 2107 - * Unlike the kretprobe blacklist, we'll need to determine 2108 - * the range of addresses that belong to the said functions, 2109 - * since a kprobe need not necessarily be at the beginning 2110 - * of a function. 2111 - */ 2112 - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 2113 - kprobe_lookup_name(kb->name, addr); 2114 - if (!addr) 2115 - continue; 2116 - 2117 - kb->start_addr = (unsigned long)addr; 2118 - symbol_name = kallsyms_lookup(kb->start_addr, 2119 - &size, &offset, &modname, namebuf); 2120 - if (!symbol_name) 2121 - kb->range = 0; 2122 - else 2123 - kb->range = size; 2085 + err = populate_kprobe_blacklist(__start_kprobe_blacklist, 2086 + __stop_kprobe_blacklist); 2087 + if (err) { 2088 + pr_err("kprobes: failed to populate blacklist: %d\n", err); 2089 + pr_err("Please take care of using kprobes.\n"); 2124 2090 } 2125 2091 2126 2092 if (kretprobe_blacklist_size) {
+1
kernel/sched/core.c
··· 2804 2804 barrier(); 2805 2805 } while (need_resched()); 2806 2806 } 2807 + NOKPROBE_SYMBOL(preempt_schedule); 2807 2808 EXPORT_SYMBOL(preempt_schedule); 2808 2809 #endif /* CONFIG_PREEMPT */ 2809 2810