Merge branches 'perf-fixes-for-linus' and 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
jump label: Add work around to i386 gcc asm goto bug
x86, ftrace: Use safe noops, drop trap test
jump_label: Fix unaligned traps on sparc.
jump label: Make arch_jump_label_text_poke_early() optional
jump label: Fix error with preempt disable holding mutex
oprofile: Remove deprecated use of flush_scheduled_work()
oprofile: Fix the hang while taking the cpu offline
jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex
jump label: Fix module __init section race

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86: Check irq_remapped instead of remapping_enabled in destroy_irq()

12 files changed, 153 insertions(+), 82 deletions(-)
arch/Kconfig | +14
···
           for kernel debugging, non-intrusive instrumentation and testing.
           If in doubt, say "N".
 
+config JUMP_LABEL
+       bool "Optimize trace point call sites"
+       depends on HAVE_ARCH_JUMP_LABEL
+       help
+         If it is detected that the compiler has support for "asm goto",
+         the kernel will compile trace point locations with just a
+         nop instruction. When trace points are enabled, the nop will
+         be converted to a jump to the trace function. This technique
+         lowers overhead and stress on the branch prediction of the
+         processor.
+
+         On i386, options added to the compiler flags may increase
+         the size of the kernel slightly.
+
 config OPTPROBES
        def_bool y
        depends on KPROBES && HAVE_OPTPROBES
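For readers who have not seen the mechanism this option enables: the branch site is compiled down to a single 5-byte nop, and the compiler's "asm goto" support records the jump target so the kernel can later patch the nop into a real jmp when the trace point is switched on. A minimal user-space sketch of the idea follows (x86 only, gcc with "asm goto" support; the function and label names are made up for illustration and this is not kernel code):

/*
 * Hypothetical stand-alone illustration of the jump-label idea.
 * Build with: gcc -O2 demo.c   (x86/x86-64, gcc 4.5+ with "asm goto")
 */
#include <stdio.h>

static inline int trace_point_enabled(void)
{
        /*
         * Emit a single 5-byte nop (the P6 "nopl") at the branch site and
         * tell the compiler that control may transfer to "do_trace".  In
         * the kernel, enabling the trace point rewrites these 5 bytes into
         * "jmp do_trace" at runtime; nothing is tested at the call site.
         */
        asm goto(".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
                 : : : : do_trace);
        return 0;               /* straight-line path: trace point off */
do_trace:
        return 1;               /* reached only once the nop is patched */
}

int main(void)
{
        if (trace_point_enabled())
                printf("trace point hit\n");
        else
                printf("trace point disabled, only a nop executed\n");
        return 0;
}

Run as-is it always takes the nop path; the point is that enabling the branch needs no conditional test at the call site, only a 5-byte text patch.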
arch/sparc/include/asm/jump_label.h | +1
···
                 "nop\n\t"                                       \
                 "nop\n\t"                                       \
                 ".pushsection __jump_table, \"a\"\n\t"          \
+                ".align 4\n\t"                                  \
                 ".word 1b, %l[" #label "], %c0\n\t"             \
                 ".popsection \n\t"                              \
                 : : "i" (key) : : label);                       \
arch/x86/Makefile_32.cpu | +12 -1
···
 # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
 # tracer assumptions. For i686, generic, core2 this is set by the
 # compiler anyway
-cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
+ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+# Work around to a bug with asm goto with first implementations of it
+# in gcc causing gcc to mess up the push and pop of the stack in some
+# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
 
 # Bug fix for binutils: this option is required in order to keep
 # binutils from generating NOPL instructions against our will.
arch/x86/kernel/alternative.c | +15 -54
···
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
-unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+#ifdef CONFIG_X86_64
+unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+#else
+unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#endif
 
 void __init arch_init_ideal_nop5(void)
 {
-        extern const unsigned char ftrace_test_p6nop[];
-        extern const unsigned char ftrace_test_nop5[];
-        extern const unsigned char ftrace_test_jmp[];
-        int faulted = 0;
-
         /*
-         * There is no good nop for all x86 archs.
-         * We will default to using the P6_NOP5, but first we
-         * will test to make sure that the nop will actually
-         * work on this CPU. If it faults, we will then
-         * go to a lesser efficient 5 byte nop. If that fails
-         * we then just use a jmp as our nop. This isn't the most
-         * efficient nop, but we can not use a multi part nop
-         * since we would then risk being preempted in the middle
-         * of that nop, and if we enabled tracing then, it might
-         * cause a system crash.
+         * There is no good nop for all x86 archs. This selection
+         * algorithm should be unified with the one in find_nop_table(),
+         * but this should be good enough for now.
          *
-         * TODO: check the cpuid to determine the best nop.
+         * For cases other than the ones below, use the safe (as in
+         * always functional) defaults above.
          */
-        asm volatile (
-                "ftrace_test_jmp:"
-                "jmp ftrace_test_p6nop\n"
-                "nop\n"
-                "nop\n"
-                "nop\n"  /* 2 byte jmp + 3 bytes */
-                "ftrace_test_p6nop:"
-                P6_NOP5
-                "jmp 1f\n"
-                "ftrace_test_nop5:"
-                ".byte 0x66,0x66,0x66,0x66,0x90\n"
-                "1:"
-                ".section .fixup, \"ax\"\n"
-                "2: movl $1, %0\n"
-                "   jmp ftrace_test_nop5\n"
-                "3: movl $2, %0\n"
-                "   jmp 1b\n"
-                ".previous\n"
-                _ASM_EXTABLE(ftrace_test_p6nop, 2b)
-                _ASM_EXTABLE(ftrace_test_nop5, 3b)
-                : "=r"(faulted) : "0" (faulted));
-
-        switch (faulted) {
-        case 0:
-                pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-                memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
-                break;
-        case 1:
-                pr_info("converting mcount calls to 66 66 66 66 90\n");
-                memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
-                break;
-        case 2:
-                pr_info("converting mcount calls to jmp . + 5\n");
-                memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
-                break;
-        }
-
+#ifdef CONFIG_X86_64
+        /* Don't use these on 32 bits due to broken virtualizers */
+        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+                memcpy(ideal_nop5, p6_nops[5], 5);
+#endif
 }
 #endif
arch/x86/kernel/apic/io_apic.c | +1 -1
···
 
         irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
 
-        if (intr_remapping_enabled)
+        if (irq_remapped(cfg))
                 free_irte(irq);
         raw_spin_lock_irqsave(&vector_lock, flags);
         __clear_irq_vector(irq, cfg);
drivers/oprofile/buffer_sync.c | +1 -1
···
         profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
         task_handoff_unregister(&task_free_nb);
         mutex_unlock(&buffer_mutex);
-        flush_scheduled_work();
+        flush_cpu_work();
 
         /* make sure we don't leak task structs */
         process_task_mortuary();
drivers/oprofile/cpu_buffer.c | +7 -3
···
 
 void end_cpu_work(void)
 {
-        int i;
-
         work_enabled = 0;
+}
+
+void flush_cpu_work(void)
+{
+        int i;
 
         for_each_online_cpu(i) {
                 struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
 
-                cancel_delayed_work(&b->work);
+                /* these works are per-cpu, no need for flush_sync */
+                flush_delayed_work(&b->work);
         }
 }
 
drivers/oprofile/cpu_buffer.h | +1
···
 
 void start_cpu_work(void);
 void end_cpu_work(void);
+void flush_cpu_work(void);
 
 /* CPU buffer is composed of such entries (which are
  * also used for context switch notes)
drivers/oprofile/timer_int.c | +13
···
 #include "oprof.h"
 
 static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer);
+static int ctr_running;
 
 static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer)
 {
···
 {
         struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer);
 
+        if (!ctr_running)
+                return;
+
         hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         hrtimer->function = oprofile_hrtimer_notify;
 
···
 
 static int oprofile_hrtimer_start(void)
 {
+        get_online_cpus();
+        ctr_running = 1;
         on_each_cpu(__oprofile_hrtimer_start, NULL, 1);
+        put_online_cpus();
         return 0;
 }
 
 static void __oprofile_hrtimer_stop(int cpu)
 {
         struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu);
+
+        if (!ctr_running)
+                return;
 
         hrtimer_cancel(hrtimer);
 }
···
 {
         int cpu;
 
+        get_online_cpus();
         for_each_online_cpu(cpu)
                 __oprofile_hrtimer_stop(cpu);
+        ctr_running = 0;
+        put_online_cpus();
 }
 
 static int __cpuinit oprofile_cpu_notify(struct notifier_block *self,
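The fix above combines a ctr_running flag, checked in the per-CPU start/stop helpers, with get_online_cpus()/put_online_cpus() around the code that walks the online CPUs, so a CPU appearing or disappearing mid-operation can neither arm a stray hrtimer nor stall the stop path. A rough user-space analogy of that flag-under-the-hotplug-lock pattern (made-up names, a pthread mutex standing in for the CPU-hotplug lock; not the kernel code):

/* Build with: gcc -pthread flag_demo.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ get_online_cpus() */
static bool ctr_running;
static bool timer_armed[NR_CPUS];

static void cpu_start(int cpu)          /* ~ __oprofile_hrtimer_start() */
{
        if (!ctr_running)               /* late starters must bail out */
                return;
        timer_armed[cpu] = true;
}

static void profiling_start(void)
{
        pthread_mutex_lock(&hotplug_lock);
        ctr_running = true;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                cpu_start(cpu);
        pthread_mutex_unlock(&hotplug_lock);
}

static void profiling_stop(void)
{
        pthread_mutex_lock(&hotplug_lock);
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                timer_armed[cpu] = false;   /* ~ hrtimer_cancel() per CPU */
        ctr_running = false;
        pthread_mutex_unlock(&hotplug_lock);
}

/* A "CPU online" event arriving after profiling_stop() is harmless:
 * cpu_start() sees ctr_running == false and arms nothing. */
static void cpu_online_event(int cpu)
{
        pthread_mutex_lock(&hotplug_lock);
        cpu_start(cpu);
        pthread_mutex_unlock(&hotplug_lock);
}

int main(void)
{
        profiling_start();
        profiling_stop();
        cpu_online_event(2);
        printf("cpu2 armed after stop: %s\n", timer_armed[2] ? "yes (bug)" : "no");
        return 0;
}

The late online event finds ctr_running false and arms nothing, which is what the added checks in __oprofile_hrtimer_start() and __oprofile_hrtimer_stop() guarantee in the real driver.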
include/linux/jump_label.h | +6 -1
···
 #ifndef _LINUX_JUMP_LABEL_H
 #define _LINUX_JUMP_LABEL_H
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
 #endif
···
 extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
+extern void jump_label_lock(void);
+extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
                                       enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
···
 {
         return 0;
 }
+
+static inline void jump_label_lock(void) {}
+static inline void jump_label_unlock(void) {}
 
 #endif
 
kernel/jump_label.c | +66 -11
···
         struct module *mod;
 };
 
+void jump_label_lock(void)
+{
+        mutex_lock(&jump_label_mutex);
+}
+
+void jump_label_unlock(void)
+{
+        mutex_unlock(&jump_label_mutex);
+}
+
 static int jump_label_cmp(const void *a, const void *b)
 {
         const struct jump_entry *jea = a;
···
         struct jump_label_module_entry *e_module;
         int count;
 
-        mutex_lock(&jump_label_mutex);
+        jump_label_lock();
         entry = get_jump_label_entry((jump_label_t)key);
         if (entry) {
                 count = entry->nr_entries;
···
                         count = e_module->nr_entries;
                         iter = e_module->table;
                         while (count--) {
-                                if (kernel_text_address(iter->code))
+                                if (iter->key &&
+                                    kernel_text_address(iter->code))
                                         arch_jump_label_transform(iter, type);
                                 iter++;
                         }
                 }
         }
-        mutex_unlock(&jump_label_mutex);
+        jump_label_unlock();
 }
 
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
···
  * overlaps with any of the jump label patch addresses. Code
  * that wants to modify kernel text should first verify that
  * it does not overlap with any of the jump label addresses.
+ * Caller must hold jump_label_mutex.
  *
  * returns 1 if there is an overlap, 0 otherwise
  */
···
         struct jump_entry *iter_stop = __start___jump_table;
         int conflict = 0;
 
-        mutex_lock(&jump_label_mutex);
         iter = iter_start;
         while (iter < iter_stop) {
                 if (addr_conflict(iter, start, end)) {
···
         conflict = module_conflict(start, end);
 #endif
 out:
-        mutex_unlock(&jump_label_mutex);
         return conflict;
+}
+
+/*
+ * Not all archs need this.
+ */
+void __weak arch_jump_label_text_poke_early(jump_label_t addr)
+{
 }
 
 static __init int init_jump_label(void)
···
         struct jump_entry *iter_stop = __stop___jump_table;
         struct jump_entry *iter;
 
-        mutex_lock(&jump_label_mutex);
+        jump_label_lock();
         ret = build_jump_label_hashtable(__start___jump_table,
                                          __stop___jump_table);
         iter = iter_start;
···
                 arch_jump_label_text_poke_early(iter->code);
                 iter++;
         }
-        mutex_unlock(&jump_label_mutex);
+        jump_label_unlock();
         return ret;
 }
 early_initcall(init_jump_label);
···
         }
 }
 
+static void remove_jump_label_module_init(struct module *mod)
+{
+        struct hlist_head *head;
+        struct hlist_node *node, *node_next, *module_node, *module_node_next;
+        struct jump_label_entry *e;
+        struct jump_label_module_entry *e_module;
+        struct jump_entry *iter;
+        int i, count;
+
+        /* if the module doesn't have jump label entries, just return */
+        if (!mod->num_jump_entries)
+                return;
+
+        for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
+                head = &jump_label_table[i];
+                hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
+                        hlist_for_each_entry_safe(e_module, module_node,
+                                                  module_node_next,
+                                                  &(e->modules), hlist) {
+                                if (e_module->mod != mod)
+                                        continue;
+                                count = e_module->nr_entries;
+                                iter = e_module->table;
+                                while (count--) {
+                                        if (within_module_init(iter->code, mod))
+                                                iter->key = 0;
+                                        iter++;
+                                }
+                        }
+                }
+        }
+}
+
 static int
 jump_label_module_notify(struct notifier_block *self, unsigned long val,
                          void *data)
···
 
         switch (val) {
         case MODULE_STATE_COMING:
-                mutex_lock(&jump_label_mutex);
+                jump_label_lock();
                 ret = add_jump_label_module(mod);
                 if (ret)
                         remove_jump_label_module(mod);
-                mutex_unlock(&jump_label_mutex);
+                jump_label_unlock();
                 break;
         case MODULE_STATE_GOING:
-                mutex_lock(&jump_label_mutex);
+                jump_label_lock();
                 remove_jump_label_module(mod);
-                mutex_unlock(&jump_label_mutex);
+                jump_label_unlock();
+                break;
+        case MODULE_STATE_LIVE:
+                jump_label_lock();
+                remove_jump_label_module_init(mod);
+                jump_label_unlock();
                 break;
         }
         return ret;
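The __weak definition added above is what makes arch_jump_label_text_poke_early() optional: an architecture that needs early nop patching ships a strong definition, and everyone else silently links against the empty default with no #ifdef at the call site. A small user-space sketch of that weak-symbol behaviour (hypothetical names, not the kernel build):

/* hook.c - an overridable default via a weak symbol */
#include <stdio.h>

/* Default no-op, analogous to the __weak arch_jump_label_text_poke_early()
 * stub above: used only if no other object file defines hook(). */
__attribute__((weak)) void hook(unsigned long addr)
{
        printf("default hook: nothing to do at %#lx\n", addr);
}

int main(void)
{
        hook(0xdeadbeef);
        return 0;
}

Linking in a second object file that provides a non-weak hook() makes the linker prefer it over the weak default, which is how an architecture that does need early patching supplies its real implementation.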
kernel/kprobes.c | +16 -10
···
         if (ret)
                 return ret;
 
+        jump_label_lock();
         preempt_disable();
         if (!kernel_text_address((unsigned long) p->addr) ||
             in_kprobes_functions((unsigned long) p->addr) ||
             ftrace_text_reserved(p->addr, p->addr) ||
-            jump_label_text_reserved(p->addr, p->addr)) {
-                preempt_enable();
-                return -EINVAL;
-        }
+            jump_label_text_reserved(p->addr, p->addr))
+                goto fail_with_jump_label;
 
         /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
         p->flags &= KPROBE_FLAG_DISABLED;
···
                  * We must hold a refcount of the probed module while updating
                  * its code to prohibit unexpected unloading.
                  */
-                if (unlikely(!try_module_get(probed_mod))) {
-                        preempt_enable();
-                        return -EINVAL;
-                }
+                if (unlikely(!try_module_get(probed_mod)))
+                        goto fail_with_jump_label;
+
                 /*
                  * If the module freed .init.text, we couldn't insert
                  * kprobes in there.
···
                 if (within_module_init((unsigned long)p->addr, probed_mod) &&
                     probed_mod->state != MODULE_STATE_COMING) {
                         module_put(probed_mod);
-                        preempt_enable();
-                        return -EINVAL;
+                        goto fail_with_jump_label;
                 }
         }
         preempt_enable();
+        jump_label_unlock();
 
         p->nmissed = 0;
         INIT_LIST_HEAD(&p->list);
         mutex_lock(&kprobe_mutex);
+
+        jump_label_lock(); /* needed to call jump_label_text_reserved() */
 
         get_online_cpus();      /* For avoiding text_mutex deadlock. */
         mutex_lock(&text_mutex);
···
 out:
         mutex_unlock(&text_mutex);
         put_online_cpus();
+        jump_label_unlock();
         mutex_unlock(&kprobe_mutex);
 
         if (probed_mod)
                 module_put(probed_mod);
 
         return ret;
+
+fail_with_jump_label:
+        preempt_enable();
+        jump_label_unlock();
+        return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
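The locking changes above keep the two locks in one global order: jump_label_text_reserved() no longer takes jump_label_mutex itself (its caller must, per the comment added in kernel/jump_label.c), and register_kprobe() now takes jump_label_lock() before text_mutex, matching the order used when jump labels are patched. That consistent ordering is the usual cure for the AB/BA deadlock named in the shortlog. A generic user-space illustration of the rule, with made-up lock names rather than the kernel's:

/* Two threads, two mutexes: deadlock-free because both paths honour the
 * same acquisition order (label_lock first, then text_lock), mirroring
 * jump_label_lock() -> text_mutex above.  Build with: gcc -pthread */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t label_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t text_lock  = PTHREAD_MUTEX_INITIALIZER;

static void patch_text(const char *who)
{
        pthread_mutex_lock(&label_lock);        /* always first  */
        pthread_mutex_lock(&text_lock);         /* always second */
        printf("%s: patching while holding both locks\n", who);
        pthread_mutex_unlock(&text_lock);
        pthread_mutex_unlock(&label_lock);
}

static void *kprobe_path(void *unused)
{
        (void)unused;
        patch_text("kprobe-like path");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, kprobe_path, NULL);
        patch_text("jump-label-like path");     /* same order, no AB/BA */
        pthread_join(t, NULL);
        return 0;
}

If one path took text_lock first while the other started with label_lock, each could end up holding one lock while waiting forever for the other; a single agreed order makes that cycle impossible.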