Merge branches 'core-fixes-for-linus', 'x86-fixes-for-linus', 'timers-fixes-for-linus' and 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
rcu: avoid pointless blocked-task warnings
rcu: demote SRCU_SYNCHRONIZE_DELAY from kernel-parameter status
rtmutex: Fix comment about why new_owner can be NULL in wake_futex_pi()

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, olpc: Add missing Kconfig dependencies
x86, mrst: Set correct APB timer IRQ affinity for secondary cpu
x86: tsc: Fix calibration refinement conditionals to avoid divide by zero
x86, ia64, acpi: Clean up x86-ism in drivers/acpi/numa.c

* 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
timekeeping: Make local variables static
time: Rename misnamed minsec argument of clocks_calc_mult_shift()

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
tracing: Remove syscall_exit_fields
tracing: Only process module tracepoints once
perf record: Add "nodelay" mode, disabled by default
perf sched: Fix list of events, dropping unsupported ':r' modifier
Revert "perf tools: Emit clearer message for sys_perf_event_open ENOENT return"
perf top: Fix annotate segv
perf evsel: Fix order of event list deletion

 18 files changed, 84 insertions(+), 79 deletions(-)
arch/x86/Kconfig | +1
···
         bool "One Laptop Per Child support"
         select GPIOLIB
         select OLPC_OPENFIRMWARE
+        depends on !X86_64 && !X86_PAE
         ---help---
           Add support for detecting the unique features of the OLPC
           XO hardware.
arch/x86/kernel/apb_timer.c | +8 -6
···
         if (adev->irq == 0)
                 return;

+        irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+        irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
+        /* APB timer irqs are set up as mp_irqs, timer is edge type */
+        __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
+
         if (system_state == SYSTEM_BOOTING) {
-                irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
-                irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-                /* APB timer irqs are set up as mp_irqs, timer is edge type */
-                __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
                 if (request_irq(adev->irq, apbt_interrupt_handler,
-                                IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
-                                adev->name, adev)) {
+                                IRQF_TIMER | IRQF_DISABLED |
+                                IRQF_NOBALANCING,
+                                adev->name, adev)) {
                         printk(KERN_ERR "Failed request IRQ for APBT%d\n",
                                adev->num);
                 }
arch/x86/kernel/tsc.c | +2 -2
···
                 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

                 /* hpet or pmtimer available ? */
-                if (!hpet && !ref1 && !ref2)
+                if (ref1 == ref2)
                         continue;

                 /* Check, whether the sampling was disturbed by an SMI */
···
         tsc_stop = tsc_read_refs(&ref_stop, hpet);

         /* hpet or pmtimer available ? */
-        if (!hpet && !ref_start && !ref_stop)
+        if (ref_start == ref_stop)
                 goto out;

         /* Check, whether the sampling was disturbed by an SMI */
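
A note on why the new test is sufficient: the calibration and refinement paths end up dividing by the difference between the two reference readings (HPET or ACPI PM timer), and when no reference clock is usable both readings come back the same (zero). Comparing the readings therefore covers both "no reference hardware" and "reference did not advance", while the old !hpet && !ref1 && !ref2 test let a present-but-unchanged reference reach a zero divisor. A hypothetical sketch of the guard, with invented names rather than the actual tsc.c variables:

    #include <stdint.h>

    /*
     * Illustrative only: derive a frequency from two reference readings,
     * refusing to divide when the reference is absent or did not move.
     */
    static uint64_t tsc_khz_from_samples(uint64_t tsc_delta,
                                         uint64_t ref1, uint64_t ref2,
                                         uint64_t ref_khz)
    {
            /*
             * Absent reference => equal (zero) readings; stuck reference
             * => equal readings; either way the divisor below would be 0.
             */
            if (ref1 == ref2)
                    return 0;

            return tsc_delta * ref_khz / (ref2 - ref1);
    }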
drivers/acpi/numa.c | +2 -6
···
 int __init acpi_numa_init(void)
 {
         int ret = 0;
-        int nr_cpu_entries = nr_cpu_ids;

-#ifdef CONFIG_X86
         /*
          * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
          * SRAT cpu entries could have different order with that in MADT.
          * So go over all cpu entries in SRAT to get apicid to node mapping.
          */
-        nr_cpu_entries = MAX_LOCAL_APIC;
-#endif

         /* SRAT: Static Resource Affinity Table */
         if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
                 acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-                                      acpi_parse_x2apic_affinity, nr_cpu_entries);
+                                      acpi_parse_x2apic_affinity, 0);
                 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-                                      acpi_parse_processor_affinity, nr_cpu_entries);
+                                      acpi_parse_processor_affinity, 0);
                 ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
                                             acpi_parse_memory_affinity,
                                             NR_NODE_MEMBLKS);
include/trace/events/module.h | +10
···
+/*
+ * Because linux/module.h has tracepoints in the header, and ftrace.h
+ * eventually includes this file, define_trace.h includes linux/module.h
+ * But we do not want the module.h to override the TRACE_SYSTEM macro
+ * variable that define_trace.h is processing, so we only set it
+ * when module events are being processed, which would happen when
+ * CREATE_TRACE_POINTS is defined.
+ */
+#ifdef CREATE_TRACE_POINTS
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM module
+#endif

 #if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_MODULE_H
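
For readers unfamiliar with the trace header machinery: an events header sets TRACE_SYSTEM, defines its events inside the multi-read guard, and is re-expanded by define_trace.h whenever a .c file defines CREATE_TRACE_POINTS. A rough sketch of that conventional layout (the "sample" system name and the elided event body are illustrative, not part of this patch):

    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM sample             /* event group this header defines */

    #if !defined(_TRACE_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_SAMPLE_H

    #include <linux/tracepoint.h>

    /* TRACE_EVENT(...) definitions for the "sample" system go here */

    #endif /* _TRACE_SAMPLE_H */

    /* This part must be outside protection */
    #include <trace/define_trace.h>

The fix above makes module.h set TRACE_SYSTEM only while its own events are being created, so being pulled in indirectly through linux/module.h no longer clobbers whichever TRACE_SYSTEM define_trace.h is currently processing.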
init/Kconfig | -15
···

           Accept the default if unsure.

-config SRCU_SYNCHRONIZE_DELAY
-        int "Microseconds to delay before waiting for readers"
-        range 0 20
-        default 10
-        help
-          This option controls how long SRCU delays before entering its
-          loop waiting on SRCU readers. The purpose of this loop is
-          to avoid the unconditional context-switch penalty that would
-          otherwise be incurred if there was an active SRCU reader,
-          in a manner similar to adaptive locking schemes. This should
-          be set to be a bit longer than the common-case SRCU read-side
-          critical-section overhead.
-
-          Accept the default if unsure.
-
 endmenu # "RCU Subsystem"

 config IKCONFIG
kernel/futex.c | +3 -4
···
         new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

         /*
-         * This happens when we have stolen the lock and the original
-         * pending owner did not enqueue itself back on the rt_mutex.
-         * Thats not a tragedy. We know that way, that a lock waiter
-         * is on the fly. We make the futex_q waiter the pending owner.
+         * It is possible that the next waiter (the one that brought
+         * this owner to the kernel) timed out and is no longer
+         * waiting on the lock.
          */
         if (!new_owner)
                 new_owner = this->task;
kernel/rcutiny.c | +2 -1
···
         unsigned long flags;

         for (;;) {
-                wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
+                wait_event_interruptible(rcu_kthread_wq,
+                                         have_rcu_kthread_work != 0);
                 morework = rcu_boost();
                 local_irq_save(flags);
                 work = have_rcu_kthread_work;
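
This hunk is what implements the "avoid pointless blocked-task warnings" commit above: wait_event() parks the kthread in uninterruptible sleep, and a task that stays uninterruptible for two minutes gets reported by the hung-task watchdog even though an idle RCU kthread is perfectly healthy. Sleeping interruptibly avoids the report. A minimal sketch of the pattern with invented names, not the rcutiny.c code:

    #include <linux/kthread.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
    static int demo_have_work;      /* set by a producer, which then calls wake_up(&demo_wq) */

    static int demo_kthread(void *unused)
    {
            for (;;) {
                    /* interruptible sleep: an idle wait is not flagged as "blocked" */
                    wait_event_interruptible(demo_wq,
                                             demo_have_work != 0 ||
                                             kthread_should_stop());
                    if (kthread_should_stop())
                            break;
                    demo_have_work = 0;
                    /* ... process whatever was queued ... */
            }
            return 0;
    }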
kernel/srcu.c | +13 -2
···
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);

 /*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited(). We spin for a fixed time period
+ * (defined below) to allow SRCU readers to exit their read-side critical
+ * sections. If there are still some readers after 10 microseconds,
+ * we repeatedly block for 1-millisecond time periods. This approach
+ * has done well in testing, so there is no need for a config parameter.
+ */
+#define SYNCHRONIZE_SRCU_READER_DELAY 10
+
+/*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
 static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
···
          * will have finished executing. We initially give readers
          * an arbitrarily chosen 10 microseconds to get out of their
          * SRCU read-side critical sections, then loop waiting 1/HZ
-         * seconds per iteration.
+         * seconds per iteration. The 10-microsecond value has done
+         * very well in testing.
          */

         if (srcu_readers_active_idx(sp, idx))
-                udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);
+                udelay(SYNCHRONIZE_SRCU_READER_DELAY);
         while (srcu_readers_active_idx(sp, idx))
                 schedule_timeout_interruptible(1);

kernel/time/clocksource.c | +4 -4
···
  * @shift: pointer to shift variable
  * @from: frequency to convert from
  * @to: frequency to convert to
- * @minsec: guaranteed runtime conversion range in seconds
+ * @maxsec: guaranteed runtime conversion range in seconds
  *
  * The function evaluates the shift/mult pair for the scaled math
  * operations of clocksources and clockevents.
···
  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
  * event @to is the counter frequency and @from is NSEC_PER_SEC.
  *
- * The @minsec conversion range argument controls the time frame in
+ * The @maxsec conversion range argument controls the time frame in
  * seconds which must be covered by the runtime conversion with the
  * calculated mult and shift factors. This guarantees that no 64bit
  * overflow happens when the input value of the conversion is
···
  * factors.
  */
 void
-clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
 {
         u64 tmp;
         u32 sft, sftacc= 32;
···
          * Calculate the shift factor which is limiting the conversion
          * range:
          */
-        tmp = ((u64)minsec * from) >> 32;
+        tmp = ((u64)maxsec * from) >> 32;
         while (tmp) {
                 tmp >>=1;
                 sftacc--;
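
For context on the rename: the computed pair is consumed at runtime as ns = (cycles * mult) >> shift (clocksource_cyc2ns() in the kernel does exactly this), so the argument names the maximum stretch of seconds the 64-bit multiply must cover without overflowing; a larger guaranteed range forces a smaller mult and hence coarser resolution. A generic, standalone sketch of that conversion:

    #include <stdint.h>

    /* ns = (cycles * mult) >> shift -- how a clocksource's scaling pair is used */
    static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
    {
            return (cycles * mult) >> shift;
    }

For instance, a 1 MHz counter scaled with mult = 1000 and shift = 0 maps one cycle to 1000 ns.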
kernel/time/timekeeping.c | +2 -2
···
         u32 mult;
 };

-struct timekeeper timekeeper;
+static struct timekeeper timekeeper;

 /**
  * timekeeper_setup_internals - Set up internals to use clocksource clock.
···
 /*
  * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
  */
-struct timespec raw_time;
+static struct timespec raw_time;

 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
kernel/trace/trace_syscalls.c | +12 -21
···
 static int syscall_enter_define_fields(struct ftrace_event_call *call);
 static int syscall_exit_define_fields(struct ftrace_event_call *call);

-/* All syscall exit events have the same fields */
-static LIST_HEAD(syscall_exit_fields);
-
 static struct list_head *
 syscall_get_enter_fields(struct ftrace_event_call *call)
 {
···
         return &entry->enter_fields;
 }

-static struct list_head *
-syscall_get_exit_fields(struct ftrace_event_call *call)
-{
-        return &syscall_exit_fields;
-}
-
 struct trace_event_functions enter_syscall_print_funcs = {
-        .trace = print_syscall_enter,
+        .trace          = print_syscall_enter,
 };

 struct trace_event_functions exit_syscall_print_funcs = {
-        .trace = print_syscall_exit,
+        .trace          = print_syscall_exit,
 };

 struct ftrace_event_class event_class_syscall_enter = {
-        .system = "syscalls",
-        .reg = syscall_enter_register,
-        .define_fields = syscall_enter_define_fields,
-        .get_fields = syscall_get_enter_fields,
-        .raw_init = init_syscall_trace,
+        .system         = "syscalls",
+        .reg            = syscall_enter_register,
+        .define_fields  = syscall_enter_define_fields,
+        .get_fields     = syscall_get_enter_fields,
+        .raw_init       = init_syscall_trace,
 };

 struct ftrace_event_class event_class_syscall_exit = {
-        .system = "syscalls",
-        .reg = syscall_exit_register,
-        .define_fields = syscall_exit_define_fields,
-        .get_fields = syscall_get_exit_fields,
-        .raw_init = init_syscall_trace,
+        .system         = "syscalls",
+        .reg            = syscall_exit_register,
+        .define_fields  = syscall_exit_define_fields,
+        .fields         = LIST_HEAD_INIT(event_class_syscall_exit.fields),
+        .raw_init       = init_syscall_trace,
 };

 extern unsigned long __start_syscalls_metadata[];
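
The idea behind the last hunk: the syscall-exit class previously needed a get_fields() callback solely to hand back one shared list head. Initializing the class's own .fields member statically with LIST_HEAD_INIT removes that indirection, because an empty kernel list is simply a node pointing at itself. A minimal sketch of the pattern with made-up names, assuming <linux/list.h>:

    #include <linux/list.h>

    struct demo_class {
            const char              *system;
            struct list_head        fields;
    };

    /* .fields starts out as an empty list: both pointers aim back at itself */
    static struct demo_class demo_class = {
            .system = "demo",
            .fields = LIST_HEAD_INIT(demo_class.fields),
    };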
tools/perf/Documentation/perf-record.txt | +3
···
 -r::
 --realtime=::
         Collect data with this RT SCHED_FIFO priority.
+-D::
+--no-delay::
+        Collect data without buffering.
 -A::
 --append::
         Append to the output file to do incremental profiling.
tools/perf/builtin-record.c | +9 -3
···
 static const char *output_name = "perf.data";
 static int group = 0;
 static int realtime_prio = 0;
+static bool nodelay = false;
 static bool raw_samples = false;
 static bool sample_id_all_avail = true;
 static bool system_wide = false;
···
                 attr->sample_type |= PERF_SAMPLE_CPU;
         }

+        if (nodelay) {
+                attr->watermark = 0;
+                attr->wakeup_events = 1;
+        }
+
         attr->mmap = track;
         attr->comm = track;
         attr->inherit = !no_inherit;
···
         else if (err == ENODEV && cpu_list) {
                 die("No such device - did you specify"
                     " an out-of-range profile CPU?\n");
-        } else if (err == ENOENT) {
-                die("%s event is not supported. ",
-                    event_name(evsel));
         } else if (err == EINVAL && sample_id_all_avail) {
                 /*
                  * Old kernel, no attr->sample_id_type_all field
···
                 process_buildids();
                 perf_header__write(&session->header, output, true);
                 perf_session__delete(session);
+                perf_evsel_list__delete();
                 symbol__exit();
         }
 }
···
                     "record events on existing thread id"),
         OPT_INTEGER('r', "realtime", &realtime_prio,
                     "collect data with this RT SCHED_FIFO priority"),
+        OPT_BOOLEAN('D', "no-delay", &nodelay,
+                    "collect data without buffering"),
         OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                     "collect raw sample records from all opened counters"),
         OPT_BOOLEAN('a', "all-cpus", &system_wide,
tools/perf/builtin-sched.c | +9 -9
···
         "-f",
         "-m", "1024",
         "-c", "1",
-        "-e", "sched:sched_switch:r",
-        "-e", "sched:sched_stat_wait:r",
-        "-e", "sched:sched_stat_sleep:r",
-        "-e", "sched:sched_stat_iowait:r",
-        "-e", "sched:sched_stat_runtime:r",
-        "-e", "sched:sched_process_exit:r",
-        "-e", "sched:sched_process_fork:r",
-        "-e", "sched:sched_wakeup:r",
-        "-e", "sched:sched_migrate_task:r",
+        "-e", "sched:sched_switch",
+        "-e", "sched:sched_stat_wait",
+        "-e", "sched:sched_stat_sleep",
+        "-e", "sched:sched_stat_iowait",
+        "-e", "sched:sched_stat_runtime",
+        "-e", "sched:sched_process_exit",
+        "-e", "sched:sched_process_fork",
+        "-e", "sched:sched_wakeup",
+        "-e", "sched:sched_migrate_task",
 };

 static int __cmd_record(int argc, const char **argv)
tools/perf/builtin-stat.c | +1
···
 out_free_fd:
         list_for_each_entry(pos, &evsel_list, node)
                 perf_evsel__free_stat_priv(pos);
+        perf_evsel_list__delete();
 out:
         thread_map__delete(threads);
         threads = NULL;
tools/perf/builtin-top.c | +3 -2
···
                         die("Permission error - are you root?\n"
                             "\t Consider tweaking"
                             " /proc/sys/kernel/perf_event_paranoid.\n");
-                if (err == ENOENT)
-                        die("%s event is not supported. ", event_name(evsel));
                 /*
                  * If it's cycles then fall back to hrtimer
                  * based cpu-clock-tick sw counter, which
···
                 pos->attr.sample_period = default_interval;
         }

+        sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+
         symbol_conf.priv_size = (sizeof(struct sym_entry) +
                                  (nr_counters + 1) * sizeof(unsigned long));
···
 out_free_fd:
         list_for_each_entry(pos, &evsel_list, node)
                 perf_evsel__free_mmap(pos);
+        perf_evsel_list__delete();

         return status;
 }
tools/perf/perf.c | -2
···
         status = p->fn(argc, argv, prefix);
         exit_browser(status);

-        perf_evsel_list__delete();
-
         if (status)
                 return status & 0xff;
