Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
perf tools: Fix sample size bit operations
perf tools: Fix omitted mmap data update on remap
watchdog: Change the default timeout and configure nmi watchdog period based on watchdog_thresh
watchdog: Disable watchdog when thresh is zero
watchdog: Only disable/enable watchdog if necessary
watchdog: Fix rounding bug in get_sample_period()
perf tools: Propagate event parse error handling
perf tools: Robustify dynamic sample content fetch
perf tools: Pre-check sample size before parsing
perf tools: Move evlist sample helpers to evlist area
perf tools: Remove junk code in mmap size handling
perf tools: Check we are able to read the event size on mmap

+216 -100
+2 -2
arch/x86/kernel/apic/hw_nmi.c
··· 19 19 #include <linux/delay.h> 20 20 21 21 #ifdef CONFIG_HARDLOCKUP_DETECTOR 22 - u64 hw_nmi_get_sample_period(void) 22 + u64 hw_nmi_get_sample_period(int watchdog_thresh) 23 23 { 24 - return (u64)(cpu_khz) * 1000 * 60; 24 + return (u64)(cpu_khz) * 1000 * watchdog_thresh; 25 25 } 26 26 #endif 27 27
+4 -3
include/linux/nmi.h
··· 45 45 46 46 #ifdef CONFIG_LOCKUP_DETECTOR 47 47 int hw_nmi_is_cpu_stuck(struct pt_regs *); 48 - u64 hw_nmi_get_sample_period(void); 48 + u64 hw_nmi_get_sample_period(int watchdog_thresh); 49 49 extern int watchdog_enabled; 50 + extern int watchdog_thresh; 50 51 struct ctl_table; 51 - extern int proc_dowatchdog_enabled(struct ctl_table *, int , 52 - void __user *, size_t *, loff_t *); 52 + extern int proc_dowatchdog(struct ctl_table *, int , 53 + void __user *, size_t *, loff_t *); 53 54 #endif 54 55 55 56 #endif
-1
include/linux/sched.h
··· 315 315 void __user *buffer, 316 316 size_t *lenp, loff_t *ppos); 317 317 extern unsigned int softlockup_panic; 318 - extern int softlockup_thresh; 319 318 void lockup_detector_init(void); 320 319 #else 321 320 static inline void touch_softlockup_watchdog(void)
+8 -4
kernel/sysctl.c
··· 730 730 .data = &watchdog_enabled, 731 731 .maxlen = sizeof (int), 732 732 .mode = 0644, 733 - .proc_handler = proc_dowatchdog_enabled, 733 + .proc_handler = proc_dowatchdog, 734 + .extra1 = &zero, 735 + .extra2 = &one, 734 736 }, 735 737 { 736 738 .procname = "watchdog_thresh", 737 - .data = &softlockup_thresh, 739 + .data = &watchdog_thresh, 738 740 .maxlen = sizeof(int), 739 741 .mode = 0644, 740 - .proc_handler = proc_dowatchdog_thresh, 742 + .proc_handler = proc_dowatchdog, 741 743 .extra1 = &neg_one, 742 744 .extra2 = &sixty, 743 745 }, ··· 757 755 .data = &watchdog_enabled, 758 756 .maxlen = sizeof (int), 759 757 .mode = 0644, 760 - .proc_handler = proc_dowatchdog_enabled, 758 + .proc_handler = proc_dowatchdog, 759 + .extra1 = &zero, 760 + .extra2 = &one, 761 761 }, 762 762 #endif 763 763 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+30 -22
kernel/watchdog.c
··· 28 28 #include <linux/perf_event.h> 29 29 30 30 int watchdog_enabled = 1; 31 - int __read_mostly softlockup_thresh = 60; 31 + int __read_mostly watchdog_thresh = 10; 32 32 33 33 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 34 34 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); ··· 91 91 __setup("nosoftlockup", nosoftlockup_setup); 92 92 /* */ 93 93 94 + /* 95 + * Hard-lockup warnings should be triggered after just a few seconds. Soft- 96 + * lockups can have false positives under extreme conditions. So we generally 97 + * want a higher threshold for soft lockups than for hard lockups. So we couple 98 + * the thresholds with a factor: we make the soft threshold twice the amount of 99 + * time the hard threshold is. 100 + */ 101 + static int get_softlockup_thresh() 102 + { 103 + return watchdog_thresh * 2; 104 + } 94 105 95 106 /* 96 107 * Returns seconds, approximately. We don't need nanosecond ··· 116 105 static unsigned long get_sample_period(void) 117 106 { 118 107 /* 119 - * convert softlockup_thresh from seconds to ns 108 + * convert watchdog_thresh from seconds to ns 120 109 * the divide by 5 is to give hrtimer 5 chances to 121 110 * increment before the hardlockup detector generates 122 111 * a warning 123 112 */ 124 - return softlockup_thresh / 5 * NSEC_PER_SEC; 113 + return get_softlockup_thresh() * (NSEC_PER_SEC / 5); 125 114 } 126 115 127 116 /* Commands for resetting the watchdog */ ··· 193 182 unsigned long now = get_timestamp(smp_processor_id()); 194 183 195 184 /* Warn about unreasonable delays: */ 196 - if (time_after(now, touch_ts + softlockup_thresh)) 185 + if (time_after(now, touch_ts + get_softlockup_thresh())) 197 186 return now - touch_ts; 198 187 199 188 return 0; ··· 370 359 371 360 /* Try to register using hardware perf events */ 372 361 wd_attr = &wd_hw_attr; 373 - wd_attr->sample_period = hw_nmi_get_sample_period(); 362 + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); 374 363 event = 
perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); 375 364 if (!IS_ERR(event)) { 376 365 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); ··· 512 501 /* sysctl functions */ 513 502 #ifdef CONFIG_SYSCTL 514 503 /* 515 - * proc handler for /proc/sys/kernel/nmi_watchdog 504 + * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh 516 505 */ 517 506 518 - int proc_dowatchdog_enabled(struct ctl_table *table, int write, 519 - void __user *buffer, size_t *length, loff_t *ppos) 507 + int proc_dowatchdog(struct ctl_table *table, int write, 508 + void __user *buffer, size_t *lenp, loff_t *ppos) 520 509 { 521 - proc_dointvec(table, write, buffer, length, ppos); 510 + int ret; 522 511 523 - if (write) { 524 - if (watchdog_enabled) 525 - watchdog_enable_all_cpus(); 526 - else 527 - watchdog_disable_all_cpus(); 528 - } 529 - return 0; 530 - } 512 + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 513 + if (ret || !write) 514 + goto out; 531 515 532 - int proc_dowatchdog_thresh(struct ctl_table *table, int write, 533 - void __user *buffer, 534 - size_t *lenp, loff_t *ppos) 535 - { 536 - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 516 + if (watchdog_enabled && watchdog_thresh) 517 + watchdog_enable_all_cpus(); 518 + else 519 + watchdog_disable_all_cpus(); 520 + 521 + out: 522 + return ret; 537 523 } 538 524 #endif /* CONFIG_SYSCTL */ 539 525
+8 -1
tools/perf/builtin-test.c
··· 474 474 unsigned int nr_events[nsyscalls], 475 475 expected_nr_events[nsyscalls], i, j; 476 476 struct perf_evsel *evsels[nsyscalls], *evsel; 477 + int sample_size = perf_sample_size(attr.sample_type); 477 478 478 479 for (i = 0; i < nsyscalls; ++i) { 479 480 char name[64]; ··· 559 558 goto out_munmap; 560 559 } 561 560 562 - perf_event__parse_sample(event, attr.sample_type, false, &sample); 561 + err = perf_event__parse_sample(event, attr.sample_type, sample_size, 562 + false, &sample); 563 + if (err) { 564 + pr_err("Can't parse sample, err = %d\n", err); 565 + goto out_munmap; 566 + } 567 + 563 568 evsel = perf_evlist__id2evsel(evlist, sample.id); 564 569 if (evsel == NULL) { 565 570 pr_debug("event with id %" PRIu64
+6 -1
tools/perf/builtin-top.c
··· 805 805 { 806 806 struct perf_sample sample; 807 807 union perf_event *event; 808 + int ret; 808 809 809 810 while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) { 810 - perf_session__parse_sample(self, event, &sample); 811 + ret = perf_session__parse_sample(self, event, &sample); 812 + if (ret) { 813 + pr_err("Can't parse sample, err = %d\n", ret); 814 + continue; 815 + } 811 816 812 817 if (event->header.type == PERF_RECORD_SAMPLE) 813 818 perf_event__process_sample(event, &sample, self);
+31 -15
tools/perf/util/event.c
··· 9 9 #include "thread_map.h" 10 10 11 11 static const char *perf_event__names[] = { 12 - [0] = "TOTAL", 13 - [PERF_RECORD_MMAP] = "MMAP", 14 - [PERF_RECORD_LOST] = "LOST", 15 - [PERF_RECORD_COMM] = "COMM", 16 - [PERF_RECORD_EXIT] = "EXIT", 17 - [PERF_RECORD_THROTTLE] = "THROTTLE", 18 - [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE", 19 - [PERF_RECORD_FORK] = "FORK", 20 - [PERF_RECORD_READ] = "READ", 21 - [PERF_RECORD_SAMPLE] = "SAMPLE", 22 - [PERF_RECORD_HEADER_ATTR] = "ATTR", 23 - [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 24 - [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", 25 - [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", 26 - [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", 12 + [0] = "TOTAL", 13 + [PERF_RECORD_MMAP] = "MMAP", 14 + [PERF_RECORD_LOST] = "LOST", 15 + [PERF_RECORD_COMM] = "COMM", 16 + [PERF_RECORD_EXIT] = "EXIT", 17 + [PERF_RECORD_THROTTLE] = "THROTTLE", 18 + [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE", 19 + [PERF_RECORD_FORK] = "FORK", 20 + [PERF_RECORD_READ] = "READ", 21 + [PERF_RECORD_SAMPLE] = "SAMPLE", 22 + [PERF_RECORD_HEADER_ATTR] = "ATTR", 23 + [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 24 + [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", 25 + [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", 26 + [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", 27 27 }; 28 28 29 29 const char *perf_event__name(unsigned int id) ··· 33 33 if (!perf_event__names[id]) 34 34 return "UNKNOWN"; 35 35 return perf_event__names[id]; 36 + } 37 + 38 + int perf_sample_size(u64 sample_type) 39 + { 40 + u64 mask = sample_type & PERF_SAMPLE_MASK; 41 + int size = 0; 42 + int i; 43 + 44 + for (i = 0; i < 64; i++) { 45 + if (mask & (1UL << i)) 46 + size++; 47 + } 48 + 49 + size *= sizeof(u64); 50 + 51 + return size; 36 52 } 37 53 38 54 static struct perf_sample synth_sample = {
+11 -1
tools/perf/util/event.h
··· 56 56 u64 id; 57 57 }; 58 58 59 + 60 + #define PERF_SAMPLE_MASK \ 61 + (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ 62 + PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ 63 + PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \ 64 + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) 65 + 59 66 struct sample_event { 60 67 struct perf_event_header header; 61 68 u64 array[]; ··· 81 74 void *raw_data; 82 75 struct ip_callchain *callchain; 83 76 }; 77 + 78 + int perf_sample_size(u64 sample_type); 84 79 85 80 #define BUILD_ID_SIZE 20 86 81 ··· 187 178 const char *perf_event__name(unsigned int id); 188 179 189 180 int perf_event__parse_sample(const union perf_event *event, u64 type, 190 - bool sample_id_all, struct perf_sample *sample); 181 + int sample_size, bool sample_id_all, 182 + struct perf_sample *sample); 191 183 192 184 #endif /* __PERF_RECORD_H */
+31
tools/perf/util/evlist.c
··· 459 459 460 460 return 0; 461 461 } 462 + 463 + u64 perf_evlist__sample_type(struct perf_evlist *evlist) 464 + { 465 + struct perf_evsel *pos; 466 + u64 type = 0; 467 + 468 + list_for_each_entry(pos, &evlist->entries, node) { 469 + if (!type) 470 + type = pos->attr.sample_type; 471 + else if (type != pos->attr.sample_type) 472 + die("non matching sample_type"); 473 + } 474 + 475 + return type; 476 + } 477 + 478 + bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) 479 + { 480 + bool value = false, first = true; 481 + struct perf_evsel *pos; 482 + 483 + list_for_each_entry(pos, &evlist->entries, node) { 484 + if (first) { 485 + value = pos->attr.sample_id_all; 486 + first = false; 487 + } else if (value != pos->attr.sample_id_all) 488 + die("non matching sample_id_all"); 489 + } 490 + 491 + return value; 492 + }
+3
tools/perf/util/evlist.h
··· 66 66 void perf_evlist__delete_maps(struct perf_evlist *evlist); 67 67 int perf_evlist__set_filters(struct perf_evlist *evlist); 68 68 69 + u64 perf_evlist__sample_type(struct perf_evlist *evlist); 70 + bool perf_evlist__sample_id_all(const struct perf_evlist *evlist); 71 + 69 72 #endif /* __PERF_EVLIST_H */
+31 -1
tools/perf/util/evsel.c
··· 303 303 return 0; 304 304 } 305 305 306 + static bool sample_overlap(const union perf_event *event, 307 + const void *offset, u64 size) 308 + { 309 + const void *base = event; 310 + 311 + if (offset + size > base + event->header.size) 312 + return true; 313 + 314 + return false; 315 + } 316 + 306 317 int perf_event__parse_sample(const union perf_event *event, u64 type, 307 - bool sample_id_all, struct perf_sample *data) 318 + int sample_size, bool sample_id_all, 319 + struct perf_sample *data) 308 320 { 309 321 const u64 *array; 310 322 ··· 330 318 } 331 319 332 320 array = event->sample.array; 321 + 322 + if (sample_size + sizeof(event->header) > event->header.size) 323 + return -EFAULT; 333 324 334 325 if (type & PERF_SAMPLE_IP) { 335 326 data->ip = event->ip.ip; ··· 384 369 } 385 370 386 371 if (type & PERF_SAMPLE_CALLCHAIN) { 372 + if (sample_overlap(event, array, sizeof(data->callchain->nr))) 373 + return -EFAULT; 374 + 387 375 data->callchain = (struct ip_callchain *)array; 376 + 377 + if (sample_overlap(event, array, data->callchain->nr)) 378 + return -EFAULT; 379 + 388 380 array += 1 + data->callchain->nr; 389 381 } 390 382 391 383 if (type & PERF_SAMPLE_RAW) { 392 384 u32 *p = (u32 *)array; 385 + 386 + if (sample_overlap(event, array, sizeof(u32))) 387 + return -EFAULT; 388 + 393 389 data->raw_size = *p; 394 390 p++; 391 + 392 + if (sample_overlap(event, p, data->raw_size)) 393 + return -EFAULT; 394 + 395 395 data->raw_data = p; 396 396 } 397 397
-31
tools/perf/util/header.c
··· 934 934 return -ENOMEM; 935 935 } 936 936 937 - u64 perf_evlist__sample_type(struct perf_evlist *evlist) 938 - { 939 - struct perf_evsel *pos; 940 - u64 type = 0; 941 - 942 - list_for_each_entry(pos, &evlist->entries, node) { 943 - if (!type) 944 - type = pos->attr.sample_type; 945 - else if (type != pos->attr.sample_type) 946 - die("non matching sample_type"); 947 - } 948 - 949 - return type; 950 - } 951 - 952 - bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) 953 - { 954 - bool value = false, first = true; 955 - struct perf_evsel *pos; 956 - 957 - list_for_each_entry(pos, &evlist->entries, node) { 958 - if (first) { 959 - value = pos->attr.sample_id_all; 960 - first = false; 961 - } else if (value != pos->attr.sample_id_all) 962 - die("non matching sample_id_all"); 963 - } 964 - 965 - return value; 966 - } 967 - 968 937 int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 969 938 perf_event__handler_t process, 970 939 struct perf_session *session)
-2
tools/perf/util/header.h
··· 64 64 int perf_header__push_event(u64 id, const char *name); 65 65 char *perf_header__find_event(u64 id); 66 66 67 - u64 perf_evlist__sample_type(struct perf_evlist *evlist); 68 - bool perf_evlist__sample_id_all(const struct perf_evlist *evlist); 69 67 void perf_header__set_feat(struct perf_header *header, int feat); 70 68 void perf_header__clear_feat(struct perf_header *header, int feat); 71 69 bool perf_header__has_feat(const struct perf_header *header, int feat);
+2
tools/perf/util/include/linux/list.h
··· 1 1 #include <linux/kernel.h> 2 + #include <linux/prefetch.h> 3 + 2 4 #include "../../../../include/linux/list.h" 3 5 4 6 #ifndef PERF_LIST_H
+10 -3
tools/perf/util/python.c
··· 675 675 union perf_event *event; 676 676 int sample_id_all = 1, cpu; 677 677 static char *kwlist[] = {"sample_id_all", NULL, NULL}; 678 + int err; 678 679 679 680 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, 680 681 &cpu, &sample_id_all)) ··· 691 690 return PyErr_NoMemory(); 692 691 693 692 first = list_entry(evlist->entries.next, struct perf_evsel, node); 694 - perf_event__parse_sample(event, first->attr.sample_type, sample_id_all, 695 - &pevent->sample); 693 + err = perf_event__parse_sample(event, first->attr.sample_type, 694 + perf_sample_size(first->attr.sample_type), 695 + sample_id_all, &pevent->sample); 696 + if (err) { 697 + pr_err("Can't parse sample, err = %d\n", err); 698 + goto end; 699 + } 700 + 696 701 return pyevent; 697 702 } 698 - 703 + end: 699 704 Py_INCREF(Py_None); 700 705 return Py_None; 701 706 }
+37 -13
tools/perf/util/session.c
··· 97 97 void perf_session__update_sample_type(struct perf_session *self) 98 98 { 99 99 self->sample_type = perf_evlist__sample_type(self->evlist); 100 + self->sample_size = perf_sample_size(self->sample_type); 100 101 self->sample_id_all = perf_evlist__sample_id_all(self->evlist); 101 102 perf_session__id_header_size(self); 102 103 } ··· 480 479 struct perf_sample sample; 481 480 u64 limit = os->next_flush; 482 481 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; 482 + int ret; 483 483 484 484 if (!ops->ordered_samples || !limit) 485 485 return; ··· 489 487 if (iter->timestamp > limit) 490 488 break; 491 489 492 - perf_session__parse_sample(s, iter->event, &sample); 493 - perf_session_deliver_event(s, iter->event, &sample, ops, 494 - iter->file_offset); 490 + ret = perf_session__parse_sample(s, iter->event, &sample); 491 + if (ret) 492 + pr_err("Can't parse sample, err = %d\n", ret); 493 + else 494 + perf_session_deliver_event(s, iter->event, &sample, ops, 495 + iter->file_offset); 495 496 496 497 os->last_flush = iter->timestamp; 497 498 list_del(&iter->list); ··· 810 805 /* 811 806 * For all kernel events we get the sample data 812 807 */ 813 - perf_session__parse_sample(session, event, &sample); 808 + ret = perf_session__parse_sample(session, event, &sample); 809 + if (ret) 810 + return ret; 814 811 815 812 /* Preprocess sample records - precheck callchains */ 816 813 if (perf_session__preprocess_sample(session, event, &sample)) ··· 960 953 return err; 961 954 } 962 955 956 + static union perf_event * 957 + fetch_mmaped_event(struct perf_session *session, 958 + u64 head, size_t mmap_size, char *buf) 959 + { 960 + union perf_event *event; 961 + 962 + /* 963 + * Ensure we have enough space remaining to read 964 + * the size of the event in the headers. 
965 + */ 966 + if (head + sizeof(event->header) > mmap_size) 967 + return NULL; 968 + 969 + event = (union perf_event *)(buf + head); 970 + 971 + if (session->header.needs_swap) 972 + perf_event_header__bswap(&event->header); 973 + 974 + if (head + event->header.size > mmap_size) 975 + return NULL; 976 + 977 + return event; 978 + } 979 + 963 980 int __perf_session__process_events(struct perf_session *session, 964 981 u64 data_offset, u64 data_size, 965 982 u64 file_size, struct perf_event_ops *ops) ··· 1038 1007 file_pos = file_offset + head; 1039 1008 1040 1009 more: 1041 - event = (union perf_event *)(buf + head); 1042 - 1043 - if (session->header.needs_swap) 1044 - perf_event_header__bswap(&event->header); 1045 - size = event->header.size; 1046 - if (size == 0) 1047 - size = 8; 1048 - 1049 - if (head + event->header.size > mmap_size) { 1010 + event = fetch_mmaped_event(session, head, mmap_size, buf); 1011 + if (!event) { 1050 1012 if (mmaps[map_idx]) { 1051 1013 munmap(mmaps[map_idx], mmap_size); 1052 1014 mmaps[map_idx] = NULL;
+2
tools/perf/util/session.h
··· 43 43 */ 44 44 struct hists hists; 45 45 u64 sample_type; 46 + int sample_size; 46 47 int fd; 47 48 bool fd_pipe; 48 49 bool repipe; ··· 160 159 struct perf_sample *sample) 161 160 { 162 161 return perf_event__parse_sample(event, session->sample_type, 162 + session->sample_size, 163 163 session->sample_id_all, sample); 164 164 } 165 165