Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf, x86: Add hw_watchdog_set_attr() for the sake of nmi-watchdog on P4

Due to the restrictions and specifics of the Netburst PMU we need a separate
event for the NMI watchdog. In particular, every Netburst event
consumes not just a counter and a config register, but also an
additional ESCR register.

Since ESCR registers are grouped by counters (i.e. if an ESCR is occupied
by some event there is no room for another event to enter until it is
released) we need to pick the "least" used ESCR (or the most available
one) for nmi-watchdog purposes -- so MSR_P4_CRU_ESCR2/3 was chosen.

With this patch nmi-watchdog and perf top should be able to run simultaneously.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Lin Ming <ming.m.lin@intel.com>
CC: Arnaldo Carvalho de Melo <acme@redhat.com>
CC: Frederic Weisbecker <fweisbec@gmail.com>
Tested-and-reviewed-by: Don Zickus <dzickus@redhat.com>
Tested-and-reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110623124918.GC13050@sun
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by

Cyrill Gorcunov and committed by
Ingo Molnar
1880c4ae 0d641208

+38 -1
+7
arch/x86/kernel/cpu/perf_event.c
··· 233 233 void (*enable_all)(int added); 234 234 void (*enable)(struct perf_event *); 235 235 void (*disable)(struct perf_event *); 236 + void (*hw_watchdog_set_attr)(struct perf_event_attr *attr); 236 237 int (*hw_config)(struct perf_event *event); 237 238 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); 238 239 unsigned eventsel; ··· 315 314 [PERF_COUNT_HW_CACHE_MAX] 316 315 [PERF_COUNT_HW_CACHE_OP_MAX] 317 316 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 317 + 318 + void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr) 319 + { 320 + if (x86_pmu.hw_watchdog_set_attr) 321 + x86_pmu.hw_watchdog_set_attr(wd_attr); 322 + } 318 323 319 324 /* 320 325 * Propagate event elapsed time into the generic event.
+26
arch/x86/kernel/cpu/perf_event_p4.c
··· 705 705 return 0; 706 706 } 707 707 708 + static void p4_hw_watchdog_set_attr(struct perf_event_attr *wd_attr) 709 + { 710 + /* 711 + * Watchdog ticks are special on Netburst, we use 712 + * that named "non-sleeping" ticks as recommended 713 + * by Intel SDM Vol3b. 714 + */ 715 + WARN_ON_ONCE(wd_attr->type != PERF_TYPE_HARDWARE || 716 + wd_attr->config != PERF_COUNT_HW_CPU_CYCLES); 717 + 718 + wd_attr->type = PERF_TYPE_RAW; 719 + wd_attr->config = 720 + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | 721 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | 722 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | 723 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | 724 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | 725 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | 726 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | 727 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | 728 + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3)) | 729 + p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | 730 + P4_CCCR_COMPARE); 731 + } 732 + 708 733 static int p4_hw_config(struct perf_event *event) 709 734 { 710 735 int cpu = get_cpu(); ··· 1204 1179 .cntval_bits = ARCH_P4_CNTRVAL_BITS, 1205 1180 .cntval_mask = ARCH_P4_CNTRVAL_MASK, 1206 1181 .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, 1182 + .hw_watchdog_set_attr = p4_hw_watchdog_set_attr, 1207 1183 .hw_config = p4_hw_config, 1208 1184 .schedule_events = p4_pmu_schedule_events, 1209 1185 /*
+5 -1
kernel/watchdog.c
··· 200 200 } 201 201 202 202 #ifdef CONFIG_HARDLOCKUP_DETECTOR 203 + void __weak hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr) { } 204 + 203 205 static struct perf_event_attr wd_hw_attr = { 204 206 .type = PERF_TYPE_HARDWARE, 205 207 .config = PERF_COUNT_HW_CPU_CYCLES, ··· 370 368 if (event != NULL) 371 369 goto out_enable; 372 370 373 - /* Try to register using hardware perf events */ 374 371 wd_attr = &wd_hw_attr; 375 372 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); 373 + hw_nmi_watchdog_set_attr(wd_attr); 374 + 375 + /* Try to register using hardware perf events */ 376 376 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); 377 377 if (!IS_ERR(event)) { 378 378 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");