Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "arm64: Enable perf events based hard lockup detector"

This reverts commit 367c820ef08082e68df8a3bc12e62393af21e4b5.

lockup_detector_init() makes heavy use of per-cpu variables and must be
called with preemption disabled. Usually, it's handled early during boot
in kernel_init_freeable(), before SMP has been initialised.

Since we do not know whether or not our PMU interrupt can be signalled
as an NMI until considerably later in the boot process, the Arm PMU
driver attempts to re-initialise the lockup detector off the back of a
device_initcall(). Unfortunately, this is called from preemptible
context and results in the following splat:

| BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
| caller is debug_smp_processor_id+0x20/0x2c
| CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #276
| Hardware name: linux,dummy-virt (DT)
| Call trace:
| dump_backtrace+0x0/0x3c0
| show_stack+0x20/0x6c
| dump_stack+0x2f0/0x42c
| check_preemption_disabled+0x1cc/0x1dc
| debug_smp_processor_id+0x20/0x2c
| hardlockup_detector_event_create+0x34/0x18c
| hardlockup_detector_perf_init+0x2c/0x134
| watchdog_nmi_probe+0x18/0x24
| lockup_detector_init+0x44/0xa8
| armv8_pmu_driver_init+0x54/0x78
| do_one_initcall+0x184/0x43c
| kernel_init_freeable+0x368/0x380
| kernel_init+0x1c/0x1cc
| ret_from_fork+0x10/0x30

Rather than bodge this with raw_smp_processor_id() or randomly disabling
preemption, simply revert the culprit for now until we figure out how to
do this properly.

Reported-by: Lecopzer Chen <lecopzer.chen@mediatek.com>
Signed-off-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Link: https://lore.kernel.org/r/20201221162249.3119-1-lecopzer.chen@mediatek.com
Link: https://lore.kernel.org/r/20210112221855.10666-1-will@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

authored by

Will Deacon and committed by
Catalin Marinas
b90d72a6 df068247

+2 -48
-2
arch/arm64/Kconfig
···
174 174 	select HAVE_NMI
175 175 	select HAVE_PATA_PLATFORM
176 176 	select HAVE_PERF_EVENTS
177 - 	select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
178 - 	select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
179 177 	select HAVE_PERF_REGS
180 178 	select HAVE_PERF_USER_STACK_DUMP
181 179 	select HAVE_REGS_AND_STACK_ACCESS_API
+2 -39
arch/arm64/kernel/perf_event.c
···
23 23 #include <linux/platform_device.h>
24 24 #include <linux/sched_clock.h>
25 25 #include <linux/smp.h>
26 - #include <linux/nmi.h>
27 - #include <linux/cpufreq.h>
28 26
29 27 /* ARMv8 Cortex-A53 specific event types. */
30 28 #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
···
1248 1250
1249 1251 static int __init armv8_pmu_driver_init(void)
1250 1252 {
1251 - 	int ret;
1252 -
1253 1253 	if (acpi_disabled)
1254 - 		ret = platform_driver_register(&armv8_pmu_driver);
1254 + 		return platform_driver_register(&armv8_pmu_driver);
1255 1255 	else
1256 - 		ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
1257 -
1258 - 	/*
1259 - 	 * Try to re-initialize lockup detector after PMU init in
1260 - 	 * case PMU events are triggered via NMIs.
1261 - 	 */
1262 - 	if (ret == 0 && arm_pmu_irq_is_nmi())
1263 - 		lockup_detector_init();
1264 -
1265 - 	return ret;
1256 + 		return arm_pmu_acpi_probe(armv8_pmuv3_init);
1266 1257 }
1267 1258 device_initcall(armv8_pmu_driver_init)
1268 1259
···
1309 1322 	userpg->cap_user_time_zero = 1;
1310 1323 	userpg->cap_user_time_short = 1;
1311 1324 }
1312 -
1313 - #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
1314 - /*
1315 -  * Safe maximum CPU frequency in case a particular platform doesn't implement
1316 -  * cpufreq driver. Although, architecture doesn't put any restrictions on
1317 -  * maximum frequency but 5 GHz seems to be safe maximum given the available
1318 -  * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
1319 -  * hand, we can't make it much higher as it would lead to a large hard-lockup
1320 -  * detection timeout on parts which are running slower (eg. 1GHz on
1321 -  * Developerbox) and doesn't possess a cpufreq driver.
1322 -  */
1323 - #define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
1324 - u64 hw_nmi_get_sample_period(int watchdog_thresh)
1325 - {
1326 - 	unsigned int cpu = smp_processor_id();
1327 - 	unsigned long max_cpu_freq;
1328 -
1329 - 	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
1330 - 	if (!max_cpu_freq)
1331 - 		max_cpu_freq = SAFE_MAX_CPU_FREQ;
1332 -
1333 - 	return (u64)max_cpu_freq * watchdog_thresh;
1334 - }
1335 - #endif
-5
drivers/perf/arm_pmu.c
···
726 726 	return per_cpu(hw_events->irq, cpu);
727 727 }
728 728
729 - bool arm_pmu_irq_is_nmi(void)
730 - {
731 - 	return has_nmi;
732 - }
733 -
734 729 /*
735 730  * PMU hardware loses all context when a CPU goes offline.
736 731  * When a CPU is hotplugged back in, since some hardware registers are
-2
include/linux/perf/arm_pmu.h
···
163 163 static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
164 164 #endif
165 165
166 - bool arm_pmu_irq_is_nmi(void);
167 -
168 166 /* Internal functions only for core arm_pmu code */
169 167 struct arm_pmu *armpmu_alloc(void);
170 168 struct arm_pmu *armpmu_alloc_atomic(void);