Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/aperfmperf: Replace aperfmperf_get_khz()

The frequency invariance infrastructure provides the APERF/MPERF samples
already. Utilize them for the cpu frequency display in /proc/cpuinfo.

The sample is considered valid for 20ms. So for idle or isolated NOHZ full
CPUs the function returns 0, which matches the previous behaviour.

This gets rid of the mass IPIs and the 20ms stabilization delay that Eric
observed when reading /proc/cpuinfo.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20220415161206.875029458@linutronix.de

+35 -49
+34 -43
arch/x86/kernel/cpu/aperfmperf.c
··· 101 101 return time_delta <= APERFMPERF_STALE_THRESHOLD_MS; 102 102 } 103 103 104 - unsigned int aperfmperf_get_khz(int cpu) 105 - { 106 - if (!cpu_khz) 107 - return 0; 108 - 109 - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) 110 - return 0; 111 - 112 - if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) 113 - return 0; 114 - 115 - if (rcu_is_idle_cpu(cpu)) 116 - return 0; /* Idle CPUs are completely uninteresting. */ 117 - 118 - aperfmperf_snapshot_cpu(cpu, ktime_get(), true); 119 - return per_cpu(samples.khz, cpu); 120 - } 121 - 122 - void arch_freq_prepare_all(void) 123 - { 124 - ktime_t now = ktime_get(); 125 - bool wait = false; 126 - int cpu; 127 - 128 - if (!cpu_khz) 129 - return; 130 - 131 - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) 132 - return; 133 - 134 - for_each_online_cpu(cpu) { 135 - if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) 136 - continue; 137 - if (rcu_is_idle_cpu(cpu)) 138 - continue; /* Idle CPUs are completely uninteresting. */ 139 - if (!aperfmperf_snapshot_cpu(cpu, now, false)) 140 - wait = true; 141 - } 142 - 143 - if (wait) 144 - msleep(APERFMPERF_REFRESH_DELAY_MS); 145 - } 146 - 147 104 unsigned int arch_freq_get_on_cpu(int cpu) 148 105 { 149 106 struct aperfmperf_sample *s = per_cpu_ptr(&samples, cpu); ··· 485 528 raw_write_seqcount_end(&s->seq); 486 529 487 530 scale_freq_tick(acnt, mcnt); 531 + } 532 + 533 + /* 534 + * Discard samples older than the define maximum sample age of 20ms. There 535 + * is no point in sending IPIs in such a case. If the scheduler tick was 536 + * not running then the CPU is either idle or isolated. 
537 + */ 538 + #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) 539 + 540 + unsigned int aperfmperf_get_khz(int cpu) 541 + { 542 + struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); 543 + unsigned long last; 544 + unsigned int seq; 545 + u64 acnt, mcnt; 546 + 547 + if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) 548 + return 0; 549 + 550 + do { 551 + seq = raw_read_seqcount_begin(&s->seq); 552 + last = s->last_update; 553 + acnt = s->acnt; 554 + mcnt = s->mcnt; 555 + } while (read_seqcount_retry(&s->seq, seq)); 556 + 557 + /* 558 + * Bail on invalid count and when the last update was too long ago, 559 + * which covers idle and NOHZ full CPUs. 560 + */ 561 + if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE) 562 + return 0; 563 + 564 + return div64_u64((cpu_khz * acnt), mcnt); 488 565 } 489 566 490 567 static int __init bp_init_aperfmperf(void)
+1 -5
fs/proc/cpuinfo.c
··· 5 5 #include <linux/proc_fs.h> 6 6 #include <linux/seq_file.h> 7 7 8 - __weak void arch_freq_prepare_all(void) 9 - { 10 - } 11 - 12 8 extern const struct seq_operations cpuinfo_op; 9 + 13 10 static int cpuinfo_open(struct inode *inode, struct file *file) 14 11 { 15 - arch_freq_prepare_all(); 16 12 return seq_open(file, &cpuinfo_op); 17 13 } 18 14
-1
include/linux/cpufreq.h
··· 1199 1199 struct cpufreq_governor *old_gov) { } 1200 1200 #endif 1201 1201 1202 - extern void arch_freq_prepare_all(void); 1203 1202 extern unsigned int arch_freq_get_on_cpu(int cpu); 1204 1203 1205 1204 #ifndef arch_set_freq_scale