Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cpufreq: AMD "frequency sensitivity feedback" powersave bias for ondemand governor

Future AMD processors, starting with Family 16h, can provide software
with feedback on how the workload may respond to frequency change --
memory-bound workloads will not benefit from higher frequency, whereas
compute-bound workloads will. This patch enables this "frequency
sensitivity feedback" to aid the ondemand governor to make better
frequency change decisions by hooking into the powersave bias.

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
Acked-by: Thomas Renninger <trenn@suse.de>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Jacob Shin and committed by
Rafael J. Wysocki
9c5320c8 fb30809e

+190 -1
+21
Documentation/cpu-freq/governors.txt
··· 167 167 busy, rather than shifting back and forth in speed. This tunable has no 168 168 effect on behavior at lower speeds/lower CPU loads. 169 169 170 + powersave_bias: this parameter takes a value between 0 and 1000. It 171 + defines the percentage (times 10) value of the target frequency that 172 + will be shaved off of the target. For example, when set to 100 -- 10%, 173 + when ondemand governor would have targeted 1000 MHz, it will target 174 + 1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0 175 + (disabled) by default. 176 + When AMD frequency sensitivity powersave bias driver -- 177 + drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter 178 + defines the workload frequency sensitivity threshold in which a lower 179 + frequency is chosen instead of ondemand governor's original target. 180 + The frequency sensitivity is a hardware reported (on AMD Family 16h 181 + Processors and above) value between 0 and 100% that tells software how 182 + the performance of the workload running on a CPU will change when 183 + frequency changes. A workload with sensitivity of 0% (memory/IO-bound) 184 + will not perform any better on higher core frequency, whereas a 185 + workload with sensitivity of 100% (CPU-bound) will perform better 186 + the higher the frequency. When the driver is loaded, this is set to 400 187 + by default -- for CPUs running workloads with sensitivity value below 188 + 40%, a lower frequency is chosen. Unloading the driver or writing 0 189 + will disable this feature. 190 + 170 191 171 192 2.5 Conservative 172 193 ----------------
+1
arch/x86/include/asm/cpufeature.h
··· 182 182 #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ 183 183 #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ 184 184 #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ 185 + #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */ 185 186 186 187 /* Virtualization flags: Linux defined, word 8 */ 187 188 #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
+2 -1
arch/x86/kernel/cpu/scattered.c
··· 39 39 { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, 40 40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, 41 41 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, 42 - { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, 43 42 { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, 43 + { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, 44 + { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, 44 45 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, 45 46 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, 46 47 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
+17
drivers/cpufreq/Kconfig.x86
··· 129 129 130 130 For details, take a look at <file:Documentation/cpu-freq/>. 131 131 132 + config X86_AMD_FREQ_SENSITIVITY 133 + tristate "AMD frequency sensitivity feedback powersave bias" 134 + depends on CPU_FREQ_GOV_ONDEMAND && X86_ACPI_CPUFREQ && CPU_SUP_AMD 135 + help 136 + This adds AMD-specific powersave bias function to the ondemand 137 + governor, which allows it to make more power-conscious frequency 138 + change decisions based on feedback from hardware (available on AMD 139 + Family 16h and above). 140 + 141 + Hardware feedback tells software how "sensitive" to frequency changes 142 + the CPUs' workloads are. CPU-bound workloads will be more sensitive 143 + -- they will perform better as frequency increases. Memory/IO-bound 144 + workloads will be less sensitive -- they will not necessarily perform 145 + better as frequency increases. 146 + 147 + If in doubt, say N. 148 + 132 149 config X86_GX_SUSPMOD 133 150 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" 134 151 depends on X86_32 && PCI
+1
drivers/cpufreq/Makefile
··· 41 41 obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o 42 42 obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o 43 43 obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o 44 + obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o 44 45 45 46 ################################################################################## 46 47 # ARM SoC drivers
+148
drivers/cpufreq/amd_freq_sensitivity.c
··· 1 + /* 2 + * amd_freq_sensitivity.c: AMD frequency sensitivity feedback powersave bias 3 + * for the ondemand governor. 4 + * 5 + * Copyright (C) 2013 Advanced Micro Devices, Inc. 6 + * 7 + * Author: Jacob Shin <jacob.shin@amd.com> 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + */ 13 + 14 + #include <linux/kernel.h> 15 + #include <linux/module.h> 16 + #include <linux/types.h> 17 + #include <linux/percpu-defs.h> 18 + #include <linux/init.h> 19 + #include <linux/mod_devicetable.h> 20 + 21 + #include <asm/msr.h> 22 + #include <asm/cpufeature.h> 23 + 24 + #include "cpufreq_governor.h" 25 + 26 + #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080 27 + #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081 28 + #define CLASS_CODE_SHIFT 56 29 + #define POWERSAVE_BIAS_MAX 1000 30 + #define POWERSAVE_BIAS_DEF 400 31 + 32 + struct cpu_data_t { 33 + u64 actual; 34 + u64 reference; 35 + unsigned int freq_prev; 36 + }; 37 + 38 + static DEFINE_PER_CPU(struct cpu_data_t, cpu_data); 39 + 40 + static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, 41 + unsigned int freq_next, 42 + unsigned int relation) 43 + { 44 + int sensitivity; 45 + long d_actual, d_reference; 46 + struct msr actual, reference; 47 + struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu); 48 + struct dbs_data *od_data = policy->governor_data; 49 + struct od_dbs_tuners *od_tuners = od_data->tuners; 50 + struct od_cpu_dbs_info_s *od_info = 51 + od_data->cdata->get_cpu_dbs_info_s(policy->cpu); 52 + 53 + if (!od_info->freq_table) 54 + return freq_next; 55 + 56 + rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, 57 + &actual.l, &actual.h); 58 + rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_REFERENCE, 59 + &reference.l, &reference.h); 60 + actual.h &= 0x00ffffff; 61 + reference.h &= 0x00ffffff; 62 + 63 + /* 
counter wrapped around, so stay on current frequency */ 64 + if (actual.q < data->actual || reference.q < data->reference) { 65 + freq_next = policy->cur; 66 + goto out; 67 + } 68 + 69 + d_actual = actual.q - data->actual; 70 + d_reference = reference.q - data->reference; 71 + 72 + /* divide by 0, so stay on current frequency as well */ 73 + if (d_reference == 0) { 74 + freq_next = policy->cur; 75 + goto out; 76 + } 77 + 78 + sensitivity = POWERSAVE_BIAS_MAX - 79 + (POWERSAVE_BIAS_MAX * (d_reference - d_actual) / d_reference); 80 + 81 + clamp(sensitivity, 0, POWERSAVE_BIAS_MAX); 82 + 83 + /* this workload is not CPU bound, so choose a lower freq */ 84 + if (sensitivity < od_tuners->powersave_bias) { 85 + if (data->freq_prev == policy->cur) 86 + freq_next = policy->cur; 87 + 88 + if (freq_next > policy->cur) 89 + freq_next = policy->cur; 90 + else if (freq_next < policy->cur) 91 + freq_next = policy->min; 92 + else { 93 + unsigned int index; 94 + 95 + cpufreq_frequency_table_target(policy, 96 + od_info->freq_table, policy->cur - 1, 97 + CPUFREQ_RELATION_H, &index); 98 + freq_next = od_info->freq_table[index].frequency; 99 + } 100 + 101 + data->freq_prev = freq_next; 102 + } else 103 + data->freq_prev = 0; 104 + 105 + out: 106 + data->actual = actual.q; 107 + data->reference = reference.q; 108 + return freq_next; 109 + } 110 + 111 + static int __init amd_freq_sensitivity_init(void) 112 + { 113 + u64 val; 114 + 115 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 116 + return -ENODEV; 117 + 118 + if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK)) 119 + return -ENODEV; 120 + 121 + if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val)) 122 + return -ENODEV; 123 + 124 + if (!(val >> CLASS_CODE_SHIFT)) 125 + return -ENODEV; 126 + 127 + od_register_powersave_bias_handler(amd_powersave_bias_target, 128 + POWERSAVE_BIAS_DEF); 129 + return 0; 130 + } 131 + late_initcall(amd_freq_sensitivity_init); 132 + 133 + static void __exit amd_freq_sensitivity_exit(void) 134 + { 135 + 
od_unregister_powersave_bias_handler(); 136 + } 137 + module_exit(amd_freq_sensitivity_exit); 138 + 139 + static const struct x86_cpu_id amd_freq_sensitivity_ids[] = { 140 + X86_FEATURE_MATCH(X86_FEATURE_PROC_FEEDBACK), 141 + {} 142 + }; 143 + MODULE_DEVICE_TABLE(x86cpu, amd_freq_sensitivity_ids); 144 + 145 + MODULE_AUTHOR("Jacob Shin <jacob.shin@amd.com>"); 146 + MODULE_DESCRIPTION("AMD frequency sensitivity feedback powersave bias for " 147 + "the ondemand governor."); 148 + MODULE_LICENSE("GPL");