Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * intel_pstate.c: Native P state management for Intel processors
3 *
4 * (C) Copyright 2012 Intel Corporation
5 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12
13#include <linux/kernel.h>
14#include <linux/kernel_stat.h>
15#include <linux/module.h>
16#include <linux/ktime.h>
17#include <linux/hrtimer.h>
18#include <linux/tick.h>
19#include <linux/slab.h>
20#include <linux/sched.h>
21#include <linux/list.h>
22#include <linux/cpu.h>
23#include <linux/cpufreq.h>
24#include <linux/sysfs.h>
25#include <linux/types.h>
26#include <linux/fs.h>
27#include <linux/debugfs.h>
28#include <linux/acpi.h>
29#include <trace/events/power.h>
30
31#include <asm/div64.h>
32#include <asm/msr.h>
33#include <asm/cpu_device_id.h>
34
/* MSR addresses read by the byt_* helpers for ratio and VID data. */
#define BYT_RATIOS		0x66a
#define BYT_VIDS		0x66b
#define BYT_TURBO_RATIOS	0x66c
#define BYT_TURBO_VIDS		0x66d
39
/*
 * Fixed-point helpers. A fixed-point value carries FRAC_BITS fractional
 * bits: int_tofp() widens to 64 bits and shifts an integer up, fp_toint()
 * shifts a fixed-point value back down (dropping the fraction).
 */
#define FRAC_BITS	8
#define int_tofp(X)	((int64_t)(X) << FRAC_BITS)
#define fp_toint(X)	((X) >> FRAC_BITS)
43
44
45static inline int32_t mul_fp(int32_t x, int32_t y)
46{
47 return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
48}
49
50static inline int32_t div_fp(int32_t x, int32_t y)
51{
52 return div_s64((int64_t)x << FRAC_BITS, y);
53}
54
55static inline int ceiling_fp(int32_t x)
56{
57 int mask, ret;
58
59 ret = fp_toint(x);
60 mask = (1 << FRAC_BITS) - 1;
61 if (x & mask)
62 ret += 1;
63 return ret;
64}
65
66struct sample {
67 int32_t core_pct_busy;
68 u64 aperf;
69 u64 mperf;
70 int freq;
71 ktime_t time;
72};
73
/* Per-CPU P-state bounds plus the P-state most recently requested. */
struct pstate_data {
	int current_pstate;	/* last value passed to pstate_funcs.set() */
	int min_pstate;		/* from pstate_funcs.get_min() */
	int max_pstate;		/* from pstate_funcs.get_max() */
	int scaling;		/* pstate * scaling is used as a frequency */
	int turbo_pstate;	/* from pstate_funcs.get_turbo() */
};
81
/* Voltage ID data, populated by byt_get_vid(). */
struct vid_data {
	int min;	/* stored as a fixed-point value */
	int max;	/* stored as a fixed-point value */
	int turbo;	/* raw 7-bit field, used above max_pstate */
	int32_t ratio;	/* fixed-point VID step per P-state step */
};
88
/* State of the PID controller evaluated by pid_calc(). */
struct _pid {
	int setpoint;		/* target busy value, plain integer */
	int32_t integral;	/* accumulated error, fixed-point */
	int32_t p_gain;		/* proportional gain, fixed-point fraction */
	int32_t i_gain;		/* integral gain, fixed-point fraction */
	int32_t d_gain;		/* derivative gain, fixed-point fraction */
	int deadband;		/* errors within this magnitude are ignored */
	int32_t last_err;	/* previous error, fixed-point */
};
98
99struct cpudata {
100 int cpu;
101
102 struct timer_list timer;
103
104 struct pstate_data pstate;
105 struct vid_data vid;
106 struct _pid pid;
107
108 ktime_t last_sample_time;
109 u64 prev_aperf;
110 u64 prev_mperf;
111 struct sample sample;
112};
113
/* Table of per-CPU state, indexed by CPU number; allocated in intel_pstate_init(). */
static struct cpudata **all_cpu_data;
/* Tunables for the sampling timer and the PID controller. */
struct pstate_adjust_policy {
	int sample_rate_ms;	/* timer period in milliseconds */
	int deadband;
	int setpoint;
	int p_gain_pct;		/* gains are given in percent */
	int d_gain_pct;
	int i_gain_pct;
};
123
/* CPU-family specific callbacks used to query and program P-states. */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	void (*set)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);	/* optional; callers check for NULL */
};
132
133struct cpu_defaults {
134 struct pstate_adjust_policy pid_policy;
135 struct pstate_funcs funcs;
136};
137
138static struct pstate_adjust_policy pid_params;
139static struct pstate_funcs pstate_funcs;
140
/* Global performance limits set through sysfs and the cpufreq policy. */
struct perf_limits {
	int no_turbo;		/* turbo disabled by request */
	int turbo_disabled;	/* refreshed by update_turbo_state() */
	int max_perf_pct;	/* min(max_policy_pct, max_sysfs_pct) */
	int min_perf_pct;
	int32_t max_perf;	/* max_perf_pct / 100, fixed-point */
	int32_t min_perf;	/* min_perf_pct / 100, fixed-point */
	int max_policy_pct;	/* ceiling derived from the cpufreq policy */
	int max_sysfs_pct;	/* ceiling written via max_perf_pct in sysfs */
};
151
152static struct perf_limits limits = {
153 .no_turbo = 0,
154 .turbo_disabled = 0,
155 .max_perf_pct = 100,
156 .max_perf = int_tofp(1),
157 .min_perf_pct = 0,
158 .min_perf = 0,
159 .max_policy_pct = 100,
160 .max_sysfs_pct = 100,
161};
162
163static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
164 int deadband, int integral) {
165 pid->setpoint = setpoint;
166 pid->deadband = deadband;
167 pid->integral = int_tofp(integral);
168 pid->last_err = int_tofp(setpoint) - int_tofp(busy);
169}
170
171static inline void pid_p_gain_set(struct _pid *pid, int percent)
172{
173 pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
174}
175
176static inline void pid_i_gain_set(struct _pid *pid, int percent)
177{
178 pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
179}
180
181static inline void pid_d_gain_set(struct _pid *pid, int percent)
182{
183 pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
184}
185
186static signed int pid_calc(struct _pid *pid, int32_t busy)
187{
188 signed int result;
189 int32_t pterm, dterm, fp_error;
190 int32_t integral_limit;
191
192 fp_error = int_tofp(pid->setpoint) - busy;
193
194 if (abs(fp_error) <= int_tofp(pid->deadband))
195 return 0;
196
197 pterm = mul_fp(pid->p_gain, fp_error);
198
199 pid->integral += fp_error;
200
201 /* limit the integral term */
202 integral_limit = int_tofp(30);
203 if (pid->integral > integral_limit)
204 pid->integral = integral_limit;
205 if (pid->integral < -integral_limit)
206 pid->integral = -integral_limit;
207
208 dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
209 pid->last_err = fp_error;
210
211 result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
212 result = result + (1 << (FRAC_BITS-1));
213 return (signed int)fp_toint(result);
214}
215
216static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
217{
218 pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
219 pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
220 pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
221
222 pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
223}
224
225static inline void intel_pstate_reset_all_pid(void)
226{
227 unsigned int cpu;
228
229 for_each_online_cpu(cpu) {
230 if (all_cpu_data[cpu])
231 intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
232 }
233}
234
235static inline void update_turbo_state(void)
236{
237 u64 misc_en;
238 struct cpudata *cpu;
239
240 cpu = all_cpu_data[0];
241 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
242 limits.turbo_disabled =
243 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
244 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
245}
246
247/************************** debugfs begin ************************/
248static int pid_param_set(void *data, u64 val)
249{
250 *(u32 *)data = val;
251 intel_pstate_reset_all_pid();
252 return 0;
253}
254
255static int pid_param_get(void *data, u64 *val)
256{
257 *val = *(u32 *)data;
258 return 0;
259}
260DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
261
/* Maps a debugfs file name to the tunable it exposes. */
struct pid_param {
	char *name;	/* file name; NULL terminates a table */
	void *value;	/* address of the tunable */
};
266
267static struct pid_param pid_files[] = {
268 {"sample_rate_ms", &pid_params.sample_rate_ms},
269 {"d_gain_pct", &pid_params.d_gain_pct},
270 {"i_gain_pct", &pid_params.i_gain_pct},
271 {"deadband", &pid_params.deadband},
272 {"setpoint", &pid_params.setpoint},
273 {"p_gain_pct", &pid_params.p_gain_pct},
274 {NULL, NULL}
275};
276
277static void __init intel_pstate_debug_expose_params(void)
278{
279 struct dentry *debugfs_parent;
280 int i = 0;
281
282 debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
283 if (IS_ERR_OR_NULL(debugfs_parent))
284 return;
285 while (pid_files[i].name) {
286 debugfs_create_file(pid_files[i].name, 0660,
287 debugfs_parent, pid_files[i].value,
288 &fops_pid_param);
289 i++;
290 }
291}
292
293/************************** debugfs end ************************/
294
295/************************** sysfs begin ************************/
/*
 * show_one - generate a sysfs show handler named show_<file_name> that
 * prints the field limits.<object> followed by a newline.
 */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}
302
303static ssize_t show_no_turbo(struct kobject *kobj,
304 struct attribute *attr, char *buf)
305{
306 ssize_t ret;
307
308 update_turbo_state();
309 if (limits.turbo_disabled)
310 ret = sprintf(buf, "%u\n", limits.turbo_disabled);
311 else
312 ret = sprintf(buf, "%u\n", limits.no_turbo);
313
314 return ret;
315}
316
317static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
318 const char *buf, size_t count)
319{
320 unsigned int input;
321 int ret;
322
323 ret = sscanf(buf, "%u", &input);
324 if (ret != 1)
325 return -EINVAL;
326
327 update_turbo_state();
328 if (limits.turbo_disabled) {
329 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
330 return -EPERM;
331 }
332 limits.no_turbo = clamp_t(int, input, 0, 1);
333
334 return count;
335}
336
337static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
338 const char *buf, size_t count)
339{
340 unsigned int input;
341 int ret;
342
343 ret = sscanf(buf, "%u", &input);
344 if (ret != 1)
345 return -EINVAL;
346
347 limits.max_sysfs_pct = clamp_t(int, input, 0 , 100);
348 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
349 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
350
351 return count;
352}
353
354static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
355 const char *buf, size_t count)
356{
357 unsigned int input;
358 int ret;
359
360 ret = sscanf(buf, "%u", &input);
361 if (ret != 1)
362 return -EINVAL;
363 limits.min_perf_pct = clamp_t(int, input, 0 , 100);
364 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
365
366 return count;
367}
368
/* Generated show handlers for the two percentage limits. */
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/* Read/write attributes; their .attr members are listed in the group below. */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
375
376static struct attribute *intel_pstate_attributes[] = {
377 &no_turbo.attr,
378 &max_perf_pct.attr,
379 &min_perf_pct.attr,
380 NULL
381};
382
383static struct attribute_group intel_pstate_attr_group = {
384 .attrs = intel_pstate_attributes,
385};
386
387static void __init intel_pstate_sysfs_expose_params(void)
388{
389 struct kobject *intel_pstate_kobject;
390 int rc;
391
392 intel_pstate_kobject = kobject_create_and_add("intel_pstate",
393 &cpu_subsys.dev_root->kobj);
394 BUG_ON(!intel_pstate_kobject);
395 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
396 BUG_ON(rc);
397}
398
399/************************** sysfs end ************************/
400static int byt_get_min_pstate(void)
401{
402 u64 value;
403
404 rdmsrl(BYT_RATIOS, value);
405 return (value >> 8) & 0x7F;
406}
407
408static int byt_get_max_pstate(void)
409{
410 u64 value;
411
412 rdmsrl(BYT_RATIOS, value);
413 return (value >> 16) & 0x7F;
414}
415
416static int byt_get_turbo_pstate(void)
417{
418 u64 value;
419
420 rdmsrl(BYT_TURBO_RATIOS, value);
421 return value & 0x7F;
422}
423
424static void byt_set_pstate(struct cpudata *cpudata, int pstate)
425{
426 u64 val;
427 int32_t vid_fp;
428 u32 vid;
429
430 val = pstate << 8;
431 if (limits.no_turbo && !limits.turbo_disabled)
432 val |= (u64)1 << 32;
433
434 vid_fp = cpudata->vid.min + mul_fp(
435 int_tofp(pstate - cpudata->pstate.min_pstate),
436 cpudata->vid.ratio);
437
438 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
439 vid = ceiling_fp(vid_fp);
440
441 if (pstate > cpudata->pstate.max_pstate)
442 vid = cpudata->vid.turbo;
443
444 val |= vid;
445
446 wrmsrl(MSR_IA32_PERF_CTL, val);
447}
448
449#define BYT_BCLK_FREQS 5
450static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
451
452static int byt_get_scaling(void)
453{
454 u64 value;
455 int i;
456
457 rdmsrl(MSR_FSB_FREQ, value);
458 i = value & 0x3;
459
460 BUG_ON(i > BYT_BCLK_FREQS);
461
462 return byt_freq_table[i] * 100;
463}
464
465static void byt_get_vid(struct cpudata *cpudata)
466{
467 u64 value;
468
469 rdmsrl(BYT_VIDS, value);
470 cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
471 cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
472 cpudata->vid.ratio = div_fp(
473 cpudata->vid.max - cpudata->vid.min,
474 int_tofp(cpudata->pstate.max_pstate -
475 cpudata->pstate.min_pstate));
476
477 rdmsrl(BYT_TURBO_VIDS, value);
478 cpudata->vid.turbo = value & 0x7f;
479}
480
481static int core_get_min_pstate(void)
482{
483 u64 value;
484
485 rdmsrl(MSR_PLATFORM_INFO, value);
486 return (value >> 40) & 0xFF;
487}
488
489static int core_get_max_pstate(void)
490{
491 u64 value;
492
493 rdmsrl(MSR_PLATFORM_INFO, value);
494 return (value >> 8) & 0xFF;
495}
496
497static int core_get_turbo_pstate(void)
498{
499 u64 value;
500 int nont, ret;
501
502 rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
503 nont = core_get_max_pstate();
504 ret = (value) & 255;
505 if (ret <= nont)
506 ret = nont;
507 return ret;
508}
509
/* Core CPUs use a fixed scaling factor per P-state step. */
static inline int core_get_scaling(void)
{
	const int scaling = 100000;

	return scaling;
}
514
515static void core_set_pstate(struct cpudata *cpudata, int pstate)
516{
517 u64 val;
518
519 val = pstate << 8;
520 if (limits.no_turbo && !limits.turbo_disabled)
521 val |= (u64)1 << 32;
522
523 wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
524}
525
526static struct cpu_defaults core_params = {
527 .pid_policy = {
528 .sample_rate_ms = 10,
529 .deadband = 0,
530 .setpoint = 97,
531 .p_gain_pct = 20,
532 .d_gain_pct = 0,
533 .i_gain_pct = 0,
534 },
535 .funcs = {
536 .get_max = core_get_max_pstate,
537 .get_min = core_get_min_pstate,
538 .get_turbo = core_get_turbo_pstate,
539 .get_scaling = core_get_scaling,
540 .set = core_set_pstate,
541 },
542};
543
544static struct cpu_defaults byt_params = {
545 .pid_policy = {
546 .sample_rate_ms = 10,
547 .deadband = 0,
548 .setpoint = 97,
549 .p_gain_pct = 14,
550 .d_gain_pct = 0,
551 .i_gain_pct = 4,
552 },
553 .funcs = {
554 .get_max = byt_get_max_pstate,
555 .get_min = byt_get_min_pstate,
556 .get_turbo = byt_get_turbo_pstate,
557 .set = byt_set_pstate,
558 .get_scaling = byt_get_scaling,
559 .get_vid = byt_get_vid,
560 },
561};
562
563static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
564{
565 int max_perf = cpu->pstate.turbo_pstate;
566 int max_perf_adj;
567 int min_perf;
568
569 if (limits.no_turbo || limits.turbo_disabled)
570 max_perf = cpu->pstate.max_pstate;
571
572 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
573 *max = clamp_t(int, max_perf_adj,
574 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
575
576 min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
577 *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
578}
579
580static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
581{
582 int max_perf, min_perf;
583
584 update_turbo_state();
585
586 intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
587
588 pstate = clamp_t(int, pstate, min_perf, max_perf);
589
590 if (pstate == cpu->pstate.current_pstate)
591 return;
592
593 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
594
595 cpu->pstate.current_pstate = pstate;
596
597 pstate_funcs.set(cpu, pstate);
598}
599
600static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
601{
602 cpu->pstate.min_pstate = pstate_funcs.get_min();
603 cpu->pstate.max_pstate = pstate_funcs.get_max();
604 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
605 cpu->pstate.scaling = pstate_funcs.get_scaling();
606
607 if (pstate_funcs.get_vid)
608 pstate_funcs.get_vid(cpu);
609 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
610}
611
612static inline void intel_pstate_calc_busy(struct cpudata *cpu)
613{
614 struct sample *sample = &cpu->sample;
615 int64_t core_pct;
616
617 core_pct = int_tofp(sample->aperf) * int_tofp(100);
618 core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
619
620 sample->freq = fp_toint(
621 mul_fp(int_tofp(
622 cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
623 core_pct));
624
625 sample->core_pct_busy = (int32_t)core_pct;
626}
627
628static inline void intel_pstate_sample(struct cpudata *cpu)
629{
630 u64 aperf, mperf;
631 unsigned long flags;
632
633 local_irq_save(flags);
634 rdmsrl(MSR_IA32_APERF, aperf);
635 rdmsrl(MSR_IA32_MPERF, mperf);
636 local_irq_restore(flags);
637
638 cpu->last_sample_time = cpu->sample.time;
639 cpu->sample.time = ktime_get();
640 cpu->sample.aperf = aperf;
641 cpu->sample.mperf = mperf;
642 cpu->sample.aperf -= cpu->prev_aperf;
643 cpu->sample.mperf -= cpu->prev_mperf;
644
645 intel_pstate_calc_busy(cpu);
646
647 cpu->prev_aperf = aperf;
648 cpu->prev_mperf = mperf;
649}
650
651static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
652{
653 int delay;
654
655 delay = msecs_to_jiffies(pid_params.sample_rate_ms);
656 mod_timer_pinned(&cpu->timer, jiffies + delay);
657}
658
659static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
660{
661 int32_t core_busy, max_pstate, current_pstate, sample_ratio;
662 u32 duration_us;
663 u32 sample_time;
664
665 core_busy = cpu->sample.core_pct_busy;
666 max_pstate = int_tofp(cpu->pstate.max_pstate);
667 current_pstate = int_tofp(cpu->pstate.current_pstate);
668 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
669
670 sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
671 duration_us = (u32) ktime_us_delta(cpu->sample.time,
672 cpu->last_sample_time);
673 if (duration_us > sample_time * 3) {
674 sample_ratio = div_fp(int_tofp(sample_time),
675 int_tofp(duration_us));
676 core_busy = mul_fp(core_busy, sample_ratio);
677 }
678
679 return core_busy;
680}
681
682static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
683{
684 int32_t busy_scaled;
685 struct _pid *pid;
686 signed int ctl;
687
688 pid = &cpu->pid;
689 busy_scaled = intel_pstate_get_scaled_busy(cpu);
690
691 ctl = pid_calc(pid, busy_scaled);
692
693 /* Negative values of ctl increase the pstate and vice versa */
694 intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
695}
696
697static void intel_pstate_timer_func(unsigned long __data)
698{
699 struct cpudata *cpu = (struct cpudata *) __data;
700 struct sample *sample;
701
702 intel_pstate_sample(cpu);
703
704 sample = &cpu->sample;
705
706 intel_pstate_adjust_busy_pstate(cpu);
707
708 trace_pstate_sample(fp_toint(sample->core_pct_busy),
709 fp_toint(intel_pstate_get_scaled_busy(cpu)),
710 cpu->pstate.current_pstate,
711 sample->mperf,
712 sample->aperf,
713 sample->freq);
714
715 intel_pstate_set_sample_time(cpu);
716}
717
/*
 * ICPU - x86_cpu_id entry for an Intel family 6 model with APERF/MPERF,
 * carrying the address of its cpu_defaults as driver data.
 */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }
721
722static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
723 ICPU(0x2a, core_params),
724 ICPU(0x2d, core_params),
725 ICPU(0x37, byt_params),
726 ICPU(0x3a, core_params),
727 ICPU(0x3c, core_params),
728 ICPU(0x3d, core_params),
729 ICPU(0x3e, core_params),
730 ICPU(0x3f, core_params),
731 ICPU(0x45, core_params),
732 ICPU(0x46, core_params),
733 ICPU(0x4c, byt_params),
734 ICPU(0x4f, core_params),
735 ICPU(0x56, core_params),
736 {}
737};
738MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
739
740static int intel_pstate_init_cpu(unsigned int cpunum)
741{
742 struct cpudata *cpu;
743
744 if (!all_cpu_data[cpunum])
745 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
746 GFP_KERNEL);
747 if (!all_cpu_data[cpunum])
748 return -ENOMEM;
749
750 cpu = all_cpu_data[cpunum];
751
752 cpu->cpu = cpunum;
753 intel_pstate_get_cpu_pstates(cpu);
754
755 init_timer_deferrable(&cpu->timer);
756 cpu->timer.function = intel_pstate_timer_func;
757 cpu->timer.data = (unsigned long)cpu;
758 cpu->timer.expires = jiffies + HZ/100;
759 intel_pstate_busy_pid_reset(cpu);
760 intel_pstate_sample(cpu);
761
762 add_timer_on(&cpu->timer, cpunum);
763
764 pr_debug("Intel pstate controlling: cpu %d\n", cpunum);
765
766 return 0;
767}
768
769static unsigned int intel_pstate_get(unsigned int cpu_num)
770{
771 struct sample *sample;
772 struct cpudata *cpu;
773
774 cpu = all_cpu_data[cpu_num];
775 if (!cpu)
776 return 0;
777 sample = &cpu->sample;
778 return sample->freq;
779}
780
781static int intel_pstate_set_policy(struct cpufreq_policy *policy)
782{
783 if (!policy->cpuinfo.max_freq)
784 return -ENODEV;
785
786 if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
787 limits.min_perf_pct = 100;
788 limits.min_perf = int_tofp(1);
789 limits.max_policy_pct = 100;
790 limits.max_perf_pct = 100;
791 limits.max_perf = int_tofp(1);
792 limits.no_turbo = 0;
793 return 0;
794 }
795 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
796 limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
797 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
798
799 limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
800 limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100);
801 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
802 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
803
804 return 0;
805}
806
807static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
808{
809 cpufreq_verify_within_cpu_limits(policy);
810
811 if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
812 policy->policy != CPUFREQ_POLICY_PERFORMANCE)
813 return -EINVAL;
814
815 return 0;
816}
817
818static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
819{
820 int cpu_num = policy->cpu;
821 struct cpudata *cpu = all_cpu_data[cpu_num];
822
823 pr_info("intel_pstate CPU %d exiting\n", cpu_num);
824
825 del_timer_sync(&all_cpu_data[cpu_num]->timer);
826 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
827}
828
829static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
830{
831 struct cpudata *cpu;
832 int rc;
833
834 rc = intel_pstate_init_cpu(policy->cpu);
835 if (rc)
836 return rc;
837
838 cpu = all_cpu_data[policy->cpu];
839
840 if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
841 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
842 else
843 policy->policy = CPUFREQ_POLICY_POWERSAVE;
844
845 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
846 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
847
848 /* cpuinfo and default policy values */
849 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
850 policy->cpuinfo.max_freq =
851 cpu->pstate.turbo_pstate * cpu->pstate.scaling;
852 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
853 cpumask_set_cpu(policy->cpu, policy->cpus);
854
855 return 0;
856}
857
858static struct cpufreq_driver intel_pstate_driver = {
859 .flags = CPUFREQ_CONST_LOOPS,
860 .verify = intel_pstate_verify_policy,
861 .setpolicy = intel_pstate_set_policy,
862 .get = intel_pstate_get,
863 .init = intel_pstate_cpu_init,
864 .stop_cpu = intel_pstate_stop_cpu,
865 .name = "intel_pstate",
866};
867
868static int __initdata no_load;
869
870static int intel_pstate_msrs_not_valid(void)
871{
872 /* Check that all the msr's we are using are valid. */
873 u64 aperf, mperf, tmp;
874
875 rdmsrl(MSR_IA32_APERF, aperf);
876 rdmsrl(MSR_IA32_MPERF, mperf);
877
878 if (!pstate_funcs.get_max() ||
879 !pstate_funcs.get_min() ||
880 !pstate_funcs.get_turbo())
881 return -ENODEV;
882
883 rdmsrl(MSR_IA32_APERF, tmp);
884 if (!(tmp - aperf))
885 return -ENODEV;
886
887 rdmsrl(MSR_IA32_MPERF, tmp);
888 if (!(tmp - mperf))
889 return -ENODEV;
890
891 return 0;
892}
893
894static void copy_pid_params(struct pstate_adjust_policy *policy)
895{
896 pid_params.sample_rate_ms = policy->sample_rate_ms;
897 pid_params.p_gain_pct = policy->p_gain_pct;
898 pid_params.i_gain_pct = policy->i_gain_pct;
899 pid_params.d_gain_pct = policy->d_gain_pct;
900 pid_params.deadband = policy->deadband;
901 pid_params.setpoint = policy->setpoint;
902}
903
904static void copy_cpu_funcs(struct pstate_funcs *funcs)
905{
906 pstate_funcs.get_max = funcs->get_max;
907 pstate_funcs.get_min = funcs->get_min;
908 pstate_funcs.get_turbo = funcs->get_turbo;
909 pstate_funcs.get_scaling = funcs->get_scaling;
910 pstate_funcs.set = funcs->set;
911 pstate_funcs.get_vid = funcs->get_vid;
912}
913
914#if IS_ENABLED(CONFIG_ACPI)
915#include <acpi/processor.h>
916
917static bool intel_pstate_no_acpi_pss(void)
918{
919 int i;
920
921 for_each_possible_cpu(i) {
922 acpi_status status;
923 union acpi_object *pss;
924 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
925 struct acpi_processor *pr = per_cpu(processors, i);
926
927 if (!pr)
928 continue;
929
930 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
931 if (ACPI_FAILURE(status))
932 continue;
933
934 pss = buffer.pointer;
935 if (pss && pss->type == ACPI_TYPE_PACKAGE) {
936 kfree(pss);
937 return false;
938 }
939
940 kfree(pss);
941 }
942
943 return true;
944}
945
946struct hw_vendor_info {
947 u16 valid;
948 char oem_id[ACPI_OEM_ID_SIZE];
949 char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
950};
951
952/* Hardware vendor-specific info that has its own power management modes */
953static struct hw_vendor_info vendor_info[] = {
954 {1, "HP ", "ProLiant"},
955 {0, "", ""},
956};
957
958static bool intel_pstate_platform_pwr_mgmt_exists(void)
959{
960 struct acpi_table_header hdr;
961 struct hw_vendor_info *v_info;
962
963 if (acpi_disabled ||
964 ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
965 return false;
966
967 for (v_info = vendor_info; v_info->valid; v_info++) {
968 if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
969 !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
970 intel_pstate_no_acpi_pss())
971 return true;
972 }
973
974 return false;
975}
976#else /* CONFIG_ACPI not enabled */
/* Without ACPI there is no firmware power management to defer to. */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	return false;
}
978#endif /* CONFIG_ACPI */
979
980static int __init intel_pstate_init(void)
981{
982 int cpu, rc = 0;
983 const struct x86_cpu_id *id;
984 struct cpu_defaults *cpu_info;
985
986 if (no_load)
987 return -ENODEV;
988
989 id = x86_match_cpu(intel_pstate_cpu_ids);
990 if (!id)
991 return -ENODEV;
992
993 /*
994 * The Intel pstate driver will be ignored if the platform
995 * firmware has its own power management modes.
996 */
997 if (intel_pstate_platform_pwr_mgmt_exists())
998 return -ENODEV;
999
1000 cpu_info = (struct cpu_defaults *)id->driver_data;
1001
1002 copy_pid_params(&cpu_info->pid_policy);
1003 copy_cpu_funcs(&cpu_info->funcs);
1004
1005 if (intel_pstate_msrs_not_valid())
1006 return -ENODEV;
1007
1008 pr_info("Intel P-state driver initializing.\n");
1009
1010 all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
1011 if (!all_cpu_data)
1012 return -ENOMEM;
1013
1014 rc = cpufreq_register_driver(&intel_pstate_driver);
1015 if (rc)
1016 goto out;
1017
1018 intel_pstate_debug_expose_params();
1019 intel_pstate_sysfs_expose_params();
1020
1021 return rc;
1022out:
1023 get_online_cpus();
1024 for_each_online_cpu(cpu) {
1025 if (all_cpu_data[cpu]) {
1026 del_timer_sync(&all_cpu_data[cpu]->timer);
1027 kfree(all_cpu_data[cpu]);
1028 }
1029 }
1030
1031 put_online_cpus();
1032 vfree(all_cpu_data);
1033 return -ENODEV;
1034}
1035device_initcall(intel_pstate_init);
1036
1037static int __init intel_pstate_setup(char *str)
1038{
1039 if (!str)
1040 return -EINVAL;
1041
1042 if (!strcmp(str, "disable"))
1043 no_load = 1;
1044 return 0;
1045}
1046early_param("intel_pstate", intel_pstate_setup);
1047
1048MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
1049MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
1050MODULE_LICENSE("GPL");