Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux

Pull turbostat updates from Len Brown:
"Bug fixes and a smattering of features"

* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: (21 commits)
tools/power turbostat: version 2021.05.04
tools/power turbostat: Support "turbostat --hide idle"
tools/power turbostat: elevate priority of interval mode
tools/power turbostat: formatting
tools/power turbostat: rename tcc variables
tools/power turbostat: add TCC Offset support
tools/power turbostat: save original CPU model
tools/power turbostat: Fix Core C6 residency on Atom CPUs
tools/power turbostat: Print the C-state Pre-wake settings
tools/power turbostat: Enable tsc_tweak for Elkhart Lake and Jasper Lake
tools/power turbostat: unmark non-kernel-doc comment
tools/power/turbostat: Remove Package C6 Retention on Ice Lake Server
tools/power turbostat: Fix offset overflow issue in index converting
tools/power/turbostat: Fix turbostat for AMD Zen CPUs
tools/power turbostat: update version number
tools/power turbostat: Fix DRAM Energy Unit on SKX
Revert "tools/power turbostat: adjust for temperature offset"
tools/power turbostat: Support Ice Lake D
tools/power turbostat: Support Alder Lake Mobile
tools/power turbostat: print microcode patch level
...

+775 -551
+4 -2
tools/power/x86/turbostat/turbostat.8
··· 54 54 .PP 55 55 \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 56 56 .PP 57 - \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. 57 + \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. 58 58 .PP 59 59 \fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC". 60 60 The column name "all" can be used to enable all disabled-by-default built-in counters. 61 61 .PP 62 - \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. 62 + \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. 63 + .PP 64 + \fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". 63 65 .PP 64 66 \fB--Dump\fP displays the raw counter values. 65 67 .PP
+771 -549
tools/power/x86/turbostat/turbostat.c
··· 3 3 * turbostat -- show CPU frequency and C-state residency 4 4 * on modern Intel and AMD processors. 5 5 * 6 - * Copyright (c) 2013 Intel Corporation. 6 + * Copyright (c) 2021 Intel Corporation. 7 7 * Len Brown <len.brown@intel.com> 8 8 */ 9 9 ··· 33 33 #include <sys/capability.h> 34 34 #include <errno.h> 35 35 #include <math.h> 36 + #include <linux/perf_event.h> 37 + #include <asm/unistd.h> 38 + #include <stdbool.h> 36 39 37 40 char *proc_stat = "/proc/stat"; 38 41 FILE *outf; 39 42 int *fd_percpu; 40 - struct timeval interval_tv = {5, 0}; 41 - struct timespec interval_ts = {5, 0}; 43 + int *fd_instr_count_percpu; 44 + struct timeval interval_tv = { 5, 0 }; 45 + struct timespec interval_ts = { 5, 0 }; 46 + 47 + /* Save original CPU model */ 48 + unsigned int model_orig; 49 + 42 50 unsigned int num_iterations; 43 51 unsigned int debug; 44 52 unsigned int quiet; ··· 83 75 unsigned int do_rapl; 84 76 unsigned int do_dts; 85 77 unsigned int do_ptm; 86 - unsigned long long gfx_cur_rc6_ms; 78 + unsigned int do_ipc; 79 + unsigned long long gfx_cur_rc6_ms; 87 80 unsigned long long cpuidle_cur_cpu_lpi_us; 88 81 unsigned long long cpuidle_cur_sys_lpi_us; 89 82 unsigned int gfx_cur_mhz; 90 83 unsigned int gfx_act_mhz; 91 - unsigned int tcc_activation_temp; 92 - unsigned int tcc_activation_temp_override; 84 + unsigned int tj_max; 85 + unsigned int tj_max_override; 86 + int tcc_offset_bits; 93 87 double rapl_power_units, rapl_time_units; 94 88 double rapl_dram_energy_units, rapl_energy_units; 95 89 double rapl_joule_counter_range; 96 90 unsigned int do_core_perf_limit_reasons; 97 91 unsigned int has_automatic_cstate_conversion; 92 + unsigned int dis_cstate_prewake; 98 93 unsigned int do_gfx_perf_limit_reasons; 99 94 unsigned int do_ring_perf_limit_reasons; 100 95 unsigned int crystal_hz; 101 96 unsigned long long tsc_hz; 102 97 int base_cpu; 103 98 double discover_bclk(unsigned int family, unsigned int model); 104 - unsigned int has_hwp; /* IA32_PM_ENABLE, 
IA32_HWP_CAPABILITIES */ 99 + unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 105 100 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 106 - unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 101 + unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 107 102 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 108 - unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 109 - unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 103 + unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 104 + unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 110 105 unsigned int has_misc_feature_control; 111 106 unsigned int first_counter_read = 1; 112 107 int ignore_stdin; ··· 184 173 unsigned long long aperf; 185 174 unsigned long long mperf; 186 175 unsigned long long c1; 187 - unsigned long long irq_count; 176 + unsigned long long instr_count; 177 + unsigned long long irq_count; 188 178 unsigned int smi_count; 189 179 unsigned int cpu_id; 190 180 unsigned int apic_id; 191 181 unsigned int x2apic_id; 192 182 unsigned int flags; 183 + bool is_atom; 193 184 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 194 185 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 195 186 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; ··· 253 240 ((node_no) * topo.cores_per_node) + \ 254 241 (core_no)) 255 242 256 - 257 243 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 258 244 259 - enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; 260 - enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC}; 261 - enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; 245 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; 246 + enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; 247 + enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; 262 248 263 249 struct msr_counter { 264 250 unsigned int msr_num; ··· 293 281 struct msr_sum_array { 294 282 /* get_msr_sum() = sum + 
(get_msr() - last) */ 295 283 struct { 296 - /*The accumulated MSR value is updated by the timer*/ 284 + /*The accumulated MSR value is updated by the timer */ 297 285 unsigned long long sum; 298 - /*The MSR footprint recorded in last timer*/ 286 + /*The MSR footprint recorded in last timer */ 299 287 unsigned long long last; 300 288 } entries[IDX_COUNT]; 301 289 }; ··· 303 291 /* The percpu MSR sum array.*/ 304 292 struct msr_sum_array *per_cpu_msr_sum; 305 293 306 - int idx_to_offset(int idx) 294 + off_t idx_to_offset(int idx) 307 295 { 308 - int offset; 296 + off_t offset; 309 297 310 298 switch (idx) { 311 299 case IDX_PKG_ENERGY: 312 - offset = MSR_PKG_ENERGY_STATUS; 300 + if (do_rapl & RAPL_AMD_F17H) 301 + offset = MSR_PKG_ENERGY_STAT; 302 + else 303 + offset = MSR_PKG_ENERGY_STATUS; 313 304 break; 314 305 case IDX_DRAM_ENERGY: 315 306 offset = MSR_DRAM_ENERGY_STATUS; ··· 335 320 return offset; 336 321 } 337 322 338 - int offset_to_idx(int offset) 323 + int offset_to_idx(off_t offset) 339 324 { 340 325 int idx; 341 326 342 327 switch (offset) { 343 328 case MSR_PKG_ENERGY_STATUS: 329 + case MSR_PKG_ENERGY_STAT: 344 330 idx = IDX_PKG_ENERGY; 345 331 break; 346 332 case MSR_DRAM_ENERGY_STATUS: ··· 369 353 { 370 354 switch (idx) { 371 355 case IDX_PKG_ENERGY: 372 - return do_rapl & RAPL_PKG; 356 + return do_rapl & (RAPL_PKG | RAPL_AMD_F17H); 373 357 case IDX_DRAM_ENERGY: 374 358 return do_rapl & RAPL_DRAM; 375 359 case IDX_PP0_ENERGY: ··· 384 368 return 0; 385 369 } 386 370 } 371 + 387 372 struct sys_counters { 388 373 unsigned int added_thread_counters; 389 374 unsigned int added_core_counters; ··· 408 391 int logical_node_id; /* 0-based count within the package */ 409 392 int physical_core_id; 410 393 int thread_id; 411 - cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 394 + cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 412 395 } *cpus; 413 396 414 397 struct topo_params { ··· 425 408 426 409 struct timeval tv_even, tv_odd, tv_delta; 427 410 428 - 
int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 411 + int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 429 412 int *irqs_per_cpu; /* indexed by cpu_num */ 430 413 431 414 void setup_all_buffers(void); ··· 438 421 { 439 422 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 440 423 } 424 + 441 425 /* 442 426 * run func(thread, core, package) in topology order 443 427 * skip non-present cpus 444 428 */ 445 429 446 - int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), 447 - struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 430 + int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 431 + struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 448 432 { 449 433 int retval, pkg_no, core_no, thread_no, node_no; 450 434 451 435 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 452 436 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 453 437 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 454 - for (thread_no = 0; thread_no < 455 - topo.threads_per_core; ++thread_no) { 438 + for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 456 439 struct thread_data *t; 457 440 struct core_data *c; 458 441 struct pkg_data *p; 459 442 460 - t = GET_THREAD(thread_base, thread_no, 461 - core_no, node_no, 462 - pkg_no); 443 + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 463 444 464 445 if (cpu_is_not_present(t->cpu_id)) 465 446 continue; 466 447 467 - c = GET_CORE(core_base, core_no, 468 - node_no, pkg_no); 448 + c = GET_CORE(core_base, core_no, node_no, pkg_no); 469 449 p = GET_PKG(pkg_base, pkg_no); 470 450 471 451 retval = func(t, c, p); ··· 484 470 else 485 471 return 0; 486 472 } 473 + 487 474 int get_msr_fd(int cpu) 488 475 { 489 476 char pathname[32]; ··· 503 488 fd_percpu[cpu] = fd; 504 489 505 490 return fd; 491 + } 492 + 493 + 
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 494 + { 495 + return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 496 + } 497 + 498 + static int perf_instr_count_open(int cpu_num) 499 + { 500 + struct perf_event_attr pea; 501 + int fd; 502 + 503 + memset(&pea, 0, sizeof(struct perf_event_attr)); 504 + pea.type = PERF_TYPE_HARDWARE; 505 + pea.size = sizeof(struct perf_event_attr); 506 + pea.config = PERF_COUNT_HW_INSTRUCTIONS; 507 + 508 + /* counter for cpu_num, including user + kernel and all processes */ 509 + fd = perf_event_open(&pea, -1, cpu_num, -1, 0); 510 + if (fd == -1) 511 + err(-1, "cpu%d: perf instruction counter\n", cpu_num); 512 + 513 + return fd; 514 + } 515 + 516 + int get_instr_count_fd(int cpu) 517 + { 518 + if (fd_instr_count_percpu[cpu]) 519 + return fd_instr_count_percpu[cpu]; 520 + 521 + fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu); 522 + 523 + return fd_instr_count_percpu[cpu]; 506 524 } 507 525 508 526 int get_msr(int cpu, off_t offset, unsigned long long *msr) ··· 566 518 { 0x0, "Bzy_MHz" }, 567 519 { 0x0, "TSC_MHz" }, 568 520 { 0x0, "IRQ" }, 569 - { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, 521 + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL }, 570 522 { 0x0, "sysfs" }, 571 523 { 0x0, "CPU%c1" }, 572 524 { 0x0, "CPU%c3" }, ··· 609 561 { 0x0, "X2APIC" }, 610 562 { 0x0, "Die" }, 611 563 { 0x0, "GFXAMHz" }, 564 + { 0x0, "IPC" }, 612 565 }; 613 566 614 567 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) ··· 665 616 #define BIC_X2APIC (1ULL << 49) 666 617 #define BIC_Die (1ULL << 50) 667 618 #define BIC_GFXACTMHz (1ULL << 51) 619 + #define BIC_IPC (1ULL << 52) 620 + 621 + #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) 622 + #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) 623 + #define 
BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz ) 624 + #define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) 625 + #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 668 626 669 627 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) 670 628 ··· 683 627 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 684 628 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 685 629 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 686 - 630 + #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) 687 631 688 632 #define MAX_DEFERRED 16 689 633 char *deferred_skip_names[MAX_DEFERRED]; ··· 698 642 void help(void) 699 643 { 700 644 fprintf(outf, 701 - "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 702 - "\n" 703 - "Turbostat forks the specified COMMAND and prints statistics\n" 704 - "when COMMAND completes.\n" 705 - "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 706 - "to print statistics, until interrupted.\n" 707 - " -a, --add add a counter\n" 708 - " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" 709 - " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" 710 - " {core | package | j,k,l..m,n-p }\n" 711 - " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" 712 - " -D, --Dump displays the raw counter values\n" 713 - " -e, --enable [all | column]\n" 714 - " shows all or the specified disabled column\n" 715 - " -H, --hide [column|column,column,...]\n" 716 - " hide the specified column(s)\n" 717 - " -i, --interval sec.subsec\n" 718 - " Override default 5-second measurement interval\n" 719 - " -J, --Joules displays energy in Joules instead of Watts\n" 720 - " -l, --list list column headers only\n" 721 - " -n, --num_iterations num\n" 722 - " number of the measurement iterations\n" 723 - " -o, --out file\n" 724 - " create or truncate \"file\" for all output\n" 725 - " -q, --quiet skip decoding system configuration header\n" 726 - " -s, --show [column|column,column,...]\n" 727 - " show only the specified column(s)\n" 728 - " -S, --Summary\n" 729 - " limits output to 1-line system summary per interval\n" 730 - " -T, --TCC temperature\n" 731 - " sets the Thermal Control Circuit temperature in\n" 732 - " degrees Celsius\n" 733 - " -h, --help print this help message\n" 734 - " -v, --version print version information\n" 735 - "\n" 736 - "For more help, run \"man turbostat\"\n"); 645 + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 646 + "\n" 647 + "Turbostat forks the specified COMMAND and prints statistics\n" 648 + "when COMMAND completes.\n" 649 + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 650 + "to print statistics, until interrupted.\n" 651 + " -a, --add add a counter\n" 652 + " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" 653 + " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" 654 + " {core | package | j,k,l..m,n-p }\n" 655 + " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" 656 + " -D, --Dump displays the raw counter values\n" 657 + " -e, --enable [all | column]\n" 658 + " shows all or the specified disabled column\n" 659 + " -H, --hide [column|column,column,...]\n" 660 + " hide the specified column(s)\n" 661 + " -i, --interval sec.subsec\n" 662 + " Override default 5-second measurement interval\n" 663 + " -J, --Joules displays energy in Joules instead of Watts\n" 664 + " -l, --list list column headers only\n" 665 + " -n, --num_iterations num\n" 666 + " number of the measurement iterations\n" 667 + " -o, --out file\n" 668 + " create or truncate \"file\" for all output\n" 669 + " -q, --quiet skip decoding system configuration header\n" 670 + " -s, --show [column|column,column,...]\n" 671 + " show only the specified column(s)\n" 672 + " -S, --Summary\n" 673 + " limits output to 1-line system summary per interval\n" 674 + " -T, --TCC temperature\n" 675 + " sets the Thermal Control Circuit temperature in\n" 676 + " degrees Celsius\n" 677 + " -h, --help print this help message\n" 678 + " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); 737 679 } 738 680 739 681 /* ··· 754 700 755 701 if (!strcmp(name_list, "all")) 756 702 return ~0; 703 + if (!strcmp(name_list, "topology")) 704 + return BIC_TOPOLOGY; 705 + if (!strcmp(name_list, "power")) 706 + return BIC_THERMAL_PWR; 707 + if (!strcmp(name_list, "idle")) 708 + return BIC_IDLE; 709 + if (!strcmp(name_list, "frequency")) 710 + return BIC_FREQUENCY; 711 + if (!strcmp(name_list, "other")) 712 + return BIC_OTHER; 713 + if (!strcmp(name_list, "all")) 714 + return 0; 757 715 758 716 for (i = 0; i < MAX_BIC; ++i) { 759 717 if (!strcmp(name_list, bic[i].name)) { ··· 796 730 } 797 731 return retval; 798 732 } 799 - 800 733 801 734 void 
print_header(char *delim) 802 735 { ··· 828 763 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 829 764 if (DO_BIC(BIC_TSC_MHz)) 830 765 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 766 + 767 + if (DO_BIC(BIC_IPC)) 768 + outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 831 769 832 770 if (DO_BIC(BIC_IRQ)) { 833 771 if (sums_need_wide_columns) ··· 978 910 outp += sprintf(outp, "\n"); 979 911 } 980 912 981 - int dump_counters(struct thread_data *t, struct core_data *c, 982 - struct pkg_data *p) 913 + int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 983 914 { 984 915 int i; 985 916 struct msr_counter *mp; ··· 986 919 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 987 920 988 921 if (t) { 989 - outp += sprintf(outp, "CPU: %d flags 0x%x\n", 990 - t->cpu_id, t->flags); 922 + outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 991 923 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 992 924 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 993 925 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 994 926 outp += sprintf(outp, "c1: %016llX\n", t->c1); 927 + 928 + if (DO_BIC(BIC_IPC)) 929 + outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 995 930 996 931 if (DO_BIC(BIC_IRQ)) 997 932 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); ··· 1001 932 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 1002 933 1003 934 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 1004 - outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", 1005 - i, mp->msr_num, t->counter[i]); 935 + outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]); 1006 936 } 1007 937 } 1008 938 ··· 1014 946 outp += sprintf(outp, "Joules: %0X\n", c->core_energy); 1015 947 1016 948 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 1017 - outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", 1018 - i, mp->msr_num, c->counter[i]); 949 + outp += sprintf(outp, "cADDED [%d] msr0x%x: 
%08llX\n", i, mp->msr_num, c->counter[i]); 1019 950 } 1020 951 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 1021 952 } ··· 1043 976 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); 1044 977 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); 1045 978 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); 1046 - outp += sprintf(outp, "Throttle PKG: %0llX\n", 1047 - p->rapl_pkg_perf_status); 1048 - outp += sprintf(outp, "Throttle RAM: %0llX\n", 1049 - p->rapl_dram_perf_status); 979 + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); 980 + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); 1050 981 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 1051 982 1052 983 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 1053 - outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", 1054 - i, mp->msr_num, p->counter[i]); 984 + outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]); 1055 985 } 1056 986 } 1057 987 ··· 1060 996 /* 1061 997 * column formatting convention & formats 1062 998 */ 1063 - int format_counters(struct thread_data *t, struct core_data *c, 1064 - struct pkg_data *p) 999 + int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1065 1000 { 1066 1001 double interval_float, tsc; 1067 1002 char *fmt8; ··· 1069 1006 char *delim = "\t"; 1070 1007 int printed = 0; 1071 1008 1072 - /* if showing only 1st thread in core and this isn't one, bail out */ 1009 + /* if showing only 1st thread in core and this isn't one, bail out */ 1073 1010 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1074 1011 return 0; 1075 1012 1076 - /* if showing only 1st thread in pkg and this isn't one, bail out */ 1013 + /* if showing only 1st thread in pkg and this isn't one, bail out */ 1077 1014 if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1078 1015 return 0; 1079 1016 1080 1017 /*if not summary line and 
--cpu is used */ 1081 - if ((t != &average.threads) && 1082 - (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 1018 + if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 1083 1019 return 0; 1084 1020 1085 1021 if (DO_BIC(BIC_USEC)) { ··· 1093 1031 if (DO_BIC(BIC_TOD)) 1094 1032 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 1095 1033 1096 - interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0; 1034 + interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 1097 1035 1098 1036 tsc = t->tsc * tsc_tweak; 1099 1037 ··· 1129 1067 if (DO_BIC(BIC_Node)) { 1130 1068 if (t) 1131 1069 outp += sprintf(outp, "%s%d", 1132 - (printed++ ? delim : ""), 1133 - cpus[t->cpu_id].physical_node_id); 1070 + (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 1134 1071 else 1135 - outp += sprintf(outp, "%s-", 1136 - (printed++ ? delim : "")); 1072 + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 1137 1073 } 1138 1074 if (DO_BIC(BIC_Core)) { 1139 1075 if (c) ··· 1148 1088 } 1149 1089 1150 1090 if (DO_BIC(BIC_Avg_MHz)) 1151 - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1152 - 1.0 / units * t->aperf / interval_float); 1091 + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 1153 1092 1154 1093 if (DO_BIC(BIC_Busy)) 1155 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc); 1094 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 1156 1095 1157 1096 if (DO_BIC(BIC_Bzy_MHz)) { 1158 1097 if (has_base_hz) 1159 - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 1098 + outp += 1099 + sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 1160 1100 else 1161 1101 outp += sprintf(outp, "%s%.0f", (printed++ ? 
delim : ""), 1162 - tsc / units * t->aperf / t->mperf / interval_float); 1102 + tsc / units * t->aperf / t->mperf / interval_float); 1163 1103 } 1164 1104 1165 1105 if (DO_BIC(BIC_TSC_MHz)) 1166 - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float); 1106 + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 1107 + 1108 + if (DO_BIC(BIC_IPC)) 1109 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 1167 1110 1168 1111 /* IRQ */ 1169 1112 if (DO_BIC(BIC_IRQ)) { ··· 1184 1121 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 1185 1122 if (mp->format == FORMAT_RAW) { 1186 1123 if (mp->width == 32) 1187 - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]); 1124 + outp += 1125 + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 1188 1126 else 1189 1127 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 1190 1128 } else if (mp->format == FORMAT_DELTA) { ··· 1195 1131 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); 1196 1132 } else if (mp->format == FORMAT_PERCENT) { 1197 1133 if (mp->type == COUNTER_USEC) 1198 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000); 1134 + outp += 1135 + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1136 + t->counter[i] / interval_float / 10000); 1199 1137 else 1200 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc); 1138 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 1201 1139 } 1202 1140 } 1203 1141 1204 1142 /* C1 */ 1205 1143 if (DO_BIC(BIC_CPU_c1)) 1206 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc); 1207 - 1144 + outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * t->c1 / tsc); 1208 1145 1209 1146 /* print per-core data only for 1st thread in core */ 1210 1147 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1211 1148 goto done; 1212 1149 1213 1150 if (DO_BIC(BIC_CPU_c3)) 1214 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); 1151 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 1215 1152 if (DO_BIC(BIC_CPU_c6)) 1216 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); 1153 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 1217 1154 if (DO_BIC(BIC_CPU_c7)) 1218 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc); 1155 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 1219 1156 1220 1157 /* Mod%c6 */ 1221 1158 if (DO_BIC(BIC_Mod_c6)) ··· 1228 1163 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 1229 1164 if (mp->format == FORMAT_RAW) { 1230 1165 if (mp->width == 32) 1231 - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]); 1166 + outp += 1167 + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 1232 1168 else 1233 1169 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 1234 1170 } else if (mp->format == FORMAT_DELTA) { ··· 1238 1172 else 1239 1173 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); 1240 1174 } else if (mp->format == FORMAT_PERCENT) { 1241 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc); 1175 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 1242 1176 } 1243 1177 } 1244 1178 1245 1179 fmt8 = "%s%.2f"; 1246 1180 1247 1181 if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) 1248 - outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), c->core_energy * rapl_energy_units / interval_float); 1182 + outp += 1183 + sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); 1249 1184 if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) 1250 1185 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); 1251 1186 ··· 1264 1197 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 1265 1198 } else { 1266 1199 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1267 - p->gfx_rc6_ms / 10.0 / interval_float); 1200 + p->gfx_rc6_ms / 10.0 / interval_float); 1268 1201 } 1269 1202 } 1270 1203 ··· 1278 1211 1279 1212 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 1280 1213 if (DO_BIC(BIC_Totl_c0)) 1281 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); 1214 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 1282 1215 if (DO_BIC(BIC_Any_c0)) 1283 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); 1216 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 1284 1217 if (DO_BIC(BIC_GFX_c0)) 1285 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); 1218 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 1286 1219 if (DO_BIC(BIC_CPUGFX)) 1287 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); 1220 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 1288 1221 1289 1222 if (DO_BIC(BIC_Pkgpc2)) 1290 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); 1223 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 1291 1224 if (DO_BIC(BIC_Pkgpc3)) 1292 - outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * p->pc3/tsc); 1225 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 1293 1226 if (DO_BIC(BIC_Pkgpc6)) 1294 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc); 1227 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 1295 1228 if (DO_BIC(BIC_Pkgpc7)) 1296 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc); 1229 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 1297 1230 if (DO_BIC(BIC_Pkgpc8)) 1298 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc); 1231 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 1299 1232 if (DO_BIC(BIC_Pkgpc9)) 1300 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc); 1233 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 1301 1234 if (DO_BIC(BIC_Pkgpc10)) 1302 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); 1235 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 1303 1236 1304 1237 if (DO_BIC(BIC_CPU_LPI)) 1305 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 1238 + outp += 1239 + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 1306 1240 if (DO_BIC(BIC_SYS_LPI)) 1307 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); 1241 + outp += 1242 + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); 1308 1243 1309 1244 if (DO_BIC(BIC_PkgWatt)) 1310 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); 1245 + outp += 1246 + sprintf(outp, fmt8, (printed++ ? 
delim : ""), p->energy_pkg * rapl_energy_units / interval_float); 1311 1247 if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) 1312 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); 1248 + outp += 1249 + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); 1313 1250 if (DO_BIC(BIC_GFXWatt)) 1314 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); 1251 + outp += 1252 + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); 1315 1253 if (DO_BIC(BIC_RAMWatt)) 1316 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); 1254 + outp += 1255 + sprintf(outp, fmt8, (printed++ ? delim : ""), 1256 + p->energy_dram * rapl_dram_energy_units / interval_float); 1317 1257 if (DO_BIC(BIC_Pkg_J)) 1318 1258 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); 1319 1259 if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) ··· 1330 1256 if (DO_BIC(BIC_RAM_J)) 1331 1257 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); 1332 1258 if (DO_BIC(BIC_PKG__)) 1333 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); 1259 + outp += 1260 + sprintf(outp, fmt8, (printed++ ? delim : ""), 1261 + 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); 1334 1262 if (DO_BIC(BIC_RAM__)) 1335 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); 1263 + outp += 1264 + sprintf(outp, fmt8, (printed++ ? 
delim : ""), 1265 + 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); 1336 1266 1337 1267 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 1338 1268 if (mp->format == FORMAT_RAW) { 1339 1269 if (mp->width == 32) 1340 - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]); 1270 + outp += 1271 + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 1341 1272 else 1342 1273 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 1343 1274 } else if (mp->format == FORMAT_DELTA) { ··· 1351 1272 else 1352 1273 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 1353 1274 } else if (mp->format == FORMAT_PERCENT) { 1354 - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc); 1275 + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 1355 1276 } 1356 1277 } 1357 1278 ··· 1376 1297 1377 1298 outp = output_buffer; 1378 1299 } 1300 + 1379 1301 void flush_output_stderr(void) 1380 1302 { 1381 1303 fputs(output_buffer, outf); 1382 1304 fflush(outf); 1383 1305 outp = output_buffer; 1384 1306 } 1307 + 1385 1308 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1386 1309 { 1387 1310 static int printed; ··· 1404 1323 #define DELTA_WRAP32(new, old) \ 1405 1324 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 1406 1325 1407 - int 1408 - delta_package(struct pkg_data *new, struct pkg_data *old) 1326 + int delta_package(struct pkg_data *new, struct pkg_data *old) 1409 1327 { 1410 1328 int i; 1411 1329 struct msr_counter *mp; 1412 - 1413 1330 1414 1331 if (DO_BIC(BIC_Totl_c0)) 1415 1332 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; ··· 1433 1354 old->pkg_temp_c = new->pkg_temp_c; 1434 1355 1435 1356 /* flag an error when rc6 counter resets/wraps */ 1436 - if (old->gfx_rc6_ms > new->gfx_rc6_ms) 1357 
+ if (old->gfx_rc6_ms > new->gfx_rc6_ms) 1437 1358 old->gfx_rc6_ms = -1; 1438 1359 else 1439 1360 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; ··· 1458 1379 return 0; 1459 1380 } 1460 1381 1461 - void 1462 - delta_core(struct core_data *new, struct core_data *old) 1382 + void delta_core(struct core_data *new, struct core_data *old) 1463 1383 { 1464 1384 int i; 1465 1385 struct msr_counter *mp; ··· 1490 1412 /* 1491 1413 * old = new - old 1492 1414 */ 1493 - int 1494 - delta_thread(struct thread_data *new, struct thread_data *old, 1495 - struct core_data *core_delta) 1415 + int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 1496 1416 { 1497 1417 int i; 1498 1418 struct msr_counter *mp; ··· 1521 1445 1522 1446 old->c1 = new->c1 - old->c1; 1523 1447 1524 - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || 1525 - soft_c1_residency_display(BIC_Avg_MHz)) { 1448 + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { 1526 1449 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 1527 1450 old->aperf = new->aperf - old->aperf; 1528 1451 old->mperf = new->mperf - old->mperf; ··· 1529 1454 return -1; 1530 1455 } 1531 1456 } 1532 - 1533 1457 1534 1458 if (use_c1_residency_msr) { 1535 1459 /* ··· 1546 1472 else { 1547 1473 /* normal case, derive c1 */ 1548 1474 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 1549 - - core_delta->c6 - core_delta->c7; 1475 + - core_delta->c6 - core_delta->c7; 1550 1476 } 1551 1477 } 1552 1478 ··· 1555 1481 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 1556 1482 old->mperf = 1; /* divide by 0 protection */ 1557 1483 } 1484 + 1485 + if (DO_BIC(BIC_IPC)) 1486 + old->instr_count = new->instr_count - old->instr_count; 1558 1487 1559 1488 if (DO_BIC(BIC_IRQ)) 1560 1489 old->irq_count = new->irq_count - old->irq_count; ··· 1575 1498 } 1576 1499 1577 1500 int delta_cpu(struct thread_data *t, struct 
core_data *c, 1578 - struct pkg_data *p, struct thread_data *t2, 1579 - struct core_data *c2, struct pkg_data *p2) 1501 + struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 1580 1502 { 1581 1503 int retval = 0; 1582 1504 ··· 1598 1522 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1599 1523 { 1600 1524 int i; 1601 - struct msr_counter *mp; 1525 + struct msr_counter *mp; 1602 1526 1603 1527 t->tv_begin.tv_sec = 0; 1604 1528 t->tv_begin.tv_usec = 0; ··· 1611 1535 t->aperf = 0; 1612 1536 t->mperf = 0; 1613 1537 t->c1 = 0; 1538 + 1539 + t->instr_count = 0; 1614 1540 1615 1541 t->irq_count = 0; 1616 1542 t->smi_count = 0; ··· 1665 1587 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) 1666 1588 p->counter[i] = 0; 1667 1589 } 1668 - int sum_counters(struct thread_data *t, struct core_data *c, 1669 - struct pkg_data *p) 1590 + 1591 + int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1670 1592 { 1671 1593 int i; 1672 1594 struct msr_counter *mp; ··· 1688 1610 average.threads.aperf += t->aperf; 1689 1611 average.threads.mperf += t->mperf; 1690 1612 average.threads.c1 += t->c1; 1613 + 1614 + average.threads.instr_count += t->instr_count; 1691 1615 1692 1616 average.threads.irq_count += t->irq_count; 1693 1617 average.threads.smi_count += t->smi_count; ··· 1767 1687 } 1768 1688 return 0; 1769 1689 } 1690 + 1770 1691 /* 1771 1692 * sum the counters for all cpus in the system 1772 1693 * compute the weighted average 1773 1694 */ 1774 - void compute_average(struct thread_data *t, struct core_data *c, 1775 - struct pkg_data *p) 1695 + void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1776 1696 { 1777 1697 int i; 1778 1698 struct msr_counter *mp; ··· 1787 1707 average.threads.tsc /= topo.num_cpus; 1788 1708 average.threads.aperf /= topo.num_cpus; 1789 1709 average.threads.mperf /= topo.num_cpus; 1710 + average.threads.instr_count /= 
topo.num_cpus; 1790 1711 average.threads.c1 /= topo.num_cpus; 1791 1712 1792 1713 if (average.threads.irq_count > 9999999) ··· 1853 1772 { 1854 1773 unsigned int low, high; 1855 1774 1856 - asm volatile("rdtsc" : "=a" (low), "=d" (high)); 1775 + asm volatile ("rdtsc":"=a" (low), "=d"(high)); 1857 1776 1858 1777 return low | ((unsigned long long)high) << 32; 1859 1778 } ··· 1869 1788 err(1, "%s: open failed", path); 1870 1789 return filep; 1871 1790 } 1791 + 1872 1792 /* 1873 1793 * snapshot_sysfs_counter() 1874 1794 * ··· 1901 1819 char path[128 + PATH_BYTES]; 1902 1820 1903 1821 if (mp->flags & SYSFS_PERCPU) { 1904 - sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", 1905 - cpu, mp->path); 1822 + sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path); 1906 1823 1907 1824 *counterp = snapshot_sysfs_counter(path); 1908 1825 } else { ··· 1961 1880 1962 1881 eax = ebx = ecx = edx = 0; 1963 1882 __cpuid(0x80000001, eax, ebx, ecx, edx); 1964 - topology_extensions = ecx & (1 << 22); 1883 + topology_extensions = ecx & (1 << 22); 1965 1884 1966 1885 if (topology_extensions == 0) 1967 1886 return; ··· 1984 1903 t->x2apic_id = edx; 1985 1904 1986 1905 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 1987 - fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", 1988 - t->cpu_id, t->apic_id, t->x2apic_id); 1906 + fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 1989 1907 } 1990 1908 1991 1909 /* ··· 2012 1932 retry: 2013 1933 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 2014 1934 2015 - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || 2016 - soft_c1_residency_display(BIC_Avg_MHz)) { 1935 + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { 2017 1936 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; 2018 1937 2019 1938 /* ··· 2059 1980 if (aperf_mperf_retry_count < 5) 2060 1981 goto retry; 
2061 1982 else 2062 - warnx("cpu%d jitter %lld %lld", 2063 - cpu, aperf_time, mperf_time); 1983 + warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); 2064 1984 } 2065 1985 aperf_mperf_retry_count = 0; 2066 1986 2067 1987 t->aperf = t->aperf * aperf_mperf_multiplier; 2068 1988 t->mperf = t->mperf * aperf_mperf_multiplier; 2069 1989 } 1990 + 1991 + if (DO_BIC(BIC_IPC)) 1992 + if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) 1993 + return -4; 2070 1994 2071 1995 if (DO_BIC(BIC_IRQ)) 2072 1996 t->irq_count = irqs_per_cpu[cpu]; ··· 2105 2023 return -7; 2106 2024 } 2107 2025 2108 - if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) 2026 + if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) { 2109 2027 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 2110 2028 return -8; 2029 + else if (t->is_atom) { 2030 + /* 2031 + * For Atom CPUs that has core cstate deeper than c6, 2032 + * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. 2033 + * Minus CC7 (and deeper cstates) residency to get 2034 + * accturate cc6 residency. 2035 + */ 2036 + c->c6 -= c->c7; 2037 + } 2038 + } 2111 2039 2112 2040 if (DO_BIC(BIC_Mod_c6)) 2113 2041 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) ··· 2126 2034 if (DO_BIC(BIC_CoreTmp)) { 2127 2035 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 2128 2036 return -9; 2129 - c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 2037 + c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 2130 2038 } 2131 2039 2132 2040 if (do_rapl & RAPL_AMD_F17H) { ··· 2232 2140 if (DO_BIC(BIC_PkgTmp)) { 2233 2141 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 2234 2142 return -17; 2235 - p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 2143 + p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 2236 2144 } 2237 2145 2238 2146 if (DO_BIC(BIC_GFX_rc6)) ··· 2260 2168 * (>= PCL__7) and to index pkg_cstate_limit_strings[]. 
2261 2169 */ 2262 2170 2263 - #define PCLUKN 0 /* Unknown */ 2264 - #define PCLRSV 1 /* Reserved */ 2265 - #define PCL__0 2 /* PC0 */ 2266 - #define PCL__1 3 /* PC1 */ 2267 - #define PCL__2 4 /* PC2 */ 2268 - #define PCL__3 5 /* PC3 */ 2269 - #define PCL__4 6 /* PC4 */ 2270 - #define PCL__6 7 /* PC6 */ 2271 - #define PCL_6N 8 /* PC6 No Retention */ 2272 - #define PCL_6R 9 /* PC6 Retention */ 2273 - #define PCL__7 10 /* PC7 */ 2274 - #define PCL_7S 11 /* PC7 Shrink */ 2275 - #define PCL__8 12 /* PC8 */ 2276 - #define PCL__9 13 /* PC9 */ 2277 - #define PCL_10 14 /* PC10 */ 2278 - #define PCLUNL 15 /* Unlimited */ 2171 + #define PCLUKN 0 /* Unknown */ 2172 + #define PCLRSV 1 /* Reserved */ 2173 + #define PCL__0 2 /* PC0 */ 2174 + #define PCL__1 3 /* PC1 */ 2175 + #define PCL__2 4 /* PC2 */ 2176 + #define PCL__3 5 /* PC3 */ 2177 + #define PCL__4 6 /* PC4 */ 2178 + #define PCL__6 7 /* PC6 */ 2179 + #define PCL_6N 8 /* PC6 No Retention */ 2180 + #define PCL_6R 9 /* PC6 Retention */ 2181 + #define PCL__7 10 /* PC7 */ 2182 + #define PCL_7S 11 /* PC7 Shrink */ 2183 + #define PCL__8 12 /* PC8 */ 2184 + #define PCL__9 13 /* PC9 */ 2185 + #define PCL_10 14 /* PC10 */ 2186 + #define PCLUNL 15 /* Unlimited */ 2279 2187 2280 2188 int pkg_cstate_limit = PCLUKN; 2281 2189 char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", 2282 - "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"}; 2190 + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" 2191 + }; 2283 2192 2284 - int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2285 - int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2286 - int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, 
PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2287 - int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; 2288 - int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2289 - int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2290 - int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2291 - int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 2193 + int nhm_pkg_cstate_limits[16] = 2194 + { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2195 + PCLRSV, PCLRSV 2196 + }; 2292 2197 2198 + int snb_pkg_cstate_limits[16] = 2199 + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2200 + PCLRSV, PCLRSV 2201 + }; 2293 2202 2294 - static void 2295 - calculate_tsc_tweak() 2203 + int hsw_pkg_cstate_limits[16] = 2204 + { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2205 + PCLRSV, PCLRSV 2206 + }; 2207 + 2208 + int slv_pkg_cstate_limits[16] = 2209 + { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2210 + PCL__6, PCL__7 2211 + }; 2212 + 2213 + int amt_pkg_cstate_limits[16] = 2214 + { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2215 + PCLRSV, PCLRSV 2216 + }; 2217 + 2218 + int 
phi_pkg_cstate_limits[16] = 2219 + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2220 + PCLRSV, PCLRSV 2221 + }; 2222 + 2223 + int glm_pkg_cstate_limits[16] = 2224 + { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2225 + PCLRSV, PCLRSV 2226 + }; 2227 + 2228 + int skx_pkg_cstate_limits[16] = 2229 + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2230 + PCLRSV, PCLRSV 2231 + }; 2232 + 2233 + int icx_pkg_cstate_limits[16] = 2234 + { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 2235 + PCLRSV, PCLRSV 2236 + }; 2237 + 2238 + static void calculate_tsc_tweak() 2296 2239 { 2297 2240 tsc_tweak = base_hz / tsc_hz; 2298 2241 } 2299 2242 2300 - static void 2301 - dump_nhm_platform_info(void) 2243 + void prewake_cstate_probe(unsigned int family, unsigned int model); 2244 + 2245 + static void dump_nhm_platform_info(void) 2302 2246 { 2303 2247 unsigned long long msr; 2304 2248 unsigned int ratio; ··· 2344 2216 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 2345 2217 2346 2218 ratio = (msr >> 40) & 0xFF; 2347 - fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", 2348 - ratio, bclk, ratio * bclk); 2219 + fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 2349 2220 2350 2221 ratio = (msr >> 8) & 0xFF; 2351 - fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", 2352 - ratio, bclk, ratio * bclk); 2222 + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 2353 2223 2354 2224 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 2355 2225 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 2356 2226 base_cpu, msr, msr & 0x2 ? 
"EN" : "DIS"); 2357 2227 2228 + /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 2229 + if (dis_cstate_prewake) 2230 + fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); 2231 + 2358 2232 return; 2359 2233 } 2360 2234 2361 - static void 2362 - dump_hsw_turbo_ratio_limits(void) 2235 + static void dump_hsw_turbo_ratio_limits(void) 2363 2236 { 2364 2237 unsigned long long msr; 2365 2238 unsigned int ratio; ··· 2371 2242 2372 2243 ratio = (msr >> 8) & 0xFF; 2373 2244 if (ratio) 2374 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", 2375 - ratio, bclk, ratio * bclk); 2245 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 2376 2246 2377 2247 ratio = (msr >> 0) & 0xFF; 2378 2248 if (ratio) 2379 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", 2380 - ratio, bclk, ratio * bclk); 2249 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 2381 2250 return; 2382 2251 } 2383 2252 2384 - static void 2385 - dump_ivt_turbo_ratio_limits(void) 2253 + static void dump_ivt_turbo_ratio_limits(void) 2386 2254 { 2387 2255 unsigned long long msr; 2388 2256 unsigned int ratio; ··· 2390 2264 2391 2265 ratio = (msr >> 56) & 0xFF; 2392 2266 if (ratio) 2393 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", 2394 - ratio, bclk, ratio * bclk); 2267 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 2395 2268 2396 2269 ratio = (msr >> 48) & 0xFF; 2397 2270 if (ratio) 2398 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", 2399 - ratio, bclk, ratio * bclk); 2271 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 2400 2272 2401 2273 ratio = (msr >> 40) & 0xFF; 2402 2274 if (ratio) 2403 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", 2404 - ratio, bclk, ratio * bclk); 2275 + fprintf(outf, "%d * %.1f = 
%.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 2405 2276 2406 2277 ratio = (msr >> 32) & 0xFF; 2407 2278 if (ratio) 2408 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", 2409 - ratio, bclk, ratio * bclk); 2279 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 2410 2280 2411 2281 ratio = (msr >> 24) & 0xFF; 2412 2282 if (ratio) 2413 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", 2414 - ratio, bclk, ratio * bclk); 2283 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 2415 2284 2416 2285 ratio = (msr >> 16) & 0xFF; 2417 2286 if (ratio) 2418 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", 2419 - ratio, bclk, ratio * bclk); 2287 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 2420 2288 2421 2289 ratio = (msr >> 8) & 0xFF; 2422 2290 if (ratio) 2423 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", 2424 - ratio, bclk, ratio * bclk); 2291 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 2425 2292 2426 2293 ratio = (msr >> 0) & 0xFF; 2427 2294 if (ratio) 2428 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", 2429 - ratio, bclk, ratio * bclk); 2295 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 2430 2296 return; 2431 2297 } 2298 + 2432 2299 int has_turbo_ratio_group_limits(int family, int model) 2433 2300 { 2434 2301 ··· 2431 2312 switch (model) { 2432 2313 case INTEL_FAM6_ATOM_GOLDMONT: 2433 2314 case INTEL_FAM6_SKYLAKE_X: 2315 + case INTEL_FAM6_ICELAKE_X: 2434 2316 case INTEL_FAM6_ATOM_GOLDMONT_D: 2435 2317 case INTEL_FAM6_ATOM_TREMONT_D: 2436 2318 return 1; ··· 2439 2319 return 0; 2440 2320 } 2441 2321 2442 - static void 2443 - dump_turbo_ratio_limits(int family, int model) 2322 + static void dump_turbo_ratio_limits(int family, 
int model) 2444 2323 { 2445 2324 unsigned long long msr, core_counts; 2446 2325 unsigned int ratio, group_size; ··· 2504 2385 return; 2505 2386 } 2506 2387 2507 - static void 2508 - dump_atom_turbo_ratio_limits(void) 2388 + static void dump_atom_turbo_ratio_limits(void) 2509 2389 { 2510 2390 unsigned long long msr; 2511 2391 unsigned int ratio; ··· 2514 2396 2515 2397 ratio = (msr >> 0) & 0x3F; 2516 2398 if (ratio) 2517 - fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", 2518 - ratio, bclk, ratio * bclk); 2399 + fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 2519 2400 2520 2401 ratio = (msr >> 8) & 0x3F; 2521 2402 if (ratio) 2522 - fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", 2523 - ratio, bclk, ratio * bclk); 2403 + fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 2524 2404 2525 2405 ratio = (msr >> 16) & 0x3F; 2526 2406 if (ratio) 2527 - fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", 2528 - ratio, bclk, ratio * bclk); 2407 + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 2529 2408 2530 2409 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 2531 2410 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 2532 2411 2533 2412 ratio = (msr >> 24) & 0x3F; 2534 2413 if (ratio) 2535 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", 2536 - ratio, bclk, ratio * bclk); 2414 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 2537 2415 2538 2416 ratio = (msr >> 16) & 0x3F; 2539 2417 if (ratio) 2540 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", 2541 - ratio, bclk, ratio * bclk); 2418 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 2542 2419 2543 2420 ratio = (msr >> 8) & 0x3F; 2544 2421 if (ratio) 2545 - fprintf(outf, "%d * %.1f = %.1f 
MHz max turbo 2 active cores\n", 2546 - ratio, bclk, ratio * bclk); 2422 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 2547 2423 2548 2424 ratio = (msr >> 0) & 0x3F; 2549 2425 if (ratio) 2550 - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", 2551 - ratio, bclk, ratio * bclk); 2426 + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 2552 2427 } 2553 2428 2554 - static void 2555 - dump_knl_turbo_ratio_limits(void) 2429 + static void dump_knl_turbo_ratio_limits(void) 2556 2430 { 2557 2431 const unsigned int buckets_no = 7; 2558 2432 ··· 2556 2446 2557 2447 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 2558 2448 2559 - fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", 2560 - base_cpu, msr); 2449 + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 2561 2450 2562 2451 /* 2563 2452 * Turbo encoding in KNL is as follows: ··· 2601 2492 ratio[i], bclk, ratio[i] * bclk, cores[i]); 2602 2493 } 2603 2494 2604 - static void 2605 - dump_nhm_cst_cfg(void) 2495 + static void dump_nhm_cst_cfg(void) 2606 2496 { 2607 2497 unsigned long long msr; 2608 2498 ··· 2614 2506 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 2615 2507 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 2616 2508 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 2617 - (msr & (1 << 15)) ? "" : "UN", 2618 - (unsigned int)msr & 0xF, 2619 - pkg_cstate_limit_strings[pkg_cstate_limit]); 2509 + (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 2620 2510 2621 2511 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 2622 2512 if (has_automatic_cstate_conversion) { 2623 - fprintf(outf, ", automatic c-state conversion=%s", 2624 - (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); 2513 + fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? 
"on" : "off"); 2625 2514 } 2626 2515 2627 2516 fprintf(outf, ")\n"); ··· 2626 2521 return; 2627 2522 } 2628 2523 2629 - static void 2630 - dump_config_tdp(void) 2524 + static void dump_config_tdp(void) 2631 2525 { 2632 2526 unsigned long long msr; 2633 2527 ··· 2668 2564 fprintf(outf, ")\n"); 2669 2565 } 2670 2566 2671 - unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 2567 + unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 2672 2568 2673 2569 void print_irtl(void) 2674 2570 { ··· 2708 2604 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 2709 2605 2710 2606 } 2607 + 2711 2608 void free_fd_percpu(void) 2712 2609 { 2713 2610 int i; ··· 2764 2659 } 2765 2660 free(cpus); 2766 2661 } 2767 - 2768 2662 2769 2663 /* 2770 2664 * Parse a file containing a single int. ··· 2839 2735 * the logical_node_id 2840 2736 */ 2841 2737 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 2842 - if ((cpus[cpux].physical_package_id == pkg) && 2843 - (cpus[cpux].physical_node_id == node)) { 2738 + if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 2844 2739 cpus[cpux].logical_node_id = lnode; 2845 2740 cpu_count++; 2846 2741 } ··· 2861 2758 int cpu = thiscpu->logical_cpu_id; 2862 2759 2863 2760 for (i = 0; i <= topo.max_cpu_num; i++) { 2864 - sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", 2865 - cpu, i); 2761 + sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 2866 2762 filep = fopen(path, "r"); 2867 2763 if (!filep) 2868 2764 continue; ··· 2891 2789 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 2892 2790 CPU_ZERO_S(size, thiscpu->put_ids); 2893 2791 2894 - sprintf(path, 2895 - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 2792 + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 2896 2793 filep = fopen(path, "r"); 2897 2794 2898 2795 if (!filep) { ··· 2908 2807 sib_core = get_core_id(so); 2909 2808 if 
(sib_core == thiscpu->physical_core_id) { 2910 2809 CPU_SET_S(so, size, thiscpu->put_ids); 2911 - if ((so != cpu) && 2912 - (cpus[so].thread_id < 0)) 2913 - cpus[so].thread_id = 2914 - thread_id++; 2810 + if ((so != cpu) && (cpus[so].thread_id < 0)) 2811 + cpus[so].thread_id = thread_id++; 2915 2812 } 2916 2813 } 2917 2814 } ··· 2924 2825 * skip non-present cpus 2925 2826 */ 2926 2827 2927 - int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, 2928 - struct pkg_data *, struct thread_data *, struct core_data *, 2929 - struct pkg_data *), struct thread_data *thread_base, 2930 - struct core_data *core_base, struct pkg_data *pkg_base, 2931 - struct thread_data *thread_base2, struct core_data *core_base2, 2932 - struct pkg_data *pkg_base2) 2828 + int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 2829 + struct pkg_data *, struct thread_data *, struct core_data *, 2830 + struct pkg_data *), struct thread_data *thread_base, 2831 + struct core_data *core_base, struct pkg_data *pkg_base, 2832 + struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 2933 2833 { 2934 2834 int retval, pkg_no, node_no, core_no, thread_no; 2935 2835 2936 2836 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 2937 2837 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 2938 - for (core_no = 0; core_no < topo.cores_per_node; 2939 - ++core_no) { 2940 - for (thread_no = 0; thread_no < 2941 - topo.threads_per_core; ++thread_no) { 2838 + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 2839 + for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 2942 2840 struct thread_data *t, *t2; 2943 2841 struct core_data *c, *c2; 2944 2842 struct pkg_data *p, *p2; 2945 2843 2946 - t = GET_THREAD(thread_base, thread_no, 2947 - core_no, node_no, 2948 - pkg_no); 2844 + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 2949 2845 2950 2846 if (cpu_is_not_present(t->cpu_id)) 2951 
2847 continue; 2952 2848 2953 - t2 = GET_THREAD(thread_base2, thread_no, 2954 - core_no, node_no, 2955 - pkg_no); 2849 + t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 2956 2850 2957 - c = GET_CORE(core_base, core_no, 2958 - node_no, pkg_no); 2959 - c2 = GET_CORE(core_base2, core_no, 2960 - node_no, 2961 - pkg_no); 2851 + c = GET_CORE(core_base, core_no, node_no, pkg_no); 2852 + c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 2962 2853 2963 2854 p = GET_PKG(pkg_base, pkg_no); 2964 2855 p2 = GET_PKG(pkg_base2, pkg_no); ··· 2967 2878 * run func(cpu) on every cpu in /proc/stat 2968 2879 * return max_cpu number 2969 2880 */ 2970 - int for_all_proc_cpus(int (func)(int)) 2881 + int for_all_proc_cpus(int (func) (int)) 2971 2882 { 2972 2883 FILE *fp; 2973 2884 int cpu_num; ··· 2987 2898 retval = func(cpu_num); 2988 2899 if (retval) { 2989 2900 fclose(fp); 2990 - return(retval); 2901 + return (retval); 2991 2902 } 2992 2903 } 2993 2904 fclose(fp); ··· 3011 2922 base_cpu = sched_getcpu(); 3012 2923 if (base_cpu < 0) 3013 2924 err(1, "cannot find calling cpu ID"); 3014 - sprintf(pathname, 3015 - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", 3016 - base_cpu); 2925 + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); 3017 2926 3018 2927 filep = fopen_or_die(pathname, "r"); 3019 2928 topo.max_cpu_num = 0; 3020 2929 while (fscanf(filep, "%lx,", &dummy) == 1) 3021 2930 topo.max_cpu_num += BITMASK_SIZE; 3022 2931 fclose(filep); 3023 - topo.max_cpu_num--; /* 0 based */ 2932 + topo.max_cpu_num--; /* 0 based */ 3024 2933 } 3025 2934 3026 2935 /* ··· 3030 2943 topo.num_cpus++; 3031 2944 return 0; 3032 2945 } 2946 + 3033 2947 int mark_cpu_present(int cpu) 3034 2948 { 3035 2949 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); ··· 3100 3012 3101 3013 } 3102 3014 3103 - while (getc(fp) != '\n') 3104 - ; /* flush interrupt description */ 3015 + while (getc(fp) != '\n') ; /* flush interrupt description */ 3105 
3016 3106 3017 } 3107 3018 return 0; 3108 3019 } 3020 + 3109 3021 /* 3110 3022 * snapshot_gfx_rc6_ms() 3111 3023 * ··· 3129 3041 3130 3042 return 0; 3131 3043 } 3044 + 3132 3045 /* 3133 3046 * snapshot_gfx_mhz() 3134 3047 * ··· 3209 3120 3210 3121 return 0; 3211 3122 } 3123 + 3212 3124 /* 3213 3125 * snapshot_sys_lpi() 3214 3126 * ··· 3233 3143 3234 3144 return 0; 3235 3145 } 3146 + 3236 3147 /* 3237 3148 * snapshot /proc and /sys files 3238 3149 * ··· 3265 3174 3266 3175 int exit_requested; 3267 3176 3268 - static void signal_handler (int signal) 3177 + static void signal_handler(int signal) 3269 3178 { 3270 3179 switch (signal) { 3271 3180 case SIGINT: ··· 3363 3272 3364 3273 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 3365 3274 unsigned long long msr_cur, msr_last; 3366 - int offset; 3275 + off_t offset; 3367 3276 3368 3277 if (!idx_valid(i)) 3369 3278 continue; ··· 3372 3281 continue; 3373 3282 ret = get_msr(cpu, offset, &msr_cur); 3374 3283 if (ret) { 3375 - fprintf(outf, "Can not update msr(0x%x)\n", offset); 3284 + fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 3376 3285 continue; 3377 3286 } 3378 3287 ··· 3385 3294 return 0; 3386 3295 } 3387 3296 3388 - static void 3389 - msr_record_handler(union sigval v) 3297 + static void msr_record_handler(union sigval v) 3390 3298 { 3391 3299 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 3392 3300 } ··· 3430 3340 } 3431 3341 return; 3432 3342 3433 - release_timer: 3343 + release_timer: 3434 3344 timer_delete(timerid); 3435 - release_msr: 3345 + release_msr: 3436 3346 free(per_cpu_msr_sum); 3347 + } 3348 + 3349 + /* 3350 + * set_my_sched_priority(pri) 3351 + * return previous 3352 + */ 3353 + int set_my_sched_priority(int priority) 3354 + { 3355 + int retval; 3356 + int original_priority; 3357 + 3358 + errno = 0; 3359 + original_priority = getpriority(PRIO_PROCESS, 0); 3360 + if (errno && (original_priority == -1)) 3361 + err(errno, "getpriority"); 3362 + 3363 + retval = 
setpriority(PRIO_PROCESS, 0, priority); 3364 + if (retval) 3365 + err(retval, "setpriority(%d)", priority); 3366 + 3367 + errno = 0; 3368 + retval = getpriority(PRIO_PROCESS, 0); 3369 + if (retval != priority) 3370 + err(-1, "getpriority(%d) != setpriority(%d)", retval, priority); 3371 + 3372 + return original_priority; 3437 3373 } 3438 3374 3439 3375 void turbostat_loop() ··· 3469 3353 int done_iters = 0; 3470 3354 3471 3355 setup_signal_handler(); 3356 + 3357 + /* 3358 + * elevate own priority for interval mode 3359 + */ 3360 + set_my_sched_priority(-20); 3472 3361 3473 3362 restart: 3474 3363 restarted++; ··· 3555 3434 3556 3435 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 3557 3436 if (stat(pathname, &sb)) 3558 - if (system("/sbin/modprobe msr > /dev/null 2>&1")) 3437 + if (system("/sbin/modprobe msr > /dev/null 2>&1")) 3559 3438 err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); 3560 3439 } 3561 3440 ··· 3577 3456 err(-6, "cap_get\n"); 3578 3457 3579 3458 if (cap_flag_value != CAP_SET) { 3580 - warnx("capget(CAP_SYS_RAWIO) failed," 3581 - " try \"# setcap cap_sys_rawio=ep %s\"", progname); 3459 + warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname); 3582 3460 return 1; 3583 3461 } 3584 3462 ··· 3586 3466 3587 3467 return 0; 3588 3468 } 3469 + 3589 3470 void check_permissions(void) 3590 3471 { 3591 3472 int do_exit = 0; ··· 3672 3551 pkg_cstate_limits = skx_pkg_cstate_limits; 3673 3552 has_misc_feature_control = 1; 3674 3553 break; 3554 + case INTEL_FAM6_ICELAKE_X: /* ICX */ 3555 + pkg_cstate_limits = icx_pkg_cstate_limits; 3556 + has_misc_feature_control = 1; 3557 + break; 3675 3558 case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ 3676 3559 no_MSR_MISC_PWR_MGMT = 1; 3677 3560 case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ ··· 3692 3567 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 3693 3568 case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ 3694 3569 case INTEL_FAM6_ATOM_TREMONT: /* EHL */ 3695 - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ 3570 
+ case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ 3696 3571 pkg_cstate_limits = glm_pkg_cstate_limits; 3697 3572 break; 3698 3573 default: ··· 3708 3583 has_base_hz = 1; 3709 3584 return 1; 3710 3585 } 3586 + 3711 3587 /* 3712 3588 * SLV client has support for unique MSRs: 3713 3589 * ··· 3729 3603 } 3730 3604 return 0; 3731 3605 } 3606 + 3732 3607 int is_dnv(unsigned int family, unsigned int model) 3733 3608 { 3734 3609 ··· 3742 3615 } 3743 3616 return 0; 3744 3617 } 3618 + 3745 3619 int is_bdx(unsigned int family, unsigned int model) 3746 3620 { 3747 3621 ··· 3755 3627 } 3756 3628 return 0; 3757 3629 } 3630 + 3758 3631 int is_skx(unsigned int family, unsigned int model) 3759 3632 { 3760 3633 ··· 3768 3639 } 3769 3640 return 0; 3770 3641 } 3642 + 3643 + int is_icx(unsigned int family, unsigned int model) 3644 + { 3645 + 3646 + if (!genuine_intel) 3647 + return 0; 3648 + 3649 + switch (model) { 3650 + case INTEL_FAM6_ICELAKE_X: 3651 + return 1; 3652 + } 3653 + return 0; 3654 + } 3655 + 3771 3656 int is_ehl(unsigned int family, unsigned int model) 3772 3657 { 3773 3658 if (!genuine_intel) ··· 3793 3650 } 3794 3651 return 0; 3795 3652 } 3653 + 3796 3654 int is_jvl(unsigned int family, unsigned int model) 3797 3655 { 3798 3656 if (!genuine_intel) ··· 3812 3668 return 0; 3813 3669 3814 3670 switch (model) { 3815 - /* Nehalem compatible, but do not include turbo-ratio limit support */ 3671 + /* Nehalem compatible, but do not include turbo-ratio limit support */ 3816 3672 case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ 3817 3673 case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */ 3818 3674 return 0; ··· 3820 3676 return 1; 3821 3677 } 3822 3678 } 3679 + 3823 3680 int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) 3824 3681 { 3825 3682 if (has_slv_msrs(family, model)) ··· 3828 3683 3829 3684 return 0; 3830 3685 } 3686 + 3831 3687 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) 3832 3688 { 
3833 3689 if (!genuine_intel) ··· 3845 3699 return 0; 3846 3700 } 3847 3701 } 3702 + 3848 3703 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) 3849 3704 { 3850 3705 if (!genuine_intel) ··· 3877 3730 return 0; 3878 3731 } 3879 3732 } 3733 + 3880 3734 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) 3881 3735 { 3882 3736 if (!genuine_intel) ··· 3889 3741 switch (model) { 3890 3742 case INTEL_FAM6_ATOM_GOLDMONT: 3891 3743 case INTEL_FAM6_SKYLAKE_X: 3744 + case INTEL_FAM6_ICELAKE_X: 3892 3745 return 1; 3893 3746 default: 3894 3747 return 0; 3895 3748 } 3896 3749 } 3750 + 3897 3751 int has_config_tdp(unsigned int family, unsigned int model) 3898 3752 { 3899 3753 if (!genuine_intel) ··· 3916 3766 case INTEL_FAM6_SKYLAKE_L: /* SKL */ 3917 3767 case INTEL_FAM6_CANNONLAKE_L: /* CNL */ 3918 3768 case INTEL_FAM6_SKYLAKE_X: /* SKX */ 3769 + case INTEL_FAM6_ICELAKE_X: /* ICX */ 3919 3770 3920 3771 case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ 3921 3772 return 1; ··· 3925 3774 } 3926 3775 } 3927 3776 3928 - static void 3929 - remove_underbar(char *s) 3777 + /* 3778 + * tcc_offset_bits: 3779 + * 0: Tcc Offset not supported (Default) 3780 + * 6: Bit 29:24 of MSR_PLATFORM_INFO 3781 + * 4: Bit 27:24 of MSR_PLATFORM_INFO 3782 + */ 3783 + void check_tcc_offset(int model) 3784 + { 3785 + unsigned long long msr; 3786 + 3787 + if (!genuine_intel) 3788 + return; 3789 + 3790 + switch (model) { 3791 + case INTEL_FAM6_SKYLAKE_L: 3792 + case INTEL_FAM6_SKYLAKE: 3793 + case INTEL_FAM6_KABYLAKE_L: 3794 + case INTEL_FAM6_KABYLAKE: 3795 + case INTEL_FAM6_ICELAKE_L: 3796 + case INTEL_FAM6_ICELAKE: 3797 + case INTEL_FAM6_TIGERLAKE_L: 3798 + case INTEL_FAM6_TIGERLAKE: 3799 + case INTEL_FAM6_COMETLAKE: 3800 + if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) { 3801 + msr = (msr >> 30) & 1; 3802 + if (msr) 3803 + tcc_offset_bits = 6; 3804 + } 3805 + return; 3806 + default: 3807 + return; 3808 + } 3809 + } 3810 + 3811 + static void remove_underbar(char 
*s) 3930 3812 { 3931 3813 char *to = s; 3932 3814 ··· 3972 3788 *to = 0; 3973 3789 } 3974 3790 3975 - static void 3976 - dump_cstate_pstate_config_info(unsigned int family, unsigned int model) 3791 + static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) 3977 3792 { 3978 3793 if (!do_nhm_platform_info) 3979 3794 return; ··· 4017 3834 4018 3835 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 4019 3836 } 4020 - static void 4021 - dump_sysfs_cstate_config(void) 3837 + 3838 + static void dump_sysfs_cstate_config(void) 4022 3839 { 4023 3840 char path[64]; 4024 3841 char name_buf[16]; ··· 4038 3855 4039 3856 for (state = 0; state < 10; ++state) { 4040 3857 4041 - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", 4042 - base_cpu, state); 3858 + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 4043 3859 input = fopen(path, "r"); 4044 3860 if (input == NULL) 4045 3861 continue; 4046 3862 if (!fgets(name_buf, sizeof(name_buf), input)) 4047 3863 err(1, "%s: failed to read file", path); 4048 3864 4049 - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 3865 + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 4050 3866 sp = strchr(name_buf, '-'); 4051 3867 if (!sp) 4052 3868 sp = strchrnul(name_buf, '\n'); ··· 4054 3872 4055 3873 remove_underbar(name_buf); 4056 3874 4057 - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", 4058 - base_cpu, state); 3875 + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 4059 3876 input = fopen(path, "r"); 4060 3877 if (input == NULL) 4061 3878 continue; ··· 4065 3884 fclose(input); 4066 3885 } 4067 3886 } 4068 - static void 4069 - dump_sysfs_pstate_config(void) 3887 + 3888 + static void dump_sysfs_pstate_config(void) 4070 3889 { 4071 3890 char path[64]; 4072 3891 char driver_buf[64]; ··· 4074 3893 FILE *input; 4075 3894 int turbo; 4076 3895 4077 - sprintf(path, 
"/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", 4078 - base_cpu); 3896 + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 4079 3897 input = fopen(path, "r"); 4080 3898 if (input == NULL) { 4081 3899 fprintf(outf, "NSFOD %s\n", path); ··· 4084 3904 err(1, "%s: failed to read file", path); 4085 3905 fclose(input); 4086 3906 4087 - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", 4088 - base_cpu); 3907 + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 4089 3908 input = fopen(path, "r"); 4090 3909 if (input == NULL) { 4091 3910 fprintf(outf, "NSFOD %s\n", path); ··· 4115 3936 fclose(input); 4116 3937 } 4117 3938 } 4118 - 4119 3939 4120 3940 /* 4121 3941 * print_epb() ··· 4161 3983 4162 3984 return 0; 4163 3985 } 3986 + 4164 3987 /* 4165 3988 * print_hwp() 4166 3989 * Decode the MSR_HWP_CAPABILITIES ··· 4188 4009 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 4189 4010 return 0; 4190 4011 4191 - fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", 4192 - cpu, msr, (msr & (1 << 0)) ? "" : "No-"); 4012 + fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? 
"" : "No-"); 4193 4013 4194 4014 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 4195 4015 if ((msr & (1 << 0)) == 0) ··· 4198 4020 return 0; 4199 4021 4200 4022 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 4201 - "(high %d guar %d eff %d low %d)\n", 4202 - cpu, msr, 4203 - (unsigned int)HWP_HIGHEST_PERF(msr), 4204 - (unsigned int)HWP_GUARANTEED_PERF(msr), 4205 - (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), 4206 - (unsigned int)HWP_LOWEST_PERF(msr)); 4023 + "(high %d guar %d eff %d low %d)\n", 4024 + cpu, msr, 4025 + (unsigned int)HWP_HIGHEST_PERF(msr), 4026 + (unsigned int)HWP_GUARANTEED_PERF(msr), 4027 + (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 4207 4028 4208 4029 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 4209 4030 return 0; 4210 4031 4211 4032 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 4212 - "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 4213 - cpu, msr, 4214 - (unsigned int)(((msr) >> 0) & 0xff), 4215 - (unsigned int)(((msr) >> 8) & 0xff), 4216 - (unsigned int)(((msr) >> 16) & 0xff), 4217 - (unsigned int)(((msr) >> 24) & 0xff), 4218 - (unsigned int)(((msr) >> 32) & 0xff3), 4219 - (unsigned int)(((msr) >> 42) & 0x1)); 4033 + "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 4034 + cpu, msr, 4035 + (unsigned int)(((msr) >> 0) & 0xff), 4036 + (unsigned int)(((msr) >> 8) & 0xff), 4037 + (unsigned int)(((msr) >> 16) & 0xff), 4038 + (unsigned int)(((msr) >> 24) & 0xff), 4039 + (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 4220 4040 4221 4041 if (has_hwp_pkg) { 4222 4042 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) ··· 4226 4050 (unsigned int)(((msr) >> 0) & 0xff), 4227 4051 (unsigned int)(((msr) >> 8) & 0xff), 4228 4052 (unsigned int)(((msr) >> 16) & 0xff), 4229 - (unsigned int)(((msr) >> 24) & 0xff), 4230 - (unsigned int)(((msr) >> 32) & 0xff3)); 4053 + (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 4231 4054 } 4232 
4055 if (has_hwp_notify) { 4233 4056 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) ··· 4234 4059 4235 4060 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 4236 4061 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 4237 - cpu, msr, 4238 - ((msr) & 0x1) ? "EN" : "Dis", 4239 - ((msr) & 0x2) ? "EN" : "Dis"); 4062 + cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 4240 4063 } 4241 4064 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 4242 4065 return 0; 4243 4066 4244 4067 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 4245 - "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 4246 - cpu, msr, 4247 - ((msr) & 0x1) ? "" : "No-", 4248 - ((msr) & 0x2) ? "" : "No-"); 4068 + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 4069 + cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-"); 4249 4070 4250 4071 return 0; 4251 4072 } ··· 4281 4110 (msr & 1 << 5) ? "Auto-HWP, " : "", 4282 4111 (msr & 1 << 4) ? "Graphics, " : "", 4283 4112 (msr & 1 << 2) ? "bit2, " : "", 4284 - (msr & 1 << 1) ? "ThermStatus, " : "", 4285 - (msr & 1 << 0) ? "PROCHOT, " : ""); 4113 + (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 4286 4114 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 4287 4115 (msr & 1 << 31) ? "bit31, " : "", 4288 4116 (msr & 1 << 30) ? "bit30, " : "", ··· 4295 4125 (msr & 1 << 21) ? "Auto-HWP, " : "", 4296 4126 (msr & 1 << 20) ? "Graphics, " : "", 4297 4127 (msr & 1 << 18) ? "bit18, " : "", 4298 - (msr & 1 << 17) ? "ThermStatus, " : "", 4299 - (msr & 1 << 16) ? "PROCHOT, " : ""); 4128 + (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 4300 4129 4301 4130 } 4302 4131 if (do_gfx_perf_limit_reasons) { ··· 4308 4139 (msr & 1 << 6) ? "VR-Therm, " : "", 4309 4140 (msr & 1 << 8) ? "Amps, " : "", 4310 4141 (msr & 1 << 9) ? "GFXPwr, " : "", 4311 - (msr & 1 << 10) ? "PkgPwrL1, " : "", 4312 - (msr & 1 << 11) ? "PkgPwrL2, " : ""); 4142 + (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? 
"PkgPwrL2, " : ""); 4313 4143 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 4314 4144 (msr & 1 << 16) ? "PROCHOT, " : "", 4315 4145 (msr & 1 << 17) ? "ThermStatus, " : "", ··· 4316 4148 (msr & 1 << 22) ? "VR-Therm, " : "", 4317 4149 (msr & 1 << 24) ? "Amps, " : "", 4318 4150 (msr & 1 << 25) ? "GFXPwr, " : "", 4319 - (msr & 1 << 26) ? "PkgPwrL1, " : "", 4320 - (msr & 1 << 27) ? "PkgPwrL2, " : ""); 4151 + (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 4321 4152 } 4322 4153 if (do_ring_perf_limit_reasons) { 4323 4154 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); ··· 4326 4159 (msr & 1 << 1) ? "ThermStatus, " : "", 4327 4160 (msr & 1 << 6) ? "VR-Therm, " : "", 4328 4161 (msr & 1 << 8) ? "Amps, " : "", 4329 - (msr & 1 << 10) ? "PkgPwrL1, " : "", 4330 - (msr & 1 << 11) ? "PkgPwrL2, " : ""); 4162 + (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 4331 4163 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 4332 4164 (msr & 1 << 16) ? "PROCHOT, " : "", 4333 4165 (msr & 1 << 17) ? "ThermStatus, " : "", 4334 4166 (msr & 1 << 22) ? "VR-Therm, " : "", 4335 4167 (msr & 1 << 24) ? "Amps, " : "", 4336 - (msr & 1 << 26) ? "PkgPwrL1, " : "", 4337 - (msr & 1 << 27) ? "PkgPwrL2, " : ""); 4168 + (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 4338 4169 } 4339 4170 return 0; 4340 4171 } 4341 4172 4342 4173 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 4343 - #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 4174 + #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 4344 4175 4345 4176 double get_tdp_intel(unsigned int model) 4346 4177 { ··· 4367 4202 * rapl_dram_energy_units_probe() 4368 4203 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. 
4369 4204 */ 4370 - static double 4371 - rapl_dram_energy_units_probe(int model, double rapl_energy_units) 4205 + static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) 4372 4206 { 4373 4207 /* only called for genuine_intel, family 6 */ 4374 4208 4375 4209 switch (model) { 4376 4210 case INTEL_FAM6_HASWELL_X: /* HSX */ 4377 4211 case INTEL_FAM6_BROADWELL_X: /* BDX */ 4212 + case INTEL_FAM6_SKYLAKE_X: /* SKX */ 4378 4213 case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ 4379 4214 return (rapl_dram_energy_units = 15.3 / 1000000); 4380 4215 default: ··· 4419 4254 BIC_PRESENT(BIC_PkgWatt); 4420 4255 break; 4421 4256 case INTEL_FAM6_ATOM_TREMONT: /* EHL */ 4422 - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; 4257 + do_rapl = 4258 + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS 4259 + | RAPL_GFX | RAPL_PKG_POWER_INFO; 4423 4260 if (rapl_joules) { 4424 4261 BIC_PRESENT(BIC_Pkg_J); 4425 4262 BIC_PRESENT(BIC_Cor_J); ··· 4444 4277 break; 4445 4278 case INTEL_FAM6_SKYLAKE_L: /* SKL */ 4446 4279 case INTEL_FAM6_CANNONLAKE_L: /* CNL */ 4447 - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; 4280 + do_rapl = 4281 + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS 4282 + | RAPL_GFX | RAPL_PKG_POWER_INFO; 4448 4283 BIC_PRESENT(BIC_PKG__); 4449 4284 BIC_PRESENT(BIC_RAM__); 4450 4285 if (rapl_joules) { ··· 4464 4295 case INTEL_FAM6_HASWELL_X: /* HSX */ 4465 4296 case INTEL_FAM6_BROADWELL_X: /* BDX */ 4466 4297 case INTEL_FAM6_SKYLAKE_X: /* SKX */ 4298 + case INTEL_FAM6_ICELAKE_X: /* ICX */ 4467 4299 case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ 4468 - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; 4300 + 
do_rapl = 4301 + RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | 4302 + RAPL_PKG_POWER_INFO; 4469 4303 BIC_PRESENT(BIC_PKG__); 4470 4304 BIC_PRESENT(BIC_RAM__); 4471 4305 if (rapl_joules) { ··· 4481 4309 break; 4482 4310 case INTEL_FAM6_SANDYBRIDGE_X: 4483 4311 case INTEL_FAM6_IVYBRIDGE_X: 4484 - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; 4312 + do_rapl = 4313 + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | 4314 + RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; 4485 4315 BIC_PRESENT(BIC_PKG__); 4486 4316 BIC_PRESENT(BIC_RAM__); 4487 4317 if (rapl_joules) { ··· 4508 4334 } 4509 4335 break; 4510 4336 case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ 4511 - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; 4337 + do_rapl = 4338 + RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | 4339 + RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; 4512 4340 BIC_PRESENT(BIC_PKG__); 4513 4341 BIC_PRESENT(BIC_RAM__); 4514 4342 if (rapl_joules) { ··· 4627 4451 4628 4452 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) 4629 4453 { 4630 - if (is_skx(family, model) || is_bdx(family, model)) 4454 + if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model)) 4631 4455 has_automatic_cstate_conversion = 1; 4456 + } 4457 + 4458 + void prewake_cstate_probe(unsigned int family, unsigned int model) 4459 + { 4460 + if (is_icx(family, model)) 4461 + dis_cstate_prewake = 1; 4632 4462 } 4633 4463 4634 4464 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) ··· 4662 4480 return 0; 4663 4481 4664 4482 dts = (msr >> 16) & 0x7F; 4665 - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 
0x%08llx (%d C)\n", 4666 - cpu, msr, tcc_activation_temp - dts); 4483 + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 4667 4484 4668 4485 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 4669 4486 return 0; ··· 4670 4489 dts = (msr >> 16) & 0x7F; 4671 4490 dts2 = (msr >> 8) & 0x7F; 4672 4491 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 4673 - cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 4492 + cpu, msr, tj_max - dts, tj_max - dts2); 4674 4493 } 4675 - 4676 4494 4677 4495 if (do_dts && debug) { 4678 4496 unsigned int resolution; ··· 4682 4502 dts = (msr >> 16) & 0x7F; 4683 4503 resolution = (msr >> 27) & 0xF; 4684 4504 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 4685 - cpu, msr, tcc_activation_temp - dts, resolution); 4505 + cpu, msr, tj_max - dts, resolution); 4686 4506 4687 4507 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 4688 4508 return 0; ··· 4690 4510 dts = (msr >> 16) & 0x7F; 4691 4511 dts2 = (msr >> 8) & 0x7F; 4692 4512 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 4693 - cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 4513 + cpu, msr, tj_max - dts, tj_max - dts2); 4694 4514 } 4695 4515 4696 4516 return 0; ··· 4702 4522 cpu, label, 4703 4523 ((msr >> 15) & 1) ? "EN" : "DIS", 4704 4524 ((msr >> 0) & 0x7FFF) * rapl_power_units, 4705 - (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 4525 + (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 4706 4526 (((msr >> 16) & 1) ? 
"EN" : "DIS")); 4707 4527 4708 4528 return; ··· 4743 4563 if (do_rapl & RAPL_PKG_POWER_INFO) { 4744 4564 4745 4565 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 4746 - return -5; 4747 - 4566 + return -5; 4748 4567 4749 4568 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 4750 4569 cpu, msr, 4751 - ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4570 + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4752 4571 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4753 4572 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4754 4573 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); ··· 4766 4587 cpu, 4767 4588 ((msr >> 47) & 1) ? "EN" : "DIS", 4768 4589 ((msr >> 32) & 0x7FFF) * rapl_power_units, 4769 - (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 4590 + (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 4770 4591 ((msr >> 48) & 1) ? "EN" : "DIS"); 4771 4592 } 4772 4593 4773 4594 if (do_rapl & RAPL_DRAM_POWER_INFO) { 4774 4595 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 4775 - return -6; 4596 + return -6; 4776 4597 4777 4598 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 4778 4599 cpu, msr, 4779 - ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4600 + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4780 4601 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4781 4602 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 4782 4603 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); ··· 4785 4606 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 4786 4607 return -9; 4787 4608 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 4788 - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 4609 + cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); 4789 4610 4790 4611 print_power_limit_msr(cpu, msr, "DRAM Limit"); 4791 4612 } ··· 4799 4620 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 4800 4621 return -9; 4801 4622 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 4802 - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 4623 + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 4803 4624 print_power_limit_msr(cpu, msr, "Cores Limit"); 4804 4625 } 4805 4626 if (do_rapl & RAPL_GFX) { ··· 4811 4632 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 4812 4633 return -9; 4813 4634 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 4814 - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 4635 + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 4815 4636 print_power_limit_msr(cpu, msr, "GFX Limit"); 4816 4637 } 4817 4638 return 0; ··· 4833 4654 switch (model) { 4834 4655 case INTEL_FAM6_SANDYBRIDGE: 4835 4656 case INTEL_FAM6_SANDYBRIDGE_X: 4836 - case INTEL_FAM6_IVYBRIDGE: /* IVB */ 4837 - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ 4838 - case INTEL_FAM6_HASWELL: /* HSW */ 4839 - case INTEL_FAM6_HASWELL_X: /* HSW */ 4840 - case INTEL_FAM6_HASWELL_L: /* HSW */ 4841 - case INTEL_FAM6_HASWELL_G: /* HSW */ 4842 - case INTEL_FAM6_BROADWELL: /* BDW */ 4843 - case INTEL_FAM6_BROADWELL_G: /* BDW */ 4844 - case INTEL_FAM6_BROADWELL_X: /* BDX */ 4845 - case INTEL_FAM6_SKYLAKE_L: /* SKL */ 4846 - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ 4847 - case INTEL_FAM6_SKYLAKE_X: /* SKX */ 4848 - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ 4657 + case INTEL_FAM6_IVYBRIDGE: /* IVB */ 4658 + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ 4659 + case INTEL_FAM6_HASWELL: /* HSW */ 4660 + case INTEL_FAM6_HASWELL_X: /* HSW */ 4661 + case INTEL_FAM6_HASWELL_L: /* HSW */ 4662 + case INTEL_FAM6_HASWELL_G: /* HSW */ 4663 + case INTEL_FAM6_BROADWELL: /* BDW */ 4664 + case INTEL_FAM6_BROADWELL_G: /* BDW */ 4665 + case INTEL_FAM6_BROADWELL_X: /* BDX */ 4666 + case INTEL_FAM6_SKYLAKE_L: /* SKL */ 4667 + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ 4668 + case 
INTEL_FAM6_SKYLAKE_X: /* SKX */ 4669 + case INTEL_FAM6_ICELAKE_X: /* ICX */ 4670 + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ 4849 4671 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 4850 4672 case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ 4851 - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ 4852 - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ 4673 + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ 4674 + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ 4853 4675 return 1; 4854 4676 } 4855 4677 return 0; ··· 4936 4756 return 0; 4937 4757 4938 4758 switch (model) { 4939 - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ 4759 + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ 4940 4760 return 1; 4941 4761 } 4942 4762 ··· 4951 4771 } 4952 4772 4953 4773 #define SLM_BCLK_FREQS 5 4954 - double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; 4774 + double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; 4955 4775 4956 4776 double slm_bclk(void) 4957 4777 { ··· 4985 4805 return 133.33; 4986 4806 } 4987 4807 4808 + int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 4809 + { 4810 + unsigned int eax, ebx, ecx, edx; 4811 + 4812 + if (!genuine_intel) 4813 + return 0; 4814 + 4815 + if (cpu_migrate(t->cpu_id)) { 4816 + fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 4817 + return -1; 4818 + } 4819 + 4820 + if (max_level < 0x1a) 4821 + return 0; 4822 + 4823 + __cpuid(0x1a, eax, ebx, ecx, edx); 4824 + eax = (eax >> 24) & 0xFF; 4825 + if (eax == 0x20) 4826 + t->is_atom = true; 4827 + return 0; 4828 + } 4829 + 4988 4830 /* 4989 4831 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 4990 4832 * the Thermal Control Circuit (TCC) activates. ··· 5019 4817 * below this value, including the Digital Thermal Sensor (DTS), 5020 4818 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 
5021 4819 */ 5022 - int read_tcc_activation_temp() 5023 - { 5024 - unsigned long long msr; 5025 - unsigned int tcc, target_c, offset_c; 5026 - 5027 - /* Temperature Target MSR is Nehalem and newer only */ 5028 - if (!do_nhm_platform_info) 5029 - return 0; 5030 - 5031 - if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 5032 - return 0; 5033 - 5034 - target_c = (msr >> 16) & 0xFF; 5035 - 5036 - offset_c = (msr >> 24) & 0xF; 5037 - 5038 - tcc = target_c - offset_c; 5039 - 5040 - if (!quiet) 5041 - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 5042 - base_cpu, msr, tcc, target_c, offset_c); 5043 - 5044 - return tcc; 5045 - } 5046 - 5047 4820 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5048 4821 { 5049 - /* tcc_activation_temp is used only for dts or ptm */ 4822 + unsigned long long msr; 4823 + unsigned int tcc_default, tcc_offset; 4824 + int cpu; 4825 + 4826 + /* tj_max is used only for dts or ptm */ 5050 4827 if (!(do_dts || do_ptm)) 5051 4828 return 0; 5052 4829 ··· 5033 4852 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 5034 4853 return 0; 5035 4854 5036 - if (tcc_activation_temp_override != 0) { 5037 - tcc_activation_temp = tcc_activation_temp_override; 5038 - fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp); 4855 + cpu = t->cpu_id; 4856 + if (cpu_migrate(cpu)) { 4857 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 4858 + return -1; 4859 + } 4860 + 4861 + if (tj_max_override != 0) { 4862 + tj_max = tj_max_override; 4863 + fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 5039 4864 return 0; 5040 4865 } 5041 4866 5042 - tcc_activation_temp = read_tcc_activation_temp(); 5043 - if (tcc_activation_temp) 5044 - return 0; 4867 + /* Temperature Target MSR is Nehalem and newer only */ 4868 + if (!do_nhm_platform_info) 4869 + goto guess; 5045 4870 5046 - tcc_activation_temp 
= TJMAX_DEFAULT; 5047 - fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp); 4871 + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 4872 + goto guess; 4873 + 4874 + tcc_default = (msr >> 16) & 0xFF; 4875 + 4876 + if (!quiet) { 4877 + switch (tcc_offset_bits) { 4878 + case 4: 4879 + tcc_offset = (msr >> 24) & 0xF; 4880 + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 4881 + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 4882 + break; 4883 + case 6: 4884 + tcc_offset = (msr >> 24) & 0x3F; 4885 + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 4886 + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 4887 + break; 4888 + default: 4889 + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 4890 + break; 4891 + } 4892 + } 4893 + 4894 + if (!tcc_default) 4895 + goto guess; 4896 + 4897 + tj_max = tcc_default; 4898 + 4899 + return 0; 4900 + 4901 + guess: 4902 + tj_max = TJMAX_DEFAULT; 4903 + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); 5048 4904 5049 4905 return 0; 5050 4906 } ··· 5092 4874 5093 4875 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 5094 4876 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 5095 - base_cpu, msr, 5096 - msr & FEAT_CTL_LOCKED ? "" : "UN-", 5097 - msr & (1 << 18) ? "SGX" : ""); 4877 + base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 5098 4878 } 5099 4879 5100 4880 void decode_misc_enable_msr(void) ··· 5120 4904 return; 5121 4905 5122 4906 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 5123 - fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 5124 - base_cpu, msr, 5125 - msr & (0 << 0) ? "No-" : "", 5126 - msr & (1 << 0) ? "No-" : "", 5127 - msr & (2 << 0) ? 
"No-" : "", 5128 - msr & (3 << 0) ? "No-" : ""); 4907 + fprintf(outf, 4908 + "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 4909 + base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "", 4910 + msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); 5129 4911 } 4912 + 5130 4913 /* 5131 4914 * Decode MSR_MISC_PWR_MGMT 5132 4915 * ··· 5146 4931 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 5147 4932 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 5148 4933 base_cpu, msr, 5149 - msr & (1 << 0) ? "DIS" : "EN", 5150 - msr & (1 << 1) ? "EN" : "DIS", 5151 - msr & (1 << 8) ? "EN" : "DIS"); 4934 + msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 5152 4935 } 4936 + 5153 4937 /* 5154 4938 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 5155 4939 * ··· 5174 4960 unsigned int intel_model_duplicates(unsigned int model) 5175 4961 { 5176 4962 5177 - switch(model) { 4963 + switch (model) { 5178 4964 case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ 5179 4965 case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ 5180 - case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 4966 + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 5181 4967 case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */ 5182 4968 case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */ 5183 4969 return INTEL_FAM6_NEHALEM; ··· 5208 4994 case INTEL_FAM6_ROCKETLAKE: 5209 4995 case INTEL_FAM6_LAKEFIELD: 5210 4996 case INTEL_FAM6_ALDERLAKE: 4997 + case INTEL_FAM6_ALDERLAKE_L: 5211 4998 return INTEL_FAM6_CANNONLAKE_L; 5212 4999 5213 5000 case INTEL_FAM6_ATOM_TREMONT_L: 5214 5001 return INTEL_FAM6_ATOM_TREMONT; 5215 5002 5216 - case INTEL_FAM6_ICELAKE_X: 5003 + case INTEL_FAM6_ICELAKE_D: 5217 5004 case 
INTEL_FAM6_SAPPHIRERAPIDS_X: 5218 - return INTEL_FAM6_SKYLAKE_X; 5005 + return INTEL_FAM6_ICELAKE_X; 5219 5006 } 5220 5007 return model; 5221 5008 } ··· 5240 5025 close(fd); 5241 5026 return; 5242 5027 } 5243 - fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", 5244 - value, value == 2000000000 ? "default" : "constrained"); 5028 + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 5245 5029 5246 5030 close(fd); 5031 + } 5032 + 5033 + /* 5034 + * Linux-perf manages the HW instructions-retired counter 5035 + * by enabling when requested, and hiding rollover 5036 + */ 5037 + void linux_perf_init(void) 5038 + { 5039 + if (!BIC_IS_ENABLED(BIC_IPC)) 5040 + return; 5041 + 5042 + if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 5043 + return; 5044 + 5045 + fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 5046 + if (fd_instr_count_percpu == NULL) 5047 + err(-1, "calloc fd_instr_count_percpu"); 5048 + 5049 + BIC_PRESENT(BIC_IPC); 5247 5050 } 5248 5051 5249 5052 void process_cpuid() ··· 5269 5036 unsigned int eax, ebx, ecx, edx; 5270 5037 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 5271 5038 unsigned int has_turbo; 5039 + unsigned long long ucode_patch = 0; 5272 5040 5273 5041 eax = ebx = ecx = edx = 0; 5274 5042 ··· 5283 5049 hygon_genuine = 1; 5284 5050 5285 5051 if (!quiet) 5286 - fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", 5287 - (char *)&ebx, (char *)&edx, (char *)&ecx); 5052 + fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 5053 + (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 5288 5054 5289 5055 __cpuid(1, fms, ebx, ecx, edx); 5290 5056 family = (fms >> 8) & 0xf; ··· 5297 5063 ecx_flags = ecx; 5298 5064 edx_flags = edx; 5299 5065 5066 + if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 5067 + warnx("get_msr(UCODE)\n"); 5068 + 5300 5069 /* 5301 5070 * check max extended function levels of CPUID. 
5302 5071 * This is needed to check for invariant TSC. ··· 5309 5072 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 5310 5073 5311 5074 if (!quiet) { 5312 - fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 5313 - max_level, max_extended_level, family, model, stepping, family, model, stepping); 5075 + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n", 5076 + family, model, stepping, family, model, stepping, 5077 + (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 5078 + fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 5314 5079 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 5315 5080 ecx_flags & (1 << 0) ? "SSE3" : "-", 5316 5081 ecx_flags & (1 << 3) ? "MONITOR" : "-", ··· 5322 5083 edx_flags & (1 << 4) ? "TSC" : "-", 5323 5084 edx_flags & (1 << 5) ? "MSR" : "-", 5324 5085 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 5325 - edx_flags & (1 << 28) ? "HT" : "-", 5326 - edx_flags & (1 << 29) ? "TM" : "-"); 5086 + edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 5327 5087 } 5328 - if (genuine_intel) 5088 + if (genuine_intel) { 5089 + model_orig = model; 5329 5090 model = intel_model_duplicates(model); 5091 + } 5330 5092 5331 5093 if (!(edx_flags & (1 << 5))) 5332 5094 errx(1, "CPUID: no MSR"); ··· 5378 5138 has_hwp ? "" : "No-", 5379 5139 has_hwp_notify ? "" : "No-", 5380 5140 has_hwp_activity_window ? "" : "No-", 5381 - has_hwp_epp ? "" : "No-", 5382 - has_hwp_pkg ? "" : "No-", 5383 - has_epb ? "" : "No-"); 5141 + has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? 
"" : "No-"); 5384 5142 5385 5143 if (!quiet) 5386 5144 decode_misc_enable_msr(); 5387 - 5388 5145 5389 5146 if (max_level >= 0x7 && !quiet) { 5390 5147 int has_sgx; ··· 5414 5177 eax_crystal, ebx_tsc, crystal_hz); 5415 5178 5416 5179 if (crystal_hz == 0) 5417 - switch(model) { 5180 + switch (model) { 5418 5181 case INTEL_FAM6_SKYLAKE_L: /* SKL */ 5419 5182 crystal_hz = 24000000; /* 24.0 MHz */ 5420 5183 break; ··· 5427 5190 break; 5428 5191 default: 5429 5192 crystal_hz = 0; 5430 - } 5193 + } 5431 5194 5432 5195 if (crystal_hz) { 5433 - tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; 5196 + tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 5434 5197 if (!quiet) 5435 5198 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 5436 - tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 5199 + tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 5437 5200 } 5438 5201 } 5439 5202 } ··· 5502 5265 BIC_NOT_PRESENT(BIC_Pkgpc7); 5503 5266 use_c1_residency_msr = 1; 5504 5267 } 5505 - if (is_skx(family, model)) { 5268 + if (is_skx(family, model) || is_icx(family, model)) { 5506 5269 BIC_NOT_PRESENT(BIC_CPU_c3); 5507 5270 BIC_NOT_PRESENT(BIC_Pkgpc3); 5508 5271 BIC_NOT_PRESENT(BIC_CPU_c7); ··· 5528 5291 BIC_PRESENT(BIC_CPUGFX); 5529 5292 } 5530 5293 do_slm_cstates = is_slm(family, model); 5531 - do_knl_cstates = is_knl(family, model); 5294 + do_knl_cstates = is_knl(family, model); 5532 5295 5533 - if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || 5534 - is_ehl(family, model)) 5296 + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model)) 5535 5297 BIC_NOT_PRESENT(BIC_CPU_c3); 5536 5298 5537 5299 if (!quiet) ··· 5543 5307 perf_limit_reasons_probe(family, model); 5544 5308 automatic_cstate_conversion_probe(family, model); 5545 5309 5310 + check_tcc_offset(model_orig); 5311 + 5546 5312 if (!quiet) 5547 5313 dump_cstate_pstate_config_info(family, model); 5548 5314 ··· 5555 5317 if (!quiet) 5556 
5318 dump_sysfs_pstate_config(); 5557 5319 5558 - if (has_skl_msrs(family, model)) 5320 + if (has_skl_msrs(family, model) || is_ehl(family, model)) 5559 5321 calculate_tsc_tweak(); 5560 5322 5561 5323 if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) ··· 5624 5386 if (debug > 1) 5625 5387 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 5626 5388 5627 - cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 5389 + cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 5628 5390 if (cpus == NULL) 5629 5391 err(1, "calloc cpus"); 5630 5392 ··· 5703 5465 5704 5466 topo.cores_per_node = max_core_id + 1; 5705 5467 if (debug > 1) 5706 - fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", 5707 - max_core_id, topo.cores_per_node); 5468 + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 5708 5469 if (!summary_only && topo.cores_per_node > 1) 5709 5470 BIC_PRESENT(BIC_Core); 5710 5471 5711 5472 topo.num_die = max_die_id + 1; 5712 5473 if (debug > 1) 5713 - fprintf(outf, "max_die_id %d, sizing for %d die\n", 5714 - max_die_id, topo.num_die); 5474 + fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die); 5715 5475 if (!summary_only && topo.num_die > 1) 5716 5476 BIC_PRESENT(BIC_Die); 5717 5477 5718 5478 topo.num_packages = max_package_id + 1; 5719 5479 if (debug > 1) 5720 - fprintf(outf, "max_package_id %d, sizing for %d packages\n", 5721 - max_package_id, topo.num_packages); 5480 + fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 5722 5481 if (!summary_only && topo.num_packages > 1) 5723 5482 BIC_PRESENT(BIC_Package); 5724 5483 ··· 5738 5503 fprintf(outf, 5739 5504 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 5740 5505 i, cpus[i].physical_package_id, cpus[i].die_id, 5741 - cpus[i].physical_node_id, 5742 - cpus[i].logical_node_id, 5743 - 
cpus[i].physical_core_id, 5744 - cpus[i].thread_id); 5506 + cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 5745 5507 } 5746 5508 5747 5509 } 5748 5510 5749 - void 5750 - allocate_counters(struct thread_data **t, struct core_data **c, 5751 - struct pkg_data **p) 5511 + void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 5752 5512 { 5753 5513 int i; 5754 - int num_cores = topo.cores_per_node * topo.nodes_per_pkg * 5755 - topo.num_packages; 5514 + int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 5756 5515 int num_threads = topo.threads_per_core * num_cores; 5757 5516 5758 5517 *t = calloc(num_threads, sizeof(struct thread_data)); ··· 5774 5545 error: 5775 5546 err(1, "calloc counters"); 5776 5547 } 5548 + 5777 5549 /* 5778 5550 * init_counter() 5779 5551 * 5780 5552 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 5781 5553 */ 5782 - void init_counter(struct thread_data *thread_base, struct core_data *core_base, 5783 - struct pkg_data *pkg_base, int cpu_id) 5554 + void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 5784 5555 { 5785 5556 int pkg_id = cpus[cpu_id].physical_package_id; 5786 5557 int node_id = cpus[cpu_id].logical_node_id; ··· 5789 5560 struct thread_data *t; 5790 5561 struct core_data *c; 5791 5562 struct pkg_data *p; 5792 - 5793 5563 5794 5564 /* Workaround for systems where physical_node_id==-1 5795 5565 * and logical_node_id==(-1 - topo.num_cpus) ··· 5811 5583 p->package_id = pkg_id; 5812 5584 } 5813 5585 5814 - 5815 5586 int initialize_counters(int cpu_id) 5816 5587 { 5817 5588 init_counter(EVEN_COUNTERS, cpu_id); ··· 5825 5598 if (outp == NULL) 5826 5599 err(-1, "calloc output buffer"); 5827 5600 } 5601 + 5828 5602 void allocate_fd_percpu(void) 5829 5603 { 5830 5604 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 5831 5605 if (fd_percpu == NULL) 5832 5606 
err(-1, "calloc fd_percpu"); 5833 5607 } 5608 + 5834 5609 void allocate_irq_buffers(void) 5835 5610 { 5836 5611 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); ··· 5843 5614 if (irqs_per_cpu == NULL) 5844 5615 err(-1, "calloc %d", topo.max_cpu_num + 1); 5845 5616 } 5617 + 5846 5618 void setup_all_buffers(void) 5847 5619 { 5848 5620 topology_probe(); ··· 5872 5642 check_dev_msr(); 5873 5643 check_permissions(); 5874 5644 process_cpuid(); 5875 - 5645 + linux_perf_init(); 5876 5646 5877 5647 if (!quiet) 5878 5648 for_all_cpus(print_hwp, ODD_COUNTERS); ··· 5887 5657 for_all_cpus(print_rapl, ODD_COUNTERS); 5888 5658 5889 5659 for_all_cpus(set_temperature_target, ODD_COUNTERS); 5660 + 5661 + for_all_cpus(get_cpu_type, ODD_COUNTERS); 5662 + for_all_cpus(get_cpu_type, EVEN_COUNTERS); 5890 5663 5891 5664 if (!quiet) 5892 5665 for_all_cpus(print_thermal, ODD_COUNTERS); ··· 5946 5713 format_all_counters(EVEN_COUNTERS); 5947 5714 } 5948 5715 5949 - fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 5716 + fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 5950 5717 5951 5718 flush_output_stderr(); 5952 5719 ··· 5971 5738 return status; 5972 5739 } 5973 5740 5974 - void print_version() { 5975 - fprintf(outf, "turbostat version 20.09.30" 5976 - " - Len Brown <lenb@kernel.org>\n"); 5741 + void print_version() 5742 + { 5743 + fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n"); 5977 5744 } 5978 5745 5979 5746 int add_counter(unsigned int msr_num, char *path, char *name, 5980 - unsigned int width, enum counter_scope scope, 5981 - enum counter_type type, enum counter_format format, int flags) 5747 + unsigned int width, enum counter_scope scope, 5748 + enum counter_type type, enum counter_format format, int flags) 5982 5749 { 5983 5750 struct msr_counter *msrp; 5984 5751 ··· 6004 5771 sys.tp = msrp; 6005 5772 sys.added_thread_counters++; 6006 5773 if (sys.added_thread_counters > 
MAX_ADDED_THREAD_COUNTERS) { 6007 - fprintf(stderr, "exceeded max %d added thread counters\n", 6008 - MAX_ADDED_COUNTERS); 5774 + fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS); 6009 5775 exit(-1); 6010 5776 } 6011 5777 break; ··· 6014 5782 sys.cp = msrp; 6015 5783 sys.added_core_counters++; 6016 5784 if (sys.added_core_counters > MAX_ADDED_COUNTERS) { 6017 - fprintf(stderr, "exceeded max %d added core counters\n", 6018 - MAX_ADDED_COUNTERS); 5785 + fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS); 6019 5786 exit(-1); 6020 5787 } 6021 5788 break; ··· 6024 5793 sys.pp = msrp; 6025 5794 sys.added_package_counters++; 6026 5795 if (sys.added_package_counters > MAX_ADDED_COUNTERS) { 6027 - fprintf(stderr, "exceeded max %d added package counters\n", 6028 - MAX_ADDED_COUNTERS); 5796 + fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS); 6029 5797 exit(-1); 6030 5798 } 6031 5799 break; ··· 6161 5931 6162 5932 for (state = 10; state >= 0; --state) { 6163 5933 6164 - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", 6165 - base_cpu, state); 5934 + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 6166 5935 input = fopen(path, "r"); 6167 5936 if (input == NULL) 6168 5937 continue; 6169 5938 if (!fgets(name_buf, sizeof(name_buf), input)) 6170 5939 err(1, "%s: failed to read file", path); 6171 5940 6172 - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 5941 + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 6173 5942 sp = strchr(name_buf, '-'); 6174 5943 if (!sp) 6175 5944 sp = strchrnul(name_buf, '\n'); ··· 6184 5955 if (is_deferred_skip(name_buf)) 6185 5956 continue; 6186 5957 6187 - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, 6188 - FORMAT_PERCENT, SYSFS_PERCPU); 5958 + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU); 6189 5959 } 6190 5960 6191 5961 
for (state = 10; state >= 0; --state) { 6192 5962 6193 - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", 6194 - base_cpu, state); 5963 + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 6195 5964 input = fopen(path, "r"); 6196 5965 if (input == NULL) 6197 5966 continue; 6198 5967 if (!fgets(name_buf, sizeof(name_buf), input)) 6199 5968 err(1, "%s: failed to read file", path); 6200 - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 5969 + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 6201 5970 sp = strchr(name_buf, '-'); 6202 5971 if (!sp) 6203 5972 sp = strchrnul(name_buf, '\n'); ··· 6209 5982 if (is_deferred_skip(name_buf)) 6210 5983 continue; 6211 5984 6212 - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, 6213 - FORMAT_DELTA, SYSFS_PERCPU); 5985 + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU); 6214 5986 } 6215 5987 6216 5988 } 6217 - 6218 5989 6219 5990 /* 6220 5991 * parse cpuset with following syntax ··· 6300 6075 exit(-1); 6301 6076 } 6302 6077 6303 - 6304 6078 void cmdline(int argc, char **argv) 6305 6079 { 6306 6080 int opt; 6307 6081 int option_index = 0; 6308 6082 static struct option long_options[] = { 6309 - {"add", required_argument, 0, 'a'}, 6310 - {"cpu", required_argument, 0, 'c'}, 6311 - {"Dump", no_argument, 0, 'D'}, 6312 - {"debug", no_argument, 0, 'd'}, /* internal, not documented */ 6313 - {"enable", required_argument, 0, 'e'}, 6314 - {"interval", required_argument, 0, 'i'}, 6315 - {"num_iterations", required_argument, 0, 'n'}, 6316 - {"help", no_argument, 0, 'h'}, 6317 - {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help 6318 - {"Joules", no_argument, 0, 'J'}, 6319 - {"list", no_argument, 0, 'l'}, 6320 - {"out", required_argument, 0, 'o'}, 6321 - {"quiet", no_argument, 0, 'q'}, 6322 - {"show", required_argument, 0, 's'}, 6323 - {"Summary", no_argument, 0, 'S'}, 6324 - {"TCC", 
required_argument, 0, 'T'}, 6325 - {"version", no_argument, 0, 'v' }, 6326 - {0, 0, 0, 0 } 6083 + { "add", required_argument, 0, 'a' }, 6084 + { "cpu", required_argument, 0, 'c' }, 6085 + { "Dump", no_argument, 0, 'D' }, 6086 + { "debug", no_argument, 0, 'd' }, /* internal, not documented */ 6087 + { "enable", required_argument, 0, 'e' }, 6088 + { "interval", required_argument, 0, 'i' }, 6089 + { "IPC", no_argument, 0, 'I' }, 6090 + { "num_iterations", required_argument, 0, 'n' }, 6091 + { "help", no_argument, 0, 'h' }, 6092 + { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help 6093 + { "Joules", no_argument, 0, 'J' }, 6094 + { "list", no_argument, 0, 'l' }, 6095 + { "out", required_argument, 0, 'o' }, 6096 + { "quiet", no_argument, 0, 'q' }, 6097 + { "show", required_argument, 0, 's' }, 6098 + { "Summary", no_argument, 0, 'S' }, 6099 + { "TCC", required_argument, 0, 'T' }, 6100 + { "version", no_argument, 0, 'v' }, 6101 + { 0, 0, 0, 0 } 6327 6102 }; 6328 6103 6329 6104 progname = argv[0]; 6330 6105 6331 - while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", 6332 - long_options, &option_index)) != -1) { 6106 + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) { 6333 6107 switch (opt) { 6334 6108 case 'a': 6335 6109 parse_add_command(optarg); ··· 6363 6139 double interval = strtod(optarg, NULL); 6364 6140 6365 6141 if (interval < 0.001) { 6366 - fprintf(outf, "interval %f seconds is too small\n", 6367 - interval); 6142 + fprintf(outf, "interval %f seconds is too small\n", interval); 6368 6143 exit(2); 6369 6144 } 6370 6145 ··· 6390 6167 num_iterations = strtod(optarg, NULL); 6391 6168 6392 6169 if (num_iterations <= 0) { 6393 - fprintf(outf, "iterations %d should be positive number\n", 6394 - num_iterations); 6170 + fprintf(outf, "iterations %d should be positive number\n", num_iterations); 6395 6171 exit(2); 6396 6172 } 6397 6173 break; ··· 6410 6188 summary_only++; 6411 6189 
break; 6412 6190 case 'T': 6413 - tcc_activation_temp_override = atoi(optarg); 6191 + tj_max_override = atoi(optarg); 6414 6192 break; 6415 6193 case 'v': 6416 6194 print_version();