Merge tag 'pm+acpi-4.6-rc1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

+1 -1

Documentation/cpu-freq/intel-pstate.txt

··· 25 25 The driver provides minimum and maximum frequency limits and callbacks to set a 26 26 policy. The policy in cpufreq sysfs is referred to as the "scaling governor". 27 27 The cpufreq core can request the driver to operate in any of the two policies: 28 - "performance: and "powersave". The driver decides which frequency to use based 28 + "performance" and "powersave". The driver decides which frequency to use based 29 29 on the above policy selection considering minimum and maximum frequency limits. 30 30 31 31 The Intel P-State driver falls under the latter category, which implements the

+6

Documentation/kernel-parameters.txt

··· 193 193 (e.g. thinkpad_acpi, sony_acpi, etc.) instead 194 194 of the ACPI video.ko driver. 195 195 196 + acpi_force_32bit_fadt_addr 197 + force FADT to use 32 bit addresses rather than the 198 + 64 bit X_* addresses. Some firmware has broken 64 199 + bit addresses; force ACPI to ignore these and use 200 + the older legacy 32 bit addresses. 201 + 196 202 acpica_no_return_repair [HW, ACPI] 197 203 Disable AML predefined validation mechanism 198 204 This mechanism can repair the evaluation result to make

+7 -2

arch/arm/mach-imx/gpc.c

··· 374 374 .name = "PU", 375 375 .power_off = imx6q_pm_pu_power_off, 376 376 .power_on = imx6q_pm_pu_power_on, 377 - .power_off_latency_ns = 25000, 378 - .power_on_latency_ns = 2000000, 377 + .states = { 378 + [0] = { 379 + .power_off_latency_ns = 25000, 380 + .power_on_latency_ns = 2000000, 381 + }, 382 + }, 383 + .state_count = 1, 379 384 }, 380 385 }; 381 386

+4 -4

arch/x86/include/asm/msr-index.h

··· 235 235 #define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) 236 236 237 237 /* IA32_HWP_CAPABILITIES */ 238 - #define HWP_HIGHEST_PERF(x) (x & 0xff) 239 - #define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) 240 - #define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) 241 - #define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) 238 + #define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) 239 + #define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) 240 + #define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) 241 + #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) 242 242 243 243 /* IA32_HWP_REQUEST */ 244 244 #define HWP_MIN_PERF(x) (x & 0xff)

+1

drivers/acpi/Makefile

··· 43 43 acpi-y += acpi_lpss.o acpi_apd.o 44 44 acpi-y += acpi_platform.o 45 45 acpi-y += acpi_pnp.o 46 + acpi-$(CONFIG_ARM_AMBA) += acpi_amba.o 46 47 acpi-y += int340x_thermal.o 47 48 acpi-y += power.o 48 49 acpi-y += event.o

+122

drivers/acpi/acpi_amba.c

··· 1 + 2 + /* 3 + * ACPI support for AMBA bus type. 4 + * 5 + * Copyright (C) 2015, Linaro Ltd 6 + * Author: Graeme Gregory <graeme.gregory@linaro.org> 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License version 2 as 10 + * published by the Free Software Foundation. 11 + */ 12 + 13 + #include <linux/acpi.h> 14 + #include <linux/amba/bus.h> 15 + #include <linux/clkdev.h> 16 + #include <linux/clk-provider.h> 17 + #include <linux/device.h> 18 + #include <linux/err.h> 19 + #include <linux/ioport.h> 20 + #include <linux/kernel.h> 21 + #include <linux/module.h> 22 + 23 + #include "internal.h" 24 + 25 + static const struct acpi_device_id amba_id_list[] = { 26 + {"ARMH0061", 0}, /* PL061 GPIO Device */ 27 + {"", 0}, 28 + }; 29 + 30 + static void amba_register_dummy_clk(void) 31 + { 32 + static struct clk *amba_dummy_clk; 33 + 34 + /* If clock already registered */ 35 + if (amba_dummy_clk) 36 + return; 37 + 38 + amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 39 + CLK_IS_ROOT, 0); 40 + clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL); 41 + } 42 + 43 + static int amba_handler_attach(struct acpi_device *adev, 44 + const struct acpi_device_id *id) 45 + { 46 + struct amba_device *dev; 47 + struct resource_entry *rentry; 48 + struct list_head resource_list; 49 + bool address_found = false; 50 + int irq_no = 0; 51 + int ret; 52 + 53 + /* If the ACPI node already has a physical device attached, skip it. 
*/ 54 + if (adev->physical_node_count) 55 + return 0; 56 + 57 + dev = amba_device_alloc(dev_name(&adev->dev), 0, 0); 58 + if (!dev) { 59 + dev_err(&adev->dev, "%s(): amba_device_alloc() failed\n", 60 + __func__); 61 + return -ENOMEM; 62 + } 63 + 64 + INIT_LIST_HEAD(&resource_list); 65 + ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); 66 + if (ret < 0) 67 + goto err_free; 68 + 69 + list_for_each_entry(rentry, &resource_list, node) { 70 + switch (resource_type(rentry->res)) { 71 + case IORESOURCE_MEM: 72 + if (!address_found) { 73 + dev->res = *rentry->res; 74 + address_found = true; 75 + } 76 + break; 77 + case IORESOURCE_IRQ: 78 + if (irq_no < AMBA_NR_IRQS) 79 + dev->irq[irq_no++] = rentry->res->start; 80 + break; 81 + default: 82 + dev_warn(&adev->dev, "Invalid resource\n"); 83 + break; 84 + } 85 + } 86 + 87 + acpi_dev_free_resource_list(&resource_list); 88 + 89 + /* 90 + * If the ACPI node has a parent and that parent has a physical device 91 + * attached to it, that physical device should be the parent of 92 + * the amba device we are about to create. 93 + */ 94 + if (adev->parent) 95 + dev->dev.parent = acpi_get_first_physical_node(adev->parent); 96 + 97 + ACPI_COMPANION_SET(&dev->dev, adev); 98 + 99 + ret = amba_device_add(dev, &iomem_resource); 100 + if (ret) { 101 + dev_err(&adev->dev, "%s(): amba_device_add() failed (%d)\n", 102 + __func__, ret); 103 + goto err_free; 104 + } 105 + 106 + return 1; 107 + 108 + err_free: 109 + amba_device_put(dev); 110 + return ret; 111 + } 112 + 113 + static struct acpi_scan_handler amba_handler = { 114 + .ids = amba_id_list, 115 + .attach = amba_handler_attach, 116 + }; 117 + 118 + void __init acpi_amba_init(void) 119 + { 120 + amba_register_dummy_clk(); 121 + acpi_scan_add_handler(&amba_handler); 122 + }

+1

drivers/acpi/acpi_apd.c

··· 143 143 /* Generic apd devices */ 144 144 #ifdef CONFIG_X86_AMD_PLATFORM_DEVICE 145 145 { "AMD0010", APD_ADDR(cz_i2c_desc) }, 146 + { "AMDI0010", APD_ADDR(cz_i2c_desc) }, 146 147 { "AMD0020", APD_ADDR(cz_uart_desc) }, 147 148 { "AMD0030", }, 148 149 #endif

+2 -17

drivers/acpi/acpi_platform.c

··· 43 43 struct platform_device *acpi_create_platform_device(struct acpi_device *adev) 44 44 { 45 45 struct platform_device *pdev = NULL; 46 - struct acpi_device *acpi_parent; 47 46 struct platform_device_info pdevinfo; 48 47 struct resource_entry *rentry; 49 48 struct list_head resource_list; ··· 81 82 * attached to it, that physical device should be the parent of the 82 83 * platform device we are about to create. 83 84 */ 84 - pdevinfo.parent = NULL; 85 - acpi_parent = adev->parent; 86 - if (acpi_parent) { 87 - struct acpi_device_physical_node *entry; 88 - struct list_head *list; 89 - 90 - mutex_lock(&acpi_parent->physical_node_lock); 91 - list = &acpi_parent->physical_node_list; 92 - if (!list_empty(list)) { 93 - entry = list_first_entry(list, 94 - struct acpi_device_physical_node, 95 - node); 96 - pdevinfo.parent = entry->dev; 97 - } 98 - mutex_unlock(&acpi_parent->physical_node_lock); 99 - } 85 + pdevinfo.parent = adev->parent ? 86 + acpi_get_first_physical_node(adev->parent) : NULL; 100 87 pdevinfo.name = dev_name(&adev->dev); 101 88 pdevinfo.id = -1; 102 89 pdevinfo.res = resources;

+17

drivers/acpi/acpi_processor.c

··· 514 514 }, 515 515 }; 516 516 517 + static int acpi_processor_container_attach(struct acpi_device *dev, 518 + const struct acpi_device_id *id) 519 + { 520 + return 1; 521 + } 522 + 523 + static const struct acpi_device_id processor_container_ids[] = { 524 + { ACPI_PROCESSOR_CONTAINER_HID, }, 525 + { } 526 + }; 527 + 528 + static struct acpi_scan_handler processor_container_handler = { 529 + .ids = processor_container_ids, 530 + .attach = acpi_processor_container_attach, 531 + }; 532 + 517 533 void __init acpi_processor_init(void) 518 534 { 519 535 acpi_scan_add_handler_with_hotplug(&processor_handler, "processor"); 536 + acpi_scan_add_handler(&processor_container_handler); 520 537 }

-7

drivers/acpi/acpi_video.c

··· 218 218 struct thermal_cooling_device *cooling_dev; 219 219 }; 220 220 221 - static const char device_decode[][30] = { 222 - "motherboard VGA device", 223 - "PCI VGA device", 224 - "AGP VGA device", 225 - "UNKNOWN", 226 - }; 227 - 228 221 static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data); 229 222 static void acpi_video_device_rebind(struct acpi_video_bus *video); 230 223 static void acpi_video_device_bind(struct acpi_video_bus *video,

+1 -1

drivers/acpi/acpica/acglobal.h

··· 165 165 166 166 /* Initialization sequencing */ 167 167 168 - ACPI_INIT_GLOBAL(u8, acpi_gbl_reg_methods_enabled, FALSE); 168 + ACPI_INIT_GLOBAL(u8, acpi_gbl_namespace_initialized, FALSE); 169 169 170 170 /* Misc */ 171 171

+3 -3

drivers/acpi/acpica/aclocal.h

··· 85 85 #define ACPI_MTX_MEMORY 5 /* Debug memory tracking lists */ 86 86 87 87 #define ACPI_MAX_MUTEX 5 88 - #define ACPI_NUM_MUTEX ACPI_MAX_MUTEX+1 88 + #define ACPI_NUM_MUTEX (ACPI_MAX_MUTEX+1) 89 89 90 90 /* Lock structure for reader/writer interfaces */ 91 91 ··· 103 103 #define ACPI_LOCK_HARDWARE 1 104 104 105 105 #define ACPI_MAX_LOCK 1 106 - #define ACPI_NUM_LOCK ACPI_MAX_LOCK+1 106 + #define ACPI_NUM_LOCK (ACPI_MAX_LOCK+1) 107 107 108 108 /* This Thread ID means that the mutex is not in use (unlocked) */ 109 109 110 - #define ACPI_MUTEX_NOT_ACQUIRED (acpi_thread_id) 0 110 + #define ACPI_MUTEX_NOT_ACQUIRED ((acpi_thread_id) 0) 111 111 112 112 /* This Thread ID means an invalid thread ID */ 113 113

+1 -1

drivers/acpi/acpica/acnamesp.h

··· 88 88 */ 89 89 acpi_status acpi_ns_initialize_objects(void); 90 90 91 - acpi_status acpi_ns_initialize_devices(void); 91 + acpi_status acpi_ns_initialize_devices(u32 flags); 92 92 93 93 /* 94 94 * nsload - Namespace loading

+1 -1

drivers/acpi/acpica/acpredef.h

··· 1125 1125 PACKAGE_INFO(0, 0, 0, 0, 0, 0) /* Table terminator */ 1126 1126 }; 1127 1127 1128 - static const union acpi_predefined_info acpi_gbl_scope_names[] = { 1128 + const union acpi_predefined_info acpi_gbl_scope_names[] = { 1129 1129 {{"_GPE", 0, 0}}, 1130 1130 {{"_PR_", 0, 0}}, 1131 1131 {{"_SB_", 0, 0}},

+1 -1

drivers/acpi/acpica/dbcmds.c

··· 348 348 } else { 349 349 /* If the pointer is null, the table has been unloaded */ 350 350 351 - ACPI_INFO((AE_INFO, "%4.4s - Table has been unloaded", 351 + ACPI_INFO(("%4.4s - Table has been unloaded", 352 352 table_desc->signature.ascii)); 353 353 } 354 354 }

+3 -2

drivers/acpi/acpica/dbconvert.c

··· 408 408 409 409 new_buffer = acpi_db_encode_pld_buffer(pld_info); 410 410 if (!new_buffer) { 411 - return; 411 + goto exit; 412 412 } 413 413 414 414 /* The two bit-packed buffers should match */ ··· 479 479 pld_info->horizontal_offset); 480 480 } 481 481 482 - ACPI_FREE(pld_info); 483 482 ACPI_FREE(new_buffer); 483 + exit: 484 + ACPI_FREE(pld_info); 484 485 }

+1 -2

drivers/acpi/acpica/dsmethod.c

··· 809 809 if (method_desc->method. 810 810 info_flags & ACPI_METHOD_SERIALIZED_PENDING) { 811 811 if (walk_state) { 812 - ACPI_INFO((AE_INFO, 813 - "Marking method %4.4s as Serialized " 812 + ACPI_INFO(("Marking method %4.4s as Serialized " 814 813 "because of AE_ALREADY_EXISTS error", 815 814 walk_state->method_node->name. 816 815 ascii));

+1 -2

drivers/acpi/acpica/dsobject.c

··· 524 524 arg = arg->common.next; 525 525 } 526 526 527 - ACPI_INFO((AE_INFO, 528 - "Actual Package length (%u) is larger than " 527 + ACPI_INFO(("Actual Package length (%u) is larger than " 529 528 "NumElements field (%u), truncated", 530 529 i, element_count)); 531 530 } else if (i < element_count) {

+1 -2

drivers/acpi/acpica/evgpeblk.c

··· 499 499 } 500 500 501 501 if (gpe_enabled_count) { 502 - ACPI_INFO((AE_INFO, 503 - "Enabled %u GPEs in block %02X to %02X", 502 + ACPI_INFO(("Enabled %u GPEs in block %02X to %02X", 504 503 gpe_enabled_count, (u32)gpe_block->block_base_number, 505 504 (u32)(gpe_block->block_base_number + 506 505 (gpe_block->gpe_count - 1))));

+1 -1

drivers/acpi/acpica/evgpeinit.c

··· 281 281 } 282 282 283 283 if (walk_info.count) { 284 - ACPI_INFO((AE_INFO, "Enabled %u new GPEs", walk_info.count)); 284 + ACPI_INFO(("Enabled %u new GPEs", walk_info.count)); 285 285 } 286 286 287 287 (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS);

+1 -1

drivers/acpi/acpica/evregion.c

··· 600 600 601 601 if (region_obj2->extra.method_REG == NULL || 602 602 region_obj->region.handler == NULL || 603 - !acpi_gbl_reg_methods_enabled) { 603 + !acpi_gbl_namespace_initialized) { 604 604 return_ACPI_STATUS(AE_OK); 605 605 } 606 606

+2 -2

drivers/acpi/acpica/exconfig.c

··· 252 252 253 253 status = acpi_get_table_by_index(table_index, &table); 254 254 if (ACPI_SUCCESS(status)) { 255 - ACPI_INFO((AE_INFO, "Dynamic OEM Table Load:")); 255 + ACPI_INFO(("Dynamic OEM Table Load:")); 256 256 acpi_tb_print_table_header(0, table); 257 257 } 258 258 ··· 472 472 473 473 /* Install the new table into the local data structures */ 474 474 475 - ACPI_INFO((AE_INFO, "Dynamic OEM Table Load:")); 475 + ACPI_INFO(("Dynamic OEM Table Load:")); 476 476 (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); 477 477 478 478 status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),

+3 -1

drivers/acpi/acpica/exoparg3.c

··· 123 123 * op is intended for use by disassemblers in order to properly 124 124 * disassemble control method invocations. The opcode or group of 125 125 * opcodes should be surrounded by an "if (0)" clause to ensure that 126 - * AML interpreters never see the opcode. 126 + * AML interpreters never see the opcode. Thus, something is 127 + * wrong if an external opcode ever gets here. 127 128 */ 129 + ACPI_ERROR((AE_INFO, "Executed External Op")); 128 130 status = AE_OK; 129 131 goto cleanup; 130 132

+1 -2

drivers/acpi/acpica/nseval.c

··· 378 378 acpi_ut_remove_reference(prev); 379 379 } 380 380 381 - ACPI_INFO((AE_INFO, 382 - "Executed %u blocks of module-level executable AML code", 381 + ACPI_INFO(("Executed %u blocks of module-level executable AML code", 383 382 method_count)); 384 383 385 384 ACPI_FREE(info);

+84 -55

drivers/acpi/acpica/nsinit.c

··· 46 46 #include "acnamesp.h" 47 47 #include "acdispat.h" 48 48 #include "acinterp.h" 49 + #include "acevents.h" 49 50 50 51 #define _COMPONENT ACPI_NAMESPACE 51 52 ACPI_MODULE_NAME("nsinit") ··· 84 83 85 84 ACPI_FUNCTION_TRACE(ns_initialize_objects); 86 85 86 + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, 87 + "[Init] Completing Initialization of ACPI Objects\n")); 87 88 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, 88 89 "**** Starting initialization of namespace objects ****\n")); 89 90 ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, ··· 136 133 * 137 134 ******************************************************************************/ 138 135 139 - acpi_status acpi_ns_initialize_devices(void) 136 + acpi_status acpi_ns_initialize_devices(u32 flags) 140 137 { 141 - acpi_status status; 138 + acpi_status status = AE_OK; 142 139 struct acpi_device_walk_info info; 143 140 144 141 ACPI_FUNCTION_TRACE(ns_initialize_devices); 145 142 146 - /* Init counters */ 143 + if (!(flags & ACPI_NO_DEVICE_INIT)) { 144 + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, 145 + "[Init] Initializing ACPI Devices\n")); 147 146 148 - info.device_count = 0; 149 - info.num_STA = 0; 150 - info.num_INI = 0; 147 + /* Init counters */ 151 148 152 - ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, 153 - "Initializing Device/Processor/Thermal objects " 154 - "and executing _INI/_STA methods:\n")); 149 + info.device_count = 0; 150 + info.num_STA = 0; 151 + info.num_INI = 0; 155 152 156 - /* Tree analysis: find all subtrees that contain _INI methods */ 153 + ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, 154 + "Initializing Device/Processor/Thermal objects " 155 + "and executing _INI/_STA methods:\n")); 157 156 158 - status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 159 - ACPI_UINT32_MAX, FALSE, 160 - acpi_ns_find_ini_methods, NULL, &info, 161 - NULL); 162 - if (ACPI_FAILURE(status)) { 163 - goto error_exit; 164 - } 157 + /* Tree analysis: find all subtrees that contain _INI methods */ 165 158 166 - /* Allocate the evaluation information block */ 159 + 
status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 160 + ACPI_UINT32_MAX, FALSE, 161 + acpi_ns_find_ini_methods, NULL, 162 + &info, NULL); 163 + if (ACPI_FAILURE(status)) { 164 + goto error_exit; 165 + } 167 166 168 - info.evaluate_info = 169 - ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info)); 170 - if (!info.evaluate_info) { 171 - status = AE_NO_MEMORY; 172 - goto error_exit; 167 + /* Allocate the evaluation information block */ 168 + 169 + info.evaluate_info = 170 + ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info)); 171 + if (!info.evaluate_info) { 172 + status = AE_NO_MEMORY; 173 + goto error_exit; 174 + } 175 + 176 + /* 177 + * Execute the "global" _INI method that may appear at the root. 178 + * This support is provided for Windows compatibility (Vista+) and 179 + * is not part of the ACPI specification. 180 + */ 181 + info.evaluate_info->prefix_node = acpi_gbl_root_node; 182 + info.evaluate_info->relative_pathname = METHOD_NAME__INI; 183 + info.evaluate_info->parameters = NULL; 184 + info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE; 185 + 186 + status = acpi_ns_evaluate(info.evaluate_info); 187 + if (ACPI_SUCCESS(status)) { 188 + info.num_INI++; 189 + } 173 190 } 174 191 175 192 /* 176 - * Execute the "global" _INI method that may appear at the root. This 177 - * support is provided for Windows compatibility (Vista+) and is not 178 - * part of the ACPI specification. 193 + * Run all _REG methods 194 + * 195 + * Note: Any objects accessed by the _REG methods will be automatically 196 + * initialized, even if they contain executable AML (see the call to 197 + * acpi_ns_initialize_objects below). 
179 198 */ 180 - info.evaluate_info->prefix_node = acpi_gbl_root_node; 181 - info.evaluate_info->relative_pathname = METHOD_NAME__INI; 182 - info.evaluate_info->parameters = NULL; 183 - info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE; 199 + if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) { 200 + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, 201 + "[Init] Executing _REG OpRegion methods\n")); 184 202 185 - status = acpi_ns_evaluate(info.evaluate_info); 186 - if (ACPI_SUCCESS(status)) { 187 - info.num_INI++; 203 + status = acpi_ev_initialize_op_regions(); 204 + if (ACPI_FAILURE(status)) { 205 + goto error_exit; 206 + } 188 207 } 189 208 190 - /* Walk namespace to execute all _INIs on present devices */ 209 + if (!(flags & ACPI_NO_DEVICE_INIT)) { 191 210 192 - status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 193 - ACPI_UINT32_MAX, FALSE, 194 - acpi_ns_init_one_device, NULL, &info, 195 - NULL); 211 + /* Walk namespace to execute all _INIs on present devices */ 196 212 197 - /* 198 - * Any _OSI requests should be completed by now. If the BIOS has 199 - * requested any Windows OSI strings, we will always truncate 200 - * I/O addresses to 16 bits -- for Windows compatibility. 201 - */ 202 - if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) { 203 - acpi_gbl_truncate_io_addresses = TRUE; 213 + status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, 214 + ACPI_UINT32_MAX, FALSE, 215 + acpi_ns_init_one_device, NULL, 216 + &info, NULL); 217 + 218 + /* 219 + * Any _OSI requests should be completed by now. If the BIOS has 220 + * requested any Windows OSI strings, we will always truncate 221 + * I/O addresses to 16 bits -- for Windows compatibility. 
222 + */ 223 + if (acpi_gbl_osi_data >= ACPI_OSI_WIN_2000) { 224 + acpi_gbl_truncate_io_addresses = TRUE; 225 + } 226 + 227 + ACPI_FREE(info.evaluate_info); 228 + if (ACPI_FAILURE(status)) { 229 + goto error_exit; 230 + } 231 + 232 + ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, 233 + " Executed %u _INI methods requiring %u _STA executions " 234 + "(examined %u objects)\n", 235 + info.num_INI, info.num_STA, 236 + info.device_count)); 204 237 } 205 - 206 - ACPI_FREE(info.evaluate_info); 207 - if (ACPI_FAILURE(status)) { 208 - goto error_exit; 209 - } 210 - 211 - ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, 212 - " Executed %u _INI methods requiring %u _STA executions " 213 - "(examined %u objects)\n", 214 - info.num_INI, info.num_STA, info.device_count)); 215 238 216 239 return_ACPI_STATUS(status); 217 240

+2 -3

drivers/acpi/acpica/tbinstal.c

··· 267 267 if (!reload && 268 268 acpi_gbl_disable_ssdt_table_install && 269 269 ACPI_COMPARE_NAME(&new_table_desc.signature, ACPI_SIG_SSDT)) { 270 - ACPI_INFO((AE_INFO, 271 - "Ignoring installation of %4.4s at %8.8X%8.8X", 270 + ACPI_INFO(("Ignoring installation of %4.4s at %8.8X%8.8X", 272 271 new_table_desc.signature.ascii, 273 272 ACPI_FORMAT_UINT64(address))); 274 273 goto release_and_exit; ··· 431 432 return; 432 433 } 433 434 434 - ACPI_INFO((AE_INFO, "%4.4s 0x%8.8X%8.8X" 435 + ACPI_INFO(("%4.4s 0x%8.8X%8.8X" 435 436 " %s table override, new table: 0x%8.8X%8.8X", 436 437 old_table_desc->signature.ascii, 437 438 ACPI_FORMAT_UINT64(old_table_desc->address),

+3 -4

drivers/acpi/acpica/tbprint.c

··· 132 132 133 133 /* FACS only has signature and length fields */ 134 134 135 - ACPI_INFO((AE_INFO, "%-4.4s 0x%8.8X%8.8X %06X", 135 + ACPI_INFO(("%-4.4s 0x%8.8X%8.8X %06X", 136 136 header->signature, ACPI_FORMAT_UINT64(address), 137 137 header->length)); 138 138 } else if (ACPI_VALIDATE_RSDP_SIG(header->signature)) { ··· 144 144 ACPI_OEM_ID_SIZE); 145 145 acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE); 146 146 147 - ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", 147 + ACPI_INFO(("RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", 148 148 ACPI_FORMAT_UINT64(address), 149 149 (ACPI_CAST_PTR(struct acpi_table_rsdp, header)-> 150 150 revision > ··· 158 158 159 159 acpi_tb_cleanup_table_header(&local_header, header); 160 160 161 - ACPI_INFO((AE_INFO, 162 - "%-4.4s 0x%8.8X%8.8X" 161 + ACPI_INFO(("%-4.4s 0x%8.8X%8.8X" 163 162 " %06X (v%.2d %-6.6s %-8.8s %08X %-4.4s %08X)", 164 163 local_header.signature, ACPI_FORMAT_UINT64(address), 165 164 local_header.length, local_header.revision,

+1 -3

drivers/acpi/acpica/tbutils.c

··· 174 174 ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL, 175 175 new_table); 176 176 177 - ACPI_INFO((AE_INFO, 178 - "Forced DSDT copy: length 0x%05X copied locally, original unmapped", 179 - new_table->length)); 177 + ACPI_INFO(("Forced DSDT copy: length 0x%05X copied locally, original unmapped", new_table->length)); 180 178 181 179 return (new_table); 182 180 }

+36 -4

drivers/acpi/acpica/tbxfload.c

··· 47 47 #include "accommon.h" 48 48 #include "acnamesp.h" 49 49 #include "actables.h" 50 + #include "acevents.h" 50 51 51 52 #define _COMPONENT ACPI_TABLES 52 53 ACPI_MODULE_NAME("tbxfload") ··· 69 68 70 69 ACPI_FUNCTION_TRACE(acpi_load_tables); 71 70 71 + /* 72 + * Install the default operation region handlers. These are the 73 + * handlers that are defined by the ACPI specification to be 74 + * "always accessible" -- namely, system_memory, system_IO, and 75 + * PCI_Config. This also means that no _REG methods need to be 76 + * run for these address spaces. We need to have these handlers 77 + * installed before any AML code can be executed, especially any 78 + * module-level code (11/2015). 79 + * Note that we allow OSPMs to install their own region handlers 80 + * between acpi_initialize_subsystem() and acpi_load_tables() to use 81 + * their customized default region handlers. 82 + */ 83 + status = acpi_ev_install_region_handlers(); 84 + if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) { 85 + ACPI_EXCEPTION((AE_INFO, status, 86 + "During Region initialization")); 87 + return_ACPI_STATUS(status); 88 + } 89 + 72 90 /* Load the namespace from the tables */ 73 91 74 92 status = acpi_tb_load_namespace(); ··· 103 83 "While loading namespace from ACPI tables")); 104 84 } 105 85 86 + if (!acpi_gbl_group_module_level_code) { 87 + /* 88 + * Initialize the objects that remain uninitialized. This 89 + * runs the executable AML that may be part of the 90 + * declaration of these objects: 91 + * operation_regions, buffer_fields, Buffers, and Packages. 
92 + */ 93 + status = acpi_ns_initialize_objects(); 94 + if (ACPI_FAILURE(status)) { 95 + return_ACPI_STATUS(status); 96 + } 97 + } 98 + 99 + acpi_gbl_namespace_initialized = TRUE; 106 100 return_ACPI_STATUS(status); 107 101 } 108 102 ··· 240 206 } 241 207 242 208 if (!tables_failed) { 243 - ACPI_INFO((AE_INFO, 244 - "%u ACPI AML tables successfully acquired and loaded\n", 245 - tables_loaded)); 209 + ACPI_INFO(("%u ACPI AML tables successfully acquired and loaded\n", tables_loaded)); 246 210 } else { 247 211 ACPI_ERROR((AE_INFO, 248 212 "%u table load failures, %u successful", ··· 333 301 334 302 /* Install the table and load it into the namespace */ 335 303 336 - ACPI_INFO((AE_INFO, "Host-directed Dynamic ACPI Table Load:")); 304 + ACPI_INFO(("Host-directed Dynamic ACPI Table Load:")); 337 305 (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); 338 306 339 307 status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),

+1 -1

drivers/acpi/acpica/utcache.c

··· 245 245 acpi_status status; 246 246 void *object; 247 247 248 - ACPI_FUNCTION_NAME(os_acquire_object); 248 + ACPI_FUNCTION_TRACE(os_acquire_object); 249 249 250 250 if (!cache) { 251 251 return_PTR(NULL);

+211 -43

drivers/acpi/acpica/utnonansi.c

··· 140 140 return (c1 - c2); 141 141 } 142 142 143 + #if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) 144 + /******************************************************************************* 145 + * 146 + * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat 147 + * 148 + * PARAMETERS: Adds a "DestSize" parameter to each of the standard string 149 + * functions. This is the size of the Destination buffer. 150 + * 151 + * RETURN: TRUE if the operation would overflow the destination buffer. 152 + * 153 + * DESCRIPTION: Safe versions of standard Clib string functions. Ensure that 154 + * the result of the operation will not overflow the output string 155 + * buffer. 156 + * 157 + * NOTE: These functions are typically only helpful for processing 158 + * user input and command lines. For most ACPICA code, the 159 + * required buffer length is precisely calculated before buffer 160 + * allocation, so the use of these functions is unnecessary. 161 + * 162 + ******************************************************************************/ 163 + 164 + u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) 165 + { 166 + 167 + if (strlen(source) >= dest_size) { 168 + return (TRUE); 169 + } 170 + 171 + strcpy(dest, source); 172 + return (FALSE); 173 + } 174 + 175 + u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) 176 + { 177 + 178 + if ((strlen(dest) + strlen(source)) >= dest_size) { 179 + return (TRUE); 180 + } 181 + 182 + strcat(dest, source); 183 + return (FALSE); 184 + } 185 + 186 + u8 187 + acpi_ut_safe_strncat(char *dest, 188 + acpi_size dest_size, 189 + char *source, acpi_size max_transfer_length) 190 + { 191 + acpi_size actual_transfer_length; 192 + 193 + actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); 194 + 195 + if ((strlen(dest) + actual_transfer_length) >= dest_size) { 196 + return (TRUE); 197 + } 198 + 199 + strncat(dest, source, max_transfer_length); 200 + return (FALSE); 201 + 
} 202 + #endif 203 + 143 204 /******************************************************************************* 144 205 * 145 206 * FUNCTION: acpi_ut_strtoul64 ··· 216 155 * 32-bit or 64-bit conversion, depending on the current mode 217 156 * of the interpreter. 218 157 * 219 - * NOTE: Does not support Octal strings, not needed. 158 + * NOTES: acpi_gbl_integer_byte_width should be set to the proper width. 159 + * For the core ACPICA code, this width depends on the DSDT 160 + * version. For iASL, the default byte width is always 8. 161 + * 162 + * Does not support Octal strings, not needed at this time. 163 + * 164 + * There is an earlier version of the function after this one, 165 + * below. It is slightly different than this one, and the two 166 + * may eventually may need to be merged. (01/2016). 220 167 * 221 168 ******************************************************************************/ 222 169 ··· 240 171 u8 sign_of0x = 0; 241 172 u8 term = 0; 242 173 243 - ACPI_FUNCTION_TRACE_STR(ut_stroul64, string); 174 + ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string); 244 175 245 176 switch (base) { 246 177 case ACPI_ANY_BASE: ··· 387 318 } 388 319 } 389 320 390 - #if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) 321 + #ifdef _OBSOLETE_FUNCTIONS 322 + /* TBD: use version in ACPICA main code base? */ 323 + /* DONE: 01/2016 */ 324 + 391 325 /******************************************************************************* 392 326 * 393 - * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat 327 + * FUNCTION: strtoul64 394 328 * 395 - * PARAMETERS: Adds a "DestSize" parameter to each of the standard string 396 - * functions. This is the size of the Destination buffer. 329 + * PARAMETERS: string - Null terminated string 330 + * terminater - Where a pointer to the terminating byte 331 + * is returned 332 + * base - Radix of the string 397 333 * 398 - * RETURN: TRUE if the operation would overflow the destination buffer. 
334 + * RETURN: Converted value 399 335 * 400 - * DESCRIPTION: Safe versions of standard Clib string functions. Ensure that 401 - * the result of the operation will not overflow the output string 402 - * buffer. 403 - * 404 - * NOTE: These functions are typically only helpful for processing 405 - * user input and command lines. For most ACPICA code, the 406 - * required buffer length is precisely calculated before buffer 407 - * allocation, so the use of these functions is unnecessary. 336 + * DESCRIPTION: Convert a string into an unsigned value. 408 337 * 409 338 ******************************************************************************/ 410 339 411 - u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) 340 + acpi_status strtoul64(char *string, u32 base, u64 *ret_integer) 412 341 { 342 + u32 index; 343 + u32 sign; 344 + u64 return_value = 0; 345 + acpi_status status = AE_OK; 413 346 414 - if (strlen(source) >= dest_size) { 415 - return (TRUE); 347 + *ret_integer = 0; 348 + 349 + switch (base) { 350 + case 0: 351 + case 8: 352 + case 10: 353 + case 16: 354 + 355 + break; 356 + 357 + default: 358 + /* 359 + * The specified Base parameter is not in the domain of 360 + * this function: 361 + */ 362 + return (AE_BAD_PARAMETER); 416 363 } 417 364 418 - strcpy(dest, source); 419 - return (FALSE); 420 - } 365 + /* Skip over any white space in the buffer: */ 421 366 422 - u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) 423 - { 424 - 425 - if ((strlen(dest) + strlen(source)) >= dest_size) { 426 - return (TRUE); 367 + while (isspace((int)*string) || *string == '\t') { 368 + ++string; 427 369 } 428 370 429 - strcat(dest, source); 430 - return (FALSE); 431 - } 432 - 433 - u8 434 - acpi_ut_safe_strncat(char *dest, 435 - acpi_size dest_size, 436 - char *source, acpi_size max_transfer_length) 437 - { 438 - acpi_size actual_transfer_length; 439 - 440 - actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); 441 - 442 - if 
((strlen(dest) + actual_transfer_length) >= dest_size) { 443 - return (TRUE); 371 + /* 372 + * The buffer may contain an optional plus or minus sign. 373 + * If it does, then skip over it but remember what is was: 374 + */ 375 + if (*string == '-') { 376 + sign = ACPI_SIGN_NEGATIVE; 377 + ++string; 378 + } else if (*string == '+') { 379 + ++string; 380 + sign = ACPI_SIGN_POSITIVE; 381 + } else { 382 + sign = ACPI_SIGN_POSITIVE; 444 383 } 445 384 446 - strncat(dest, source, max_transfer_length); 447 - return (FALSE); 385 + /* 386 + * If the input parameter Base is zero, then we need to 387 + * determine if it is octal, decimal, or hexadecimal: 388 + */ 389 + if (base == 0) { 390 + if (*string == '0') { 391 + if (tolower((int)*(++string)) == 'x') { 392 + base = 16; 393 + ++string; 394 + } else { 395 + base = 8; 396 + } 397 + } else { 398 + base = 10; 399 + } 400 + } 401 + 402 + /* 403 + * For octal and hexadecimal bases, skip over the leading 404 + * 0 or 0x, if they are present. 405 + */ 406 + if (base == 8 && *string == '0') { 407 + string++; 408 + } 409 + 410 + if (base == 16 && *string == '0' && tolower((int)*(++string)) == 'x') { 411 + string++; 412 + } 413 + 414 + /* Main loop: convert the string to an unsigned long */ 415 + 416 + while (*string) { 417 + if (isdigit((int)*string)) { 418 + index = ((u8)*string) - '0'; 419 + } else { 420 + index = (u8)toupper((int)*string); 421 + if (isupper((int)index)) { 422 + index = index - 'A' + 10; 423 + } else { 424 + goto error_exit; 425 + } 426 + } 427 + 428 + if (index >= base) { 429 + goto error_exit; 430 + } 431 + 432 + /* Check to see if value is out of range: */ 433 + 434 + if (return_value > ((ACPI_UINT64_MAX - (u64)index) / (u64)base)) { 435 + goto error_exit; 436 + } else { 437 + return_value *= base; 438 + return_value += index; 439 + } 440 + 441 + ++string; 442 + } 443 + 444 + /* If a minus sign was present, then "the conversion is negated": */ 445 + 446 + if (sign == ACPI_SIGN_NEGATIVE) { 447 + return_value = 
(ACPI_UINT32_MAX - return_value) + 1; 448 + } 449 + 450 + *ret_integer = return_value; 451 + return (status); 452 + 453 + error_exit: 454 + switch (base) { 455 + case 8: 456 + 457 + status = AE_BAD_OCTAL_CONSTANT; 458 + break; 459 + 460 + case 10: 461 + 462 + status = AE_BAD_DECIMAL_CONSTANT; 463 + break; 464 + 465 + case 16: 466 + 467 + status = AE_BAD_HEX_CONSTANT; 468 + break; 469 + 470 + default: 471 + 472 + /* Base validated above */ 473 + 474 + break; 475 + } 476 + 477 + return (status); 448 478 } 449 479 #endif

+1 -1

drivers/acpi/acpica/uttrack.c

··· 712 712 /* Print summary */ 713 713 714 714 if (!num_outstanding) { 715 - ACPI_INFO((AE_INFO, "No outstanding allocations")); 715 + ACPI_INFO(("No outstanding allocations")); 716 716 } else { 717 717 ACPI_ERROR((AE_INFO, "%u(0x%X) Outstanding allocations", 718 718 num_outstanding, num_outstanding));

+1 -2

drivers/acpi/acpica/utxferror.c

··· 175 175 * TBD: module_name and line_number args are not needed, should be removed. 176 176 * 177 177 ******************************************************************************/ 178 - void ACPI_INTERNAL_VAR_XFACE 179 - acpi_info(const char *module_name, u32 line_number, const char *format, ...) 178 + void ACPI_INTERNAL_VAR_XFACE acpi_info(const char *format, ...) 180 179 { 181 180 va_list arg_list; 182 181

+15 -52

drivers/acpi/acpica/utxfinit.c

··· 154 154 */ 155 155 acpi_gbl_early_initialization = FALSE; 156 156 157 - /* 158 - * Install the default operation region handlers. These are the 159 - * handlers that are defined by the ACPI specification to be 160 - * "always accessible" -- namely, system_memory, system_IO, and 161 - * PCI_Config. This also means that no _REG methods need to be 162 - * run for these address spaces. We need to have these handlers 163 - * installed before any AML code can be executed, especially any 164 - * module-level code (11/2015). 165 - */ 166 - status = acpi_ev_install_region_handlers(); 167 - if (ACPI_FAILURE(status)) { 168 - ACPI_EXCEPTION((AE_INFO, status, 169 - "During Region initialization")); 170 - return_ACPI_STATUS(status); 171 - } 172 157 #if (!ACPI_REDUCED_HARDWARE) 173 158 174 159 /* Enable ACPI mode */ ··· 245 260 246 261 ACPI_FUNCTION_TRACE(acpi_initialize_objects); 247 262 248 - /* 249 - * Run all _REG methods 250 - * 251 - * Note: Any objects accessed by the _REG methods will be automatically 252 - * initialized, even if they contain executable AML (see the call to 253 - * acpi_ns_initialize_objects below). 254 - */ 255 - acpi_gbl_reg_methods_enabled = TRUE; 256 - if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) { 257 - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, 258 - "[Init] Executing _REG OpRegion methods\n")); 259 - 260 - status = acpi_ev_initialize_op_regions(); 261 - if (ACPI_FAILURE(status)) { 262 - return_ACPI_STATUS(status); 263 - } 264 - } 265 263 #ifdef ACPI_EXEC_APP 266 264 /* 267 265 * This call implements the "initialization file" option for acpi_exec. ··· 267 299 */ 268 300 if (acpi_gbl_group_module_level_code) { 269 301 acpi_ns_exec_module_code_list(); 270 - } 271 302 272 - /* 273 - * Initialize the objects that remain uninitialized. This runs the 274 - * executable AML that may be part of the declaration of these objects: 275 - * operation_regions, buffer_fields, Buffers, and Packages. 
276 - */ 277 - if (!(flags & ACPI_NO_OBJECT_INIT)) { 278 - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, 279 - "[Init] Completing Initialization of ACPI Objects\n")); 280 - 281 - status = acpi_ns_initialize_objects(); 282 - if (ACPI_FAILURE(status)) { 283 - return_ACPI_STATUS(status); 303 + /* 304 + * Initialize the objects that remain uninitialized. This 305 + * runs the executable AML that may be part of the 306 + * declaration of these objects: 307 + * operation_regions, buffer_fields, Buffers, and Packages. 308 + */ 309 + if (!(flags & ACPI_NO_OBJECT_INIT)) { 310 + status = acpi_ns_initialize_objects(); 311 + if (ACPI_FAILURE(status)) { 312 + return_ACPI_STATUS(status); 313 + } 284 314 } 285 315 } 286 316 287 317 /* 288 - * Initialize all device objects in the namespace. This runs the device 289 - * _STA and _INI methods. 318 + * Initialize all device/region objects in the namespace. This runs 319 + * the device _STA and _INI methods and region _REG methods. 290 320 */ 291 - if (!(flags & ACPI_NO_DEVICE_INIT)) { 292 - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, 293 - "[Init] Initializing ACPI Devices\n")); 294 - 295 - status = acpi_ns_initialize_devices(); 321 + if (!(flags & (ACPI_NO_DEVICE_INIT | ACPI_NO_ADDRESS_SPACE_INIT))) { 322 + status = acpi_ns_initialize_devices(flags); 296 323 if (ACPI_FAILURE(status)) { 297 324 return_ACPI_STATUS(status); 298 325 }

+4 -2

drivers/acpi/apei/apei-base.c

··· 536 536 goto err_unmap_ioport; 537 537 } 538 538 539 - return 0; 539 + goto arch_res_fini; 540 + 540 541 err_unmap_ioport: 541 542 list_for_each_entry(res, &resources->ioport, list) { 542 543 if (res == res_bak) ··· 552 551 release_mem_region(res->start, res->end - res->start); 553 552 } 554 553 arch_res_fini: 555 - apei_resources_fini(&arch_res); 554 + if (arch_apei_filter_addr) 555 + apei_resources_fini(&arch_res); 556 556 nvs_res_fini: 557 557 apei_resources_fini(&nvs_resources); 558 558 return rc;

+3

drivers/acpi/apei/erst.c

··· 1207 1207 "Failed to allocate %lld bytes for persistent store error log.\n", 1208 1208 erst_erange.size); 1209 1209 1210 + /* Cleanup ERST Resources */ 1211 + apei_resources_fini(&erst_resources); 1212 + 1210 1213 return 0; 1211 1214 1212 1215 err_release_erange:

+7 -16

drivers/acpi/apei/ghes.c

··· 26 26 */ 27 27 28 28 #include <linux/kernel.h> 29 - #include <linux/module.h> 29 + #include <linux/moduleparam.h> 30 30 #include <linux/init.h> 31 31 #include <linux/acpi.h> 32 32 #include <linux/io.h> ··· 79 79 ((struct acpi_hest_generic_status *) \ 80 80 ((struct ghes_estatus_node *)(estatus_node) + 1)) 81 81 82 + /* 83 + * This driver isn't really modular, however for the time being, 84 + * continuing to use module_param is the easiest way to remain 85 + * compatible with existing boot arg use cases. 86 + */ 82 87 bool ghes_disable; 83 88 module_param_named(disable, ghes_disable, bool, 0); 84 89 ··· 1153 1148 err: 1154 1149 return rc; 1155 1150 } 1156 - 1157 - static void __exit ghes_exit(void) 1158 - { 1159 - platform_driver_unregister(&ghes_platform_driver); 1160 - ghes_estatus_pool_exit(); 1161 - ghes_ioremap_exit(); 1162 - } 1163 - 1164 - module_init(ghes_init); 1165 - module_exit(ghes_exit); 1166 - 1167 - MODULE_AUTHOR("Huang Ying"); 1168 - MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); 1169 - MODULE_LICENSE("GPL"); 1170 - MODULE_ALIAS("platform:GHES"); 1151 + device_initcall(ghes_init);

+3 -7

drivers/acpi/bgrt.c

··· 1 1 /* 2 + * BGRT boot graphic support 3 + * Authors: Matthew Garrett, Josh Triplett <josh@joshtriplett.org> 2 4 * Copyright 2012 Red Hat, Inc <mjg@redhat.com> 3 5 * Copyright 2012 Intel Corporation 4 6 * ··· 10 8 */ 11 9 12 10 #include <linux/kernel.h> 13 - #include <linux/module.h> 14 11 #include <linux/init.h> 15 12 #include <linux/device.h> 16 13 #include <linux/sysfs.h> ··· 104 103 kobject_put(bgrt_kobj); 105 104 return ret; 106 105 } 107 - 108 - module_init(bgrt_init); 109 - 110 - MODULE_AUTHOR("Matthew Garrett, Josh Triplett <josh@joshtriplett.org>"); 111 - MODULE_DESCRIPTION("BGRT boot graphic support"); 112 - MODULE_LICENSE("GPL"); 106 + device_initcall(bgrt_init);

+20 -6

drivers/acpi/bus.c

··· 479 479 Device Matching 480 480 -------------------------------------------------------------------------- */ 481 481 482 - static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev, 483 - const struct device *dev) 482 + /** 483 + * acpi_get_first_physical_node - Get first physical node of an ACPI device 484 + * @adev: ACPI device in question 485 + * 486 + * Return: First physical node of ACPI device @adev 487 + */ 488 + struct device *acpi_get_first_physical_node(struct acpi_device *adev) 484 489 { 485 490 struct mutex *physical_node_lock = &adev->physical_node_lock; 491 + struct device *phys_dev; 486 492 487 493 mutex_lock(physical_node_lock); 488 494 if (list_empty(&adev->physical_node_list)) { 489 - adev = NULL; 495 + phys_dev = NULL; 490 496 } else { 491 497 const struct acpi_device_physical_node *node; 492 498 493 499 node = list_first_entry(&adev->physical_node_list, 494 500 struct acpi_device_physical_node, node); 495 - if (node->dev != dev) 496 - adev = NULL; 501 + 502 + phys_dev = node->dev; 497 503 } 498 504 mutex_unlock(physical_node_lock); 499 - return adev; 505 + return phys_dev; 506 + } 507 + 508 + static struct acpi_device *acpi_primary_dev_companion(struct acpi_device *adev, 509 + const struct device *dev) 510 + { 511 + const struct device *phys_dev = acpi_get_first_physical_node(adev); 512 + 513 + return phys_dev && phys_dev == dev ? adev : NULL; 500 514 } 501 515 502 516 /**

+200 -55

drivers/acpi/cppc_acpi.c

··· 39 39 40 40 #include <linux/cpufreq.h> 41 41 #include <linux/delay.h> 42 + #include <linux/ktime.h> 42 43 43 44 #include <acpi/cppc_acpi.h> 44 45 /* ··· 64 63 static void __iomem *pcc_comm_addr; 65 64 static u64 comm_base_addr; 66 65 static int pcc_subspace_idx = -1; 67 - static u16 pcc_cmd_delay; 68 66 static bool pcc_channel_acquired; 67 + static ktime_t deadline; 68 + static unsigned int pcc_mpar, pcc_mrtt; 69 + 70 + /* pcc mapped address + header size + offset within PCC subspace */ 71 + #define GET_PCC_VADDR(offs) (pcc_comm_addr + 0x8 + (offs)) 69 72 70 73 /* 71 74 * Arbitrary Retries in case the remote processor is slow to respond 72 - * to PCC commands. 75 + * to PCC commands. Keeping it high enough to cover emulators where 76 + * the processors run painfully slow. 73 77 */ 74 78 #define NUM_RETRIES 500 75 79 76 - static int send_pcc_cmd(u16 cmd) 80 + static int check_pcc_chan(void) 77 81 { 78 - int retries, result = -EIO; 79 - struct acpi_pcct_hw_reduced *pcct_ss = pcc_channel->con_priv; 80 - struct acpi_pcct_shared_memory *generic_comm_base = 81 - (struct acpi_pcct_shared_memory *) pcc_comm_addr; 82 - u32 cmd_latency = pcct_ss->latency; 83 - 84 - /* Min time OS should wait before sending next command. */ 85 - udelay(pcc_cmd_delay); 86 - 87 - /* Write to the shared comm region. */ 88 - writew(cmd, &generic_comm_base->command); 89 - 90 - /* Flip CMD COMPLETE bit */ 91 - writew(0, &generic_comm_base->status); 92 - 93 - /* Ring doorbell */ 94 - result = mbox_send_message(pcc_channel, &cmd); 95 - if (result < 0) { 96 - pr_err("Err sending PCC mbox message. cmd:%d, ret:%d\n", 97 - cmd, result); 98 - return result; 99 - } 100 - 101 - /* Wait for a nominal time to let platform process command. */ 102 - udelay(cmd_latency); 82 + int ret = -EIO; 83 + struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_comm_addr; 84 + ktime_t next_deadline = ktime_add(ktime_get(), deadline); 103 85 104 86 /* Retry in case the remote processor was too slow to catch up. 
*/ 105 - for (retries = NUM_RETRIES; retries > 0; retries--) { 87 + while (!ktime_after(ktime_get(), next_deadline)) { 88 + /* 89 + * Per spec, prior to boot the PCC space will be initialized by 90 + * platform and should have set the command completion bit when 91 + * PCC can be used by OSPM 92 + */ 106 93 if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) { 107 - result = 0; 94 + ret = 0; 108 95 break; 109 96 } 97 + /* 98 + * Reducing the bus traffic in case this loop takes longer than 99 + * a few retries. 100 + */ 101 + udelay(3); 110 102 } 111 103 112 - mbox_client_txdone(pcc_channel, result); 113 - return result; 104 + return ret; 105 + } 106 + 107 + static int send_pcc_cmd(u16 cmd) 108 + { 109 + int ret = -EIO; 110 + struct acpi_pcct_shared_memory *generic_comm_base = 111 + (struct acpi_pcct_shared_memory *) pcc_comm_addr; 112 + static ktime_t last_cmd_cmpl_time, last_mpar_reset; 113 + static int mpar_count; 114 + unsigned int time_delta; 115 + 116 + /* 117 + * For CMD_WRITE we know for a fact the caller should have checked 118 + * the channel before writing to PCC space 119 + */ 120 + if (cmd == CMD_READ) { 121 + ret = check_pcc_chan(); 122 + if (ret) 123 + return ret; 124 + } 125 + 126 + /* 127 + * Handle the Minimum Request Turnaround Time(MRTT) 128 + * "The minimum amount of time that OSPM must wait after the completion 129 + * of a command before issuing the next command, in microseconds" 130 + */ 131 + if (pcc_mrtt) { 132 + time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time); 133 + if (pcc_mrtt > time_delta) 134 + udelay(pcc_mrtt - time_delta); 135 + } 136 + 137 + /* 138 + * Handle the non-zero Maximum Periodic Access Rate(MPAR) 139 + * "The maximum number of periodic requests that the subspace channel can 140 + * support, reported in commands per minute. 0 indicates no limitation." 
141 + * 142 + * This parameter should be ideally zero or large enough so that it can 143 + * handle maximum number of requests that all the cores in the system can 144 + * collectively generate. If it is not, we will follow the spec and just 145 + * not send the request to the platform after hitting the MPAR limit in 146 + * any 60s window 147 + */ 148 + if (pcc_mpar) { 149 + if (mpar_count == 0) { 150 + time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset); 151 + if (time_delta < 60 * MSEC_PER_SEC) { 152 + pr_debug("PCC cmd not sent due to MPAR limit"); 153 + return -EIO; 154 + } 155 + last_mpar_reset = ktime_get(); 156 + mpar_count = pcc_mpar; 157 + } 158 + mpar_count--; 159 + } 160 + 161 + /* Write to the shared comm region. */ 162 + writew_relaxed(cmd, &generic_comm_base->command); 163 + 164 + /* Flip CMD COMPLETE bit */ 165 + writew_relaxed(0, &generic_comm_base->status); 166 + 167 + /* Ring doorbell */ 168 + ret = mbox_send_message(pcc_channel, &cmd); 169 + if (ret < 0) { 170 + pr_err("Err sending PCC mbox message. cmd:%d, ret:%d\n", 171 + cmd, ret); 172 + return ret; 173 + } 174 + 175 + /* 176 + * For READs we need to ensure the cmd completed to ensure 177 + * the ensuing read()s can proceed. For WRITEs we don't care 178 + * because the actual write()s are done before coming here 179 + * and the next READ or WRITE will check if the channel 180 + * is busy/free at the entry of this call. 
181 + * 182 + * If Minimum Request Turnaround Time is non-zero, we need 183 + * to record the completion time of both READ and WRITE 184 + * command for proper handling of MRTT, so we need to check 185 + * for pcc_mrtt in addition to CMD_READ 186 + */ 187 + if (cmd == CMD_READ || pcc_mrtt) { 188 + ret = check_pcc_chan(); 189 + if (pcc_mrtt) 190 + last_cmd_cmpl_time = ktime_get(); 191 + } 192 + 193 + mbox_client_txdone(pcc_channel, ret); 194 + return ret; 114 195 } 115 196 116 197 static void cppc_chan_tx_done(struct mbox_client *cl, void *msg, int ret) 117 198 { 118 - if (ret) 199 + if (ret < 0) 119 200 pr_debug("TX did not complete: CMD sent:%x, ret:%d\n", 120 201 *(u16 *)msg, ret); 121 202 else ··· 389 306 { 390 307 struct acpi_pcct_hw_reduced *cppc_ss; 391 308 unsigned int len; 309 + u64 usecs_lat; 392 310 393 311 if (pcc_subspace_idx >= 0) { 394 312 pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl, ··· 419 335 */ 420 336 comm_base_addr = cppc_ss->base_address; 421 337 len = cppc_ss->length; 422 - pcc_cmd_delay = cppc_ss->min_turnaround_time; 338 + 339 + /* 340 + * cppc_ss->latency is just a Nominal value. In reality 341 + * the remote processor could be much slower to reply. 342 + * So add an arbitrary amount of wait on top of Nominal. 343 + */ 344 + usecs_lat = NUM_RETRIES * cppc_ss->latency; 345 + deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC); 346 + pcc_mrtt = cppc_ss->min_turnaround_time; 347 + pcc_mpar = cppc_ss->max_access_rate; 423 348 424 349 pcc_comm_addr = acpi_os_ioremap(comm_base_addr, len); 425 350 if (!pcc_comm_addr) { ··· 639 546 } 640 547 EXPORT_SYMBOL_GPL(acpi_cppc_processor_exit); 641 548 642 - static u64 get_phys_addr(struct cpc_reg *reg) 549 + /* 550 + * Since cpc_read and cpc_write are called while holding pcc_lock, it should be 551 + * as fast as possible. We have already mapped the PCC subspace during init, so 552 + * we can directly write to it. 
553 + */ 554 + 555 + static int cpc_read(struct cpc_reg *reg, u64 *val) 643 556 { 644 - /* PCC communication addr space begins at byte offset 0x8. */ 645 - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) 646 - return (u64)comm_base_addr + 0x8 + reg->address; 647 - else 648 - return reg->address; 557 + int ret_val = 0; 558 + 559 + *val = 0; 560 + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { 561 + void __iomem *vaddr = GET_PCC_VADDR(reg->address); 562 + 563 + switch (reg->bit_width) { 564 + case 8: 565 + *val = readb_relaxed(vaddr); 566 + break; 567 + case 16: 568 + *val = readw_relaxed(vaddr); 569 + break; 570 + case 32: 571 + *val = readl_relaxed(vaddr); 572 + break; 573 + case 64: 574 + *val = readq_relaxed(vaddr); 575 + break; 576 + default: 577 + pr_debug("Error: Cannot read %u bit width from PCC\n", 578 + reg->bit_width); 579 + ret_val = -EFAULT; 580 + } 581 + } else 582 + ret_val = acpi_os_read_memory((acpi_physical_address)reg->address, 583 + val, reg->bit_width); 584 + return ret_val; 649 585 } 650 586 651 - static void cpc_read(struct cpc_reg *reg, u64 *val) 587 + static int cpc_write(struct cpc_reg *reg, u64 val) 652 588 { 653 - u64 addr = get_phys_addr(reg); 589 + int ret_val = 0; 654 590 655 - acpi_os_read_memory((acpi_physical_address)addr, 656 - val, reg->bit_width); 657 - } 591 + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { 592 + void __iomem *vaddr = GET_PCC_VADDR(reg->address); 658 593 659 - static void cpc_write(struct cpc_reg *reg, u64 val) 660 - { 661 - u64 addr = get_phys_addr(reg); 662 - 663 - acpi_os_write_memory((acpi_physical_address)addr, 664 - val, reg->bit_width); 594 + switch (reg->bit_width) { 595 + case 8: 596 + writeb_relaxed(val, vaddr); 597 + break; 598 + case 16: 599 + writew_relaxed(val, vaddr); 600 + break; 601 + case 32: 602 + writel_relaxed(val, vaddr); 603 + break; 604 + case 64: 605 + writeq_relaxed(val, vaddr); 606 + break; 607 + default: 608 + pr_debug("Error: Cannot write %u bit width to PCC\n", 609 + 
reg->bit_width); 610 + ret_val = -EFAULT; 611 + break; 612 + } 613 + } else 614 + ret_val = acpi_os_write_memory((acpi_physical_address)reg->address, 615 + val, reg->bit_width); 616 + return ret_val; 665 617 } 666 618 667 619 /** ··· 742 604 (ref_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) || 743 605 (nom_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) { 744 606 /* Ring doorbell once to update PCC subspace */ 745 - if (send_pcc_cmd(CMD_READ)) { 607 + if (send_pcc_cmd(CMD_READ) < 0) { 746 608 ret = -EIO; 747 609 goto out_err; 748 610 } ··· 800 662 if ((delivered_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) || 801 663 (reference_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) { 802 664 /* Ring doorbell once to update PCC subspace */ 803 - if (send_pcc_cmd(CMD_READ)) { 665 + if (send_pcc_cmd(CMD_READ) < 0) { 804 666 ret = -EIO; 805 667 goto out_err; 806 668 } ··· 851 713 852 714 spin_lock(&pcc_lock); 853 715 716 + /* If this is PCC reg, check if channel is free before writing */ 717 + if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { 718 + ret = check_pcc_chan(); 719 + if (ret) 720 + goto busy_channel; 721 + } 722 + 854 723 /* 855 724 * Skip writing MIN/MAX until Linux knows how to come up with 856 725 * useful values. ··· 867 722 /* Is this a PCC reg ?*/ 868 723 if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { 869 724 /* Ring doorbell so Remote can get our perf request. */ 870 - if (send_pcc_cmd(CMD_WRITE)) 725 + if (send_pcc_cmd(CMD_WRITE) < 0) 871 726 ret = -EIO; 872 727 } 873 - 728 + busy_channel: 874 729 spin_unlock(&pcc_lock); 875 730 876 731 return ret;

+3

drivers/acpi/ec_sys.c

··· 73 73 loff_t init_off = *off; 74 74 int err = 0; 75 75 76 + if (!write_support) 77 + return -EINVAL; 78 + 76 79 if (*off >= EC_SPACE_SIZE) 77 80 return 0; 78 81 if (*off + count >= EC_SPACE_SIZE) {

+1 -1

drivers/acpi/fan.c

··· 46 46 #ifdef CONFIG_PM_SLEEP 47 47 static int acpi_fan_suspend(struct device *dev); 48 48 static int acpi_fan_resume(struct device *dev); 49 - static struct dev_pm_ops acpi_fan_pm = { 49 + static const struct dev_pm_ops acpi_fan_pm = { 50 50 .resume = acpi_fan_resume, 51 51 .freeze = acpi_fan_suspend, 52 52 .thaw = acpi_fan_resume,

+7

drivers/acpi/internal.h

··· 20 20 21 21 #define PREFIX "ACPI: " 22 22 23 + void acpi_initrd_initialize_tables(void); 23 24 acpi_status acpi_os_initialize1(void); 24 25 void init_acpi_device_notify(void); 25 26 int acpi_scan_init(void); ··· 30 29 void acpi_platform_init(void); 31 30 void acpi_pnp_init(void); 32 31 void acpi_int340x_thermal_init(void); 32 + #ifdef CONFIG_ARM_AMBA 33 + void acpi_amba_init(void); 34 + #else 35 + static inline void acpi_amba_init(void) {} 36 + #endif 33 37 int acpi_sysfs_init(void); 34 38 void acpi_container_init(void); 35 39 void acpi_memory_hotplug_init(void); ··· 112 106 bool acpi_device_is_battery(struct acpi_device *adev); 113 107 bool acpi_device_is_first_physical_node(struct acpi_device *adev, 114 108 const struct device *dev); 109 + struct device *acpi_get_first_physical_node(struct acpi_device *adev); 115 110 116 111 /* -------------------------------------------------------------------------- 117 112 Device Matching and Notification

+113 -75

drivers/acpi/osl.c

··· 602 602 return AE_OK; 603 603 } 604 604 605 + static void acpi_table_taint(struct acpi_table_header *table) 606 + { 607 + pr_warn(PREFIX 608 + "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", 609 + table->signature, table->oem_table_id); 610 + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); 611 + } 612 + 605 613 #ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE 606 614 #include <linux/earlycpio.h> 607 615 #include <linux/memblock.h> ··· 644 636 645 637 #define ACPI_OVERRIDE_TABLES 64 646 638 static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES]; 639 + static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES); 647 640 648 641 #define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT) 649 642 ··· 755 746 } 756 747 } 757 748 } 758 - #endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ 759 - 760 - static void acpi_table_taint(struct acpi_table_header *table) 761 - { 762 - pr_warn(PREFIX 763 - "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", 764 - table->signature, table->oem_table_id); 765 - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE); 766 - } 767 - 768 749 769 750 acpi_status 770 - acpi_os_table_override(struct acpi_table_header * existing_table, 771 - struct acpi_table_header ** new_table) 751 + acpi_os_physical_table_override(struct acpi_table_header *existing_table, 752 + acpi_physical_address *address, u32 *length) 753 + { 754 + int table_offset = 0; 755 + int table_index = 0; 756 + struct acpi_table_header *table; 757 + u32 table_length; 758 + 759 + *length = 0; 760 + *address = 0; 761 + if (!acpi_tables_addr) 762 + return AE_OK; 763 + 764 + while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { 765 + table = acpi_os_map_memory(acpi_tables_addr + table_offset, 766 + ACPI_HEADER_SIZE); 767 + if (table_offset + table->length > all_tables_size) { 768 + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 769 + WARN_ON(1); 770 + return AE_OK; 771 + } 772 + 773 + table_length = table->length; 774 
+ 775 + /* Only override tables matched */ 776 + if (test_bit(table_index, acpi_initrd_installed) || 777 + memcmp(existing_table->signature, table->signature, 4) || 778 + memcmp(table->oem_table_id, existing_table->oem_table_id, 779 + ACPI_OEM_TABLE_ID_SIZE)) { 780 + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 781 + goto next_table; 782 + } 783 + 784 + *length = table_length; 785 + *address = acpi_tables_addr + table_offset; 786 + acpi_table_taint(existing_table); 787 + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 788 + set_bit(table_index, acpi_initrd_installed); 789 + break; 790 + 791 + next_table: 792 + table_offset += table_length; 793 + table_index++; 794 + } 795 + return AE_OK; 796 + } 797 + 798 + void __init acpi_initrd_initialize_tables(void) 799 + { 800 + int table_offset = 0; 801 + int table_index = 0; 802 + u32 table_length; 803 + struct acpi_table_header *table; 804 + 805 + if (!acpi_tables_addr) 806 + return; 807 + 808 + while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) { 809 + table = acpi_os_map_memory(acpi_tables_addr + table_offset, 810 + ACPI_HEADER_SIZE); 811 + if (table_offset + table->length > all_tables_size) { 812 + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 813 + WARN_ON(1); 814 + return; 815 + } 816 + 817 + table_length = table->length; 818 + 819 + /* Skip RSDT/XSDT which should only be used for override */ 820 + if (test_bit(table_index, acpi_initrd_installed) || 821 + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) || 822 + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) { 823 + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 824 + goto next_table; 825 + } 826 + 827 + acpi_table_taint(table); 828 + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 829 + acpi_install_table(acpi_tables_addr + table_offset, TRUE); 830 + set_bit(table_index, acpi_initrd_installed); 831 + next_table: 832 + table_offset += table_length; 833 + table_index++; 834 + } 835 + } 836 + #else 837 + acpi_status 838 + 
acpi_os_physical_table_override(struct acpi_table_header *existing_table, 839 + acpi_physical_address *address, 840 + u32 *table_length) 841 + { 842 + *table_length = 0; 843 + *address = 0; 844 + return AE_OK; 845 + } 846 + 847 + void __init acpi_initrd_initialize_tables(void) 848 + { 849 + } 850 + #endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ 851 + 852 + acpi_status 853 + acpi_os_table_override(struct acpi_table_header *existing_table, 854 + struct acpi_table_header **new_table) 772 855 { 773 856 if (!existing_table || !new_table) 774 857 return AE_BAD_PARAMETER; ··· 874 773 if (*new_table != NULL) 875 774 acpi_table_taint(existing_table); 876 775 return AE_OK; 877 - } 878 - 879 - acpi_status 880 - acpi_os_physical_table_override(struct acpi_table_header *existing_table, 881 - acpi_physical_address *address, 882 - u32 *table_length) 883 - { 884 - #ifndef CONFIG_ACPI_INITRD_TABLE_OVERRIDE 885 - *table_length = 0; 886 - *address = 0; 887 - return AE_OK; 888 - #else 889 - int table_offset = 0; 890 - struct acpi_table_header *table; 891 - 892 - *table_length = 0; 893 - *address = 0; 894 - 895 - if (!acpi_tables_addr) 896 - return AE_OK; 897 - 898 - do { 899 - if (table_offset + ACPI_HEADER_SIZE > all_tables_size) { 900 - WARN_ON(1); 901 - return AE_OK; 902 - } 903 - 904 - table = acpi_os_map_memory(acpi_tables_addr + table_offset, 905 - ACPI_HEADER_SIZE); 906 - 907 - if (table_offset + table->length > all_tables_size) { 908 - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 909 - WARN_ON(1); 910 - return AE_OK; 911 - } 912 - 913 - table_offset += table->length; 914 - 915 - if (memcmp(existing_table->signature, table->signature, 4)) { 916 - acpi_os_unmap_memory(table, 917 - ACPI_HEADER_SIZE); 918 - continue; 919 - } 920 - 921 - /* Only override tables with matching oem id */ 922 - if (memcmp(table->oem_table_id, existing_table->oem_table_id, 923 - ACPI_OEM_TABLE_ID_SIZE)) { 924 - acpi_os_unmap_memory(table, 925 - ACPI_HEADER_SIZE); 926 - continue; 927 - } 928 - 929 - 
table_offset -= table->length; 930 - *table_length = table->length; 931 - acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); 932 - *address = acpi_tables_addr + table_offset; 933 - break; 934 - } while (table_offset + ACPI_HEADER_SIZE < all_tables_size); 935 - 936 - if (*address != 0) 937 - acpi_table_taint(existing_table); 938 - return AE_OK; 939 - #endif 940 776 } 941 777 942 778 static irqreturn_t acpi_irq(int irq, void *dev_id)

+25 -4

drivers/acpi/pci_irq.c

··· 33 33 #include <linux/pci.h> 34 34 #include <linux/acpi.h> 35 35 #include <linux/slab.h> 36 + #include <linux/interrupt.h> 36 37 37 38 #define PREFIX "ACPI: " 38 39 ··· 388 387 } 389 388 #endif 390 389 390 + static inline bool acpi_pci_irq_valid(struct pci_dev *dev, u8 pin) 391 + { 392 + #ifdef CONFIG_X86 393 + /* 394 + * On x86 irq line 0xff means "unknown" or "no connection" 395 + * (PCI 3.0, Section 6.2.4, footnote on page 223). 396 + */ 397 + if (dev->irq == 0xff) { 398 + dev->irq = IRQ_NOTCONNECTED; 399 + dev_warn(&dev->dev, "PCI INT %c: not connected\n", 400 + pin_name(pin)); 401 + return false; 402 + } 403 + #endif 404 + return true; 405 + } 406 + 391 407 int acpi_pci_irq_enable(struct pci_dev *dev) 392 408 { 393 409 struct acpi_prt_entry *entry; ··· 449 431 } else 450 432 gsi = -1; 451 433 452 - /* 453 - * No IRQ known to the ACPI subsystem - maybe the BIOS / 454 - * driver reported one, then use it. Exit in any case. 455 - */ 456 434 if (gsi < 0) { 435 + /* 436 + * No IRQ known to the ACPI subsystem - maybe the BIOS / 437 + * driver reported one, then use it. Exit in any case. 438 + */ 439 + if (!acpi_pci_irq_valid(dev, pin)) 440 + return 0; 441 + 457 442 if (acpi_isa_register_gsi(dev)) 458 443 dev_warn(&dev->dev, "PCI INT %c: no GSI\n", 459 444 pin_name(pin));

+2 -5

drivers/acpi/pmic/intel_pmic_crc.c

··· 13 13 * GNU General Public License for more details. 14 14 */ 15 15 16 - #include <linux/module.h> 16 + #include <linux/init.h> 17 17 #include <linux/acpi.h> 18 18 #include <linux/mfd/intel_soc_pmic.h> 19 19 #include <linux/regmap.h> ··· 205 205 { 206 206 return platform_driver_register(&intel_crc_pmic_opregion_driver); 207 207 } 208 - module_init(intel_crc_pmic_opregion_driver_init); 209 - 210 - MODULE_DESCRIPTION("CrystalCove ACPI operation region driver"); 211 - MODULE_LICENSE("GPL"); 208 + device_initcall(intel_crc_pmic_opregion_driver_init);

-2

drivers/acpi/processor_driver.c

··· 314 314 if (result < 0) 315 315 return result; 316 316 317 - acpi_processor_syscore_init(); 318 317 register_hotcpu_notifier(&acpi_cpu_notifier); 319 318 acpi_thermal_cpufreq_init(); 320 319 acpi_processor_ppc_init(); ··· 329 330 acpi_processor_ppc_exit(); 330 331 acpi_thermal_cpufreq_exit(); 331 332 unregister_hotcpu_notifier(&acpi_cpu_notifier); 332 - acpi_processor_syscore_exit(); 333 333 driver_unregister(&acpi_processor_driver); 334 334 } 335 335

+11 -51

drivers/acpi/processor_idle.c

··· 23 23 * 24 24 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 25 */ 26 + #define pr_fmt(fmt) "ACPI: " fmt 26 27 27 28 #include <linux/module.h> 28 29 #include <linux/acpi.h> ··· 31 30 #include <linux/sched.h> /* need_resched() */ 32 31 #include <linux/tick.h> 33 32 #include <linux/cpuidle.h> 34 - #include <linux/syscore_ops.h> 35 33 #include <acpi/processor.h> 36 34 37 35 /* ··· 42 42 #ifdef CONFIG_X86 43 43 #include <asm/apic.h> 44 44 #endif 45 - 46 - #define PREFIX "ACPI: " 47 45 48 46 #define ACPI_PROCESSOR_CLASS "processor" 49 47 #define _COMPONENT ACPI_PROCESSOR_COMPONENT ··· 79 81 if (max_cstate > ACPI_PROCESSOR_MAX_POWER) 80 82 return 0; 81 83 82 - printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate." 83 - " Override with \"processor.max_cstate=%d\"\n", id->ident, 84 - (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1); 84 + pr_notice("%s detected - limiting to C%ld max_cstate." 85 + " Override with \"processor.max_cstate=%d\"\n", id->ident, 86 + (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1); 85 87 86 88 max_cstate = (long)id->driver_data; 87 89 ··· 191 193 } 192 194 193 195 #endif 194 - 195 - #ifdef CONFIG_PM_SLEEP 196 - static u32 saved_bm_rld; 197 - 198 - static int acpi_processor_suspend(void) 199 - { 200 - acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld); 201 - return 0; 202 - } 203 - 204 - static void acpi_processor_resume(void) 205 - { 206 - u32 resumed_bm_rld = 0; 207 - 208 - acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld); 209 - if (resumed_bm_rld == saved_bm_rld) 210 - return; 211 - 212 - acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); 213 - } 214 - 215 - static struct syscore_ops acpi_processor_syscore_ops = { 216 - .suspend = acpi_processor_suspend, 217 - .resume = acpi_processor_resume, 218 - }; 219 - 220 - void acpi_processor_syscore_init(void) 221 - { 222 - register_syscore_ops(&acpi_processor_syscore_ops); 223 - } 224 - 225 - 
void acpi_processor_syscore_exit(void) 226 - { 227 - unregister_syscore_ops(&acpi_processor_syscore_ops); 228 - } 229 - #endif /* CONFIG_PM_SLEEP */ 230 196 231 197 #if defined(CONFIG_X86) 232 198 static void tsc_check_state(int state) ··· 313 351 314 352 /* There must be at least 2 elements */ 315 353 if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) { 316 - printk(KERN_ERR PREFIX "not enough elements in _CST\n"); 354 + pr_err("not enough elements in _CST\n"); 317 355 ret = -EFAULT; 318 356 goto end; 319 357 } ··· 322 360 323 361 /* Validate number of power states. */ 324 362 if (count < 1 || count != cst->package.count - 1) { 325 - printk(KERN_ERR PREFIX "count given by _CST is not valid\n"); 363 + pr_err("count given by _CST is not valid\n"); 326 364 ret = -EFAULT; 327 365 goto end; 328 366 } ··· 431 469 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1) 432 470 */ 433 471 if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) { 434 - printk(KERN_WARNING 435 - "Limiting number of power states to max (%d)\n", 436 - ACPI_PROCESSOR_MAX_POWER); 437 - printk(KERN_WARNING 438 - "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); 472 + pr_warn("Limiting number of power states to max (%d)\n", 473 + ACPI_PROCESSOR_MAX_POWER); 474 + pr_warn("Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); 439 475 break; 440 476 } 441 477 } ··· 1057 1097 retval = cpuidle_register_driver(&acpi_idle_driver); 1058 1098 if (retval) 1059 1099 return retval; 1060 - printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n", 1061 - acpi_idle_driver.name); 1100 + pr_debug("%s registered with cpuidle\n", 1101 + acpi_idle_driver.name); 1062 1102 } 1063 1103 1064 1104 dev = kzalloc(sizeof(*dev), GFP_KERNEL);

+1

drivers/acpi/scan.c

··· 1930 1930 acpi_memory_hotplug_init(); 1931 1931 acpi_pnp_init(); 1932 1932 acpi_int340x_thermal_init(); 1933 + acpi_amba_init(); 1933 1934 1934 1935 acpi_scan_add_handler(&generic_device_handler); 1935 1936

+35

drivers/acpi/sleep.c

··· 19 19 #include <linux/reboot.h> 20 20 #include <linux/acpi.h> 21 21 #include <linux/module.h> 22 + #include <linux/syscore_ops.h> 22 23 #include <asm/io.h> 23 24 #include <trace/events/power.h> 24 25 ··· 678 677 static inline void acpi_sleep_suspend_setup(void) {} 679 678 #endif /* !CONFIG_SUSPEND */ 680 679 680 + #ifdef CONFIG_PM_SLEEP 681 + static u32 saved_bm_rld; 682 + 683 + static int acpi_save_bm_rld(void) 684 + { 685 + acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld); 686 + return 0; 687 + } 688 + 689 + static void acpi_restore_bm_rld(void) 690 + { 691 + u32 resumed_bm_rld = 0; 692 + 693 + acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld); 694 + if (resumed_bm_rld == saved_bm_rld) 695 + return; 696 + 697 + acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); 698 + } 699 + 700 + static struct syscore_ops acpi_sleep_syscore_ops = { 701 + .suspend = acpi_save_bm_rld, 702 + .resume = acpi_restore_bm_rld, 703 + }; 704 + 705 + void acpi_sleep_syscore_init(void) 706 + { 707 + register_syscore_ops(&acpi_sleep_syscore_ops); 708 + } 709 + #else 710 + static inline void acpi_sleep_syscore_init(void) {} 711 + #endif /* CONFIG_PM_SLEEP */ 712 + 681 713 #ifdef CONFIG_HIBERNATION 682 714 static unsigned long s4_hardware_signature; 683 715 static struct acpi_table_facs *facs; ··· 873 839 874 840 sleep_states[ACPI_STATE_S0] = 1; 875 841 842 + acpi_sleep_syscore_init(); 876 843 acpi_sleep_suspend_setup(); 877 844 acpi_sleep_hibernate_setup(); 878 845

+12

drivers/acpi/tables.c

··· 32 32 #include <linux/errno.h> 33 33 #include <linux/acpi.h> 34 34 #include <linux/bootmem.h> 35 + #include "internal.h" 35 36 36 37 #define ACPI_MAX_TABLES 128 37 38 ··· 457 456 status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0); 458 457 if (ACPI_FAILURE(status)) 459 458 return -EINVAL; 459 + acpi_initrd_initialize_tables(); 460 460 461 461 check_multiple_madt(); 462 462 return 0; ··· 486 484 } 487 485 488 486 early_param("acpi_force_table_verification", acpi_force_table_verification_setup); 487 + 488 + static int __init acpi_force_32bit_fadt_addr(char *s) 489 + { 490 + pr_info("Forcing 32 Bit FADT addresses\n"); 491 + acpi_gbl_use32_bit_fadt_addresses = TRUE; 492 + 493 + return 0; 494 + } 495 + 496 + early_param("acpi_force_32bit_fadt_addr", acpi_force_32bit_fadt_addr);

-4

drivers/acpi/utils.c

··· 201 201 u8 **pointer = NULL; 202 202 union acpi_object *element = &(package->package.elements[i]); 203 203 204 - if (!element) { 205 - return AE_BAD_DATA; 206 - } 207 - 208 204 switch (element->type) { 209 205 210 206 case ACPI_TYPE_INTEGER:

+52 -8

drivers/base/power/domain.c

··· 104 104 105 105 static int genpd_power_on(struct generic_pm_domain *genpd, bool timed) 106 106 { 107 + unsigned int state_idx = genpd->state_idx; 107 108 ktime_t time_start; 108 109 s64 elapsed_ns; 109 110 int ret; ··· 121 120 return ret; 122 121 123 122 elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); 124 - if (elapsed_ns <= genpd->power_on_latency_ns) 123 + if (elapsed_ns <= genpd->states[state_idx].power_on_latency_ns) 125 124 return ret; 126 125 127 - genpd->power_on_latency_ns = elapsed_ns; 126 + genpd->states[state_idx].power_on_latency_ns = elapsed_ns; 128 127 genpd->max_off_time_changed = true; 129 128 pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", 130 129 genpd->name, "on", elapsed_ns); ··· 134 133 135 134 static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) 136 135 { 136 + unsigned int state_idx = genpd->state_idx; 137 137 ktime_t time_start; 138 138 s64 elapsed_ns; 139 139 int ret; ··· 151 149 return ret; 152 150 153 151 elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); 154 - if (elapsed_ns <= genpd->power_off_latency_ns) 152 + if (elapsed_ns <= genpd->states[state_idx].power_off_latency_ns) 155 153 return ret; 156 154 157 - genpd->power_off_latency_ns = elapsed_ns; 155 + genpd->states[state_idx].power_off_latency_ns = elapsed_ns; 158 156 genpd->max_off_time_changed = true; 159 157 pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n", 160 158 genpd->name, "off", elapsed_ns); ··· 487 485 if (timed && runtime_pm) 488 486 time_start = ktime_get(); 489 487 490 - genpd_start_dev(genpd, dev); 491 - genpd_restore_dev(genpd, dev); 488 + ret = genpd_start_dev(genpd, dev); 489 + if (ret) 490 + goto err_poweroff; 491 + 492 + ret = genpd_restore_dev(genpd, dev); 493 + if (ret) 494 + goto err_stop; 492 495 493 496 /* Update resume latency value if the measured time exceeds it. 
*/ 494 497 if (timed && runtime_pm) { ··· 508 501 } 509 502 510 503 return 0; 504 + 505 + err_stop: 506 + genpd_stop_dev(genpd, dev); 507 + err_poweroff: 508 + if (!dev->power.irq_safe) { 509 + mutex_lock(&genpd->lock); 510 + genpd_poweroff(genpd, 0); 511 + mutex_unlock(&genpd->lock); 512 + } 513 + 514 + return ret; 511 515 } 512 516 513 517 static bool pd_ignore_unused; ··· 603 585 || atomic_read(&genpd->sd_count) > 0) 604 586 return; 605 587 588 + /* Choose the deepest state when suspending */ 589 + genpd->state_idx = genpd->state_count - 1; 606 590 genpd_power_off(genpd, timed); 607 591 608 592 genpd->status = GPD_STATE_POWER_OFF; ··· 1398 1378 mutex_lock(&subdomain->lock); 1399 1379 mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); 1400 1380 1401 - if (!list_empty(&subdomain->slave_links) || subdomain->device_count) { 1381 + if (!list_empty(&subdomain->master_links) || subdomain->device_count) { 1402 1382 pr_warn("%s: unable to remove subdomain %s\n", genpd->name, 1403 1383 subdomain->name); 1404 1384 ret = -EBUSY; ··· 1527 1507 genpd->dev_ops.stop = pm_clk_suspend; 1528 1508 genpd->dev_ops.start = pm_clk_resume; 1529 1509 } 1510 + 1511 + if (genpd->state_idx >= GENPD_MAX_NUM_STATES) { 1512 + pr_warn("Initial state index out of bounds.\n"); 1513 + genpd->state_idx = GENPD_MAX_NUM_STATES - 1; 1514 + } 1515 + 1516 + if (genpd->state_count > GENPD_MAX_NUM_STATES) { 1517 + pr_warn("Limiting states to %d\n", GENPD_MAX_NUM_STATES); 1518 + genpd->state_count = GENPD_MAX_NUM_STATES; 1519 + } 1520 + 1521 + /* Use only one "off" state if there were no states declared */ 1522 + if (genpd->state_count == 0) 1523 + genpd->state_count = 1; 1530 1524 1531 1525 mutex_lock(&gpd_list_lock); 1532 1526 list_add(&genpd->gpd_list_node, &gpd_list); ··· 1701 1667 { 1702 1668 struct generic_pm_domain *genpd = ERR_PTR(-ENOENT); 1703 1669 struct of_genpd_provider *provider; 1670 + 1671 + if (!genpdspec) 1672 + return ERR_PTR(-EINVAL); 1704 1673 1705 1674 mutex_lock(&of_genpd_mutex); 
1706 1675 ··· 1901 1864 struct pm_domain_data *pm_data; 1902 1865 const char *kobj_path; 1903 1866 struct gpd_link *link; 1867 + char state[16]; 1904 1868 int ret; 1905 1869 1906 1870 ret = mutex_lock_interruptible(&genpd->lock); ··· 1910 1872 1911 1873 if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup))) 1912 1874 goto exit; 1913 - seq_printf(s, "%-30s %-15s ", genpd->name, status_lookup[genpd->status]); 1875 + if (genpd->status == GPD_STATE_POWER_OFF) 1876 + snprintf(state, sizeof(state), "%s-%u", 1877 + status_lookup[genpd->status], genpd->state_idx); 1878 + else 1879 + snprintf(state, sizeof(state), "%s", 1880 + status_lookup[genpd->status]); 1881 + seq_printf(s, "%-30s %-15s ", genpd->name, state); 1914 1882 1915 1883 /* 1916 1884 * Modifications on the list require holding locks on both

+40 -24

drivers/base/power/domain_governor.c

··· 98 98 * 99 99 * This routine must be executed under the PM domain's lock. 100 100 */ 101 - static bool default_power_down_ok(struct dev_pm_domain *pd) 101 + static bool __default_power_down_ok(struct dev_pm_domain *pd, 102 + unsigned int state) 102 103 { 103 104 struct generic_pm_domain *genpd = pd_to_genpd(pd); 104 105 struct gpd_link *link; ··· 107 106 s64 min_off_time_ns; 108 107 s64 off_on_time_ns; 109 108 110 - if (genpd->max_off_time_changed) { 111 - struct gpd_link *link; 109 + off_on_time_ns = genpd->states[state].power_off_latency_ns + 110 + genpd->states[state].power_on_latency_ns; 112 111 113 - /* 114 - * We have to invalidate the cached results for the masters, so 115 - * use the observation that default_power_down_ok() is not 116 - * going to be called for any master until this instance 117 - * returns. 118 - */ 119 - list_for_each_entry(link, &genpd->slave_links, slave_node) 120 - link->master->max_off_time_changed = true; 121 - 122 - genpd->max_off_time_changed = false; 123 - genpd->cached_power_down_ok = false; 124 - genpd->max_off_time_ns = -1; 125 - } else { 126 - return genpd->cached_power_down_ok; 127 - } 128 - 129 - off_on_time_ns = genpd->power_off_latency_ns + 130 - genpd->power_on_latency_ns; 131 112 132 113 min_off_time_ns = -1; 133 114 /* ··· 169 186 min_off_time_ns = constraint_ns; 170 187 } 171 188 172 - genpd->cached_power_down_ok = true; 173 - 174 189 /* 175 190 * If the computed minimum device off time is negative, there are no 176 191 * latency constraints, so the domain can spend arbitrary time in the ··· 182 201 * time and the time needed to turn the domain on is the maximum 183 202 * theoretical time this domain can spend in the "off" state. 
184 203 */ 185 - genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; 204 + genpd->max_off_time_ns = min_off_time_ns - 205 + genpd->states[state].power_on_latency_ns; 186 206 return true; 207 + } 208 + 209 + static bool default_power_down_ok(struct dev_pm_domain *pd) 210 + { 211 + struct generic_pm_domain *genpd = pd_to_genpd(pd); 212 + struct gpd_link *link; 213 + 214 + if (!genpd->max_off_time_changed) 215 + return genpd->cached_power_down_ok; 216 + 217 + /* 218 + * We have to invalidate the cached results for the masters, so 219 + * use the observation that default_power_down_ok() is not 220 + * going to be called for any master until this instance 221 + * returns. 222 + */ 223 + list_for_each_entry(link, &genpd->slave_links, slave_node) 224 + link->master->max_off_time_changed = true; 225 + 226 + genpd->max_off_time_ns = -1; 227 + genpd->max_off_time_changed = false; 228 + genpd->cached_power_down_ok = true; 229 + genpd->state_idx = genpd->state_count - 1; 230 + 231 + /* Find a state to power down to, starting from the deepest. */ 232 + while (!__default_power_down_ok(pd, genpd->state_idx)) { 233 + if (genpd->state_idx == 0) { 234 + genpd->cached_power_down_ok = false; 235 + break; 236 + } 237 + genpd->state_idx--; 238 + } 239 + 240 + return genpd->cached_power_down_ok; 187 241 } 188 242 189 243 static bool always_on_power_down_ok(struct dev_pm_domain *domain)

+760 -325

drivers/base/power/opp/core.c

··· 13 13 14 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 15 16 + #include <linux/clk.h> 16 17 #include <linux/errno.h> 17 18 #include <linux/err.h> 18 19 #include <linux/slab.h> 19 20 #include <linux/device.h> 20 21 #include <linux/of.h> 21 22 #include <linux/export.h> 23 + #include <linux/regulator/consumer.h> 22 24 23 25 #include "opp.h" 24 26 25 27 /* 26 - * The root of the list of all devices. All device_opp structures branch off 27 - * from here, with each device_opp containing the list of opp it supports in 28 + * The root of the list of all opp-tables. All opp_table structures branch off 29 + * from here, with each opp_table containing the list of opps it supports in 28 30 * various states of availability. 29 31 */ 30 - static LIST_HEAD(dev_opp_list); 32 + static LIST_HEAD(opp_tables); 31 33 /* Lock to allow exclusive modification to the device and opp lists */ 32 - DEFINE_MUTEX(dev_opp_list_lock); 34 + DEFINE_MUTEX(opp_table_lock); 33 35 34 36 #define opp_rcu_lockdep_assert() \ 35 37 do { \ 36 38 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ 37 - !lockdep_is_held(&dev_opp_list_lock), \ 38 - "Missing rcu_read_lock() or " \ 39 - "dev_opp_list_lock protection"); \ 39 + !lockdep_is_held(&opp_table_lock), \ 40 + "Missing rcu_read_lock() or " \ 41 + "opp_table_lock protection"); \ 40 42 } while (0) 41 43 42 - static struct device_list_opp *_find_list_dev(const struct device *dev, 43 - struct device_opp *dev_opp) 44 + static struct opp_device *_find_opp_dev(const struct device *dev, 45 + struct opp_table *opp_table) 44 46 { 45 - struct device_list_opp *list_dev; 47 + struct opp_device *opp_dev; 46 48 47 - list_for_each_entry(list_dev, &dev_opp->dev_list, node) 48 - if (list_dev->dev == dev) 49 - return list_dev; 49 + list_for_each_entry(opp_dev, &opp_table->dev_list, node) 50 + if (opp_dev->dev == dev) 51 + return opp_dev; 50 52 51 53 return NULL; 52 54 } 53 55 54 - static struct device_opp *_managed_opp(const struct device_node *np) 56 + static struct 
opp_table *_managed_opp(const struct device_node *np) 55 57 { 56 - struct device_opp *dev_opp; 58 + struct opp_table *opp_table; 57 59 58 - list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) { 59 - if (dev_opp->np == np) { 60 + list_for_each_entry_rcu(opp_table, &opp_tables, node) { 61 + if (opp_table->np == np) { 60 62 /* 61 63 * Multiple devices can point to the same OPP table and 62 64 * so will have same node-pointer, np. ··· 66 64 * But the OPPs will be considered as shared only if the 67 65 * OPP table contains a "opp-shared" property. 68 66 */ 69 - return dev_opp->shared_opp ? dev_opp : NULL; 67 + return opp_table->shared_opp ? opp_table : NULL; 70 68 } 71 69 } 72 70 ··· 74 72 } 75 73 76 74 /** 77 - * _find_device_opp() - find device_opp struct using device pointer 78 - * @dev: device pointer used to lookup device OPPs 75 + * _find_opp_table() - find opp_table struct using device pointer 76 + * @dev: device pointer used to lookup OPP table 79 77 * 80 - * Search list of device OPPs for one containing matching device. Does a RCU 81 - * reader operation to grab the pointer needed. 78 + * Search OPP table for one containing matching device. Does a RCU reader 79 + * operation to grab the pointer needed. 82 80 * 83 - * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or 81 + * Return: pointer to 'struct opp_table' if found, otherwise -ENODEV or 84 82 * -EINVAL based on type of error. 85 83 * 86 84 * Locking: For readers, this function must be called under rcu_read_lock(). 87 - * device_opp is a RCU protected pointer, which means that device_opp is valid 85 + * opp_table is a RCU protected pointer, which means that opp_table is valid 88 86 * as long as we are under RCU lock. 89 87 * 90 - * For Writers, this function must be called with dev_opp_list_lock held. 88 + * For Writers, this function must be called with opp_table_lock held. 
91 89 */ 92 - struct device_opp *_find_device_opp(struct device *dev) 90 + struct opp_table *_find_opp_table(struct device *dev) 93 91 { 94 - struct device_opp *dev_opp; 92 + struct opp_table *opp_table; 95 93 96 94 opp_rcu_lockdep_assert(); 97 95 ··· 100 98 return ERR_PTR(-EINVAL); 101 99 } 102 100 103 - list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) 104 - if (_find_list_dev(dev, dev_opp)) 105 - return dev_opp; 101 + list_for_each_entry_rcu(opp_table, &opp_tables, node) 102 + if (_find_opp_dev(dev, opp_table)) 103 + return opp_table; 106 104 107 105 return ERR_PTR(-ENODEV); 108 106 } ··· 215 213 */ 216 214 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) 217 215 { 218 - struct device_opp *dev_opp; 216 + struct opp_table *opp_table; 219 217 unsigned long clock_latency_ns; 220 218 221 219 rcu_read_lock(); 222 220 223 - dev_opp = _find_device_opp(dev); 224 - if (IS_ERR(dev_opp)) 221 + opp_table = _find_opp_table(dev); 222 + if (IS_ERR(opp_table)) 225 223 clock_latency_ns = 0; 226 224 else 227 - clock_latency_ns = dev_opp->clock_latency_ns_max; 225 + clock_latency_ns = opp_table->clock_latency_ns_max; 228 226 229 227 rcu_read_unlock(); 230 228 return clock_latency_ns; 231 229 } 232 230 EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); 231 + 232 + /** 233 + * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds 234 + * @dev: device for which we do this operation 235 + * 236 + * Return: This function returns the max voltage latency in nanoseconds. 237 + * 238 + * Locking: This function takes rcu_read_lock(). 
239 + */ 240 + unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) 241 + { 242 + struct opp_table *opp_table; 243 + struct dev_pm_opp *opp; 244 + struct regulator *reg; 245 + unsigned long latency_ns = 0; 246 + unsigned long min_uV = ~0, max_uV = 0; 247 + int ret; 248 + 249 + rcu_read_lock(); 250 + 251 + opp_table = _find_opp_table(dev); 252 + if (IS_ERR(opp_table)) { 253 + rcu_read_unlock(); 254 + return 0; 255 + } 256 + 257 + reg = opp_table->regulator; 258 + if (IS_ERR(reg)) { 259 + /* Regulator may not be required for device */ 260 + if (reg) 261 + dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__, 262 + PTR_ERR(reg)); 263 + rcu_read_unlock(); 264 + return 0; 265 + } 266 + 267 + list_for_each_entry_rcu(opp, &opp_table->opp_list, node) { 268 + if (!opp->available) 269 + continue; 270 + 271 + if (opp->u_volt_min < min_uV) 272 + min_uV = opp->u_volt_min; 273 + if (opp->u_volt_max > max_uV) 274 + max_uV = opp->u_volt_max; 275 + } 276 + 277 + rcu_read_unlock(); 278 + 279 + /* 280 + * The caller needs to ensure that opp_table (and hence the regulator) 281 + * isn't freed, while we are executing this routine. 282 + */ 283 + ret = regulator_set_voltage_time(reg, min_uV, max_uV); 284 + if (ret > 0) 285 + latency_ns = ret * 1000; 286 + 287 + return latency_ns; 288 + } 289 + EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency); 290 + 291 + /** 292 + * dev_pm_opp_get_max_transition_latency() - Get max transition latency in 293 + * nanoseconds 294 + * @dev: device for which we do this operation 295 + * 296 + * Return: This function returns the max transition latency, in nanoseconds, to 297 + * switch from one OPP to other. 298 + * 299 + * Locking: This function takes rcu_read_lock(). 
300 + */ 301 + unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) 302 + { 303 + return dev_pm_opp_get_max_volt_latency(dev) + 304 + dev_pm_opp_get_max_clock_latency(dev); 305 + } 306 + EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency); 233 307 234 308 /** 235 309 * dev_pm_opp_get_suspend_opp() - Get suspend opp ··· 322 244 */ 323 245 struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) 324 246 { 325 - struct device_opp *dev_opp; 247 + struct opp_table *opp_table; 326 248 327 249 opp_rcu_lockdep_assert(); 328 250 329 - dev_opp = _find_device_opp(dev); 330 - if (IS_ERR(dev_opp) || !dev_opp->suspend_opp || 331 - !dev_opp->suspend_opp->available) 251 + opp_table = _find_opp_table(dev); 252 + if (IS_ERR(opp_table) || !opp_table->suspend_opp || 253 + !opp_table->suspend_opp->available) 332 254 return NULL; 333 255 334 - return dev_opp->suspend_opp; 256 + return opp_table->suspend_opp; 335 257 } 336 258 EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp); 337 259 338 260 /** 339 - * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list 261 + * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table 340 262 * @dev: device for which we do this operation 341 263 * 342 264 * Return: This function returns the number of available opps if there are any, ··· 346 268 */ 347 269 int dev_pm_opp_get_opp_count(struct device *dev) 348 270 { 349 - struct device_opp *dev_opp; 271 + struct opp_table *opp_table; 350 272 struct dev_pm_opp *temp_opp; 351 273 int count = 0; 352 274 353 275 rcu_read_lock(); 354 276 355 - dev_opp = _find_device_opp(dev); 356 - if (IS_ERR(dev_opp)) { 357 - count = PTR_ERR(dev_opp); 358 - dev_err(dev, "%s: device OPP not found (%d)\n", 277 + opp_table = _find_opp_table(dev); 278 + if (IS_ERR(opp_table)) { 279 + count = PTR_ERR(opp_table); 280 + dev_err(dev, "%s: OPP table not found (%d)\n", 359 281 __func__, count); 360 282 goto out_unlock; 361 283 } 362 284 363 - 
list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { 285 + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { 364 286 if (temp_opp->available) 365 287 count++; 366 288 } ··· 377 299 * @freq: frequency to search for 378 300 * @available: true/false - match for available opp 379 301 * 380 - * Return: Searches for exact match in the opp list and returns pointer to the 302 + * Return: Searches for exact match in the opp table and returns pointer to the 381 303 * matching opp if found, else returns ERR_PTR in case of error and should 382 304 * be handled using IS_ERR. Error return values can be: 383 305 * EINVAL: for bad pointer ··· 401 323 unsigned long freq, 402 324 bool available) 403 325 { 404 - struct device_opp *dev_opp; 326 + struct opp_table *opp_table; 405 327 struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); 406 328 407 329 opp_rcu_lockdep_assert(); 408 330 409 - dev_opp = _find_device_opp(dev); 410 - if (IS_ERR(dev_opp)) { 411 - int r = PTR_ERR(dev_opp); 412 - dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r); 331 + opp_table = _find_opp_table(dev); 332 + if (IS_ERR(opp_table)) { 333 + int r = PTR_ERR(opp_table); 334 + 335 + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); 413 336 return ERR_PTR(r); 414 337 } 415 338 416 - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { 339 + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { 417 340 if (temp_opp->available == available && 418 341 temp_opp->rate == freq) { 419 342 opp = temp_opp; ··· 450 371 struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, 451 372 unsigned long *freq) 452 373 { 453 - struct device_opp *dev_opp; 374 + struct opp_table *opp_table; 454 375 struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); 455 376 456 377 opp_rcu_lockdep_assert(); ··· 460 381 return ERR_PTR(-EINVAL); 461 382 } 462 383 463 - dev_opp = _find_device_opp(dev); 464 - if (IS_ERR(dev_opp)) 465 - return ERR_CAST(dev_opp); 384 + opp_table = 
_find_opp_table(dev); 385 + if (IS_ERR(opp_table)) 386 + return ERR_CAST(opp_table); 466 387 467 - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { 388 + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { 468 389 if (temp_opp->available && temp_opp->rate >= *freq) { 469 390 opp = temp_opp; 470 391 *freq = opp->rate; ··· 500 421 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, 501 422 unsigned long *freq) 502 423 { 503 - struct device_opp *dev_opp; 424 + struct opp_table *opp_table; 504 425 struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); 505 426 506 427 opp_rcu_lockdep_assert(); ··· 510 431 return ERR_PTR(-EINVAL); 511 432 } 512 433 513 - dev_opp = _find_device_opp(dev); 514 - if (IS_ERR(dev_opp)) 515 - return ERR_CAST(dev_opp); 434 + opp_table = _find_opp_table(dev); 435 + if (IS_ERR(opp_table)) 436 + return ERR_CAST(opp_table); 516 437 517 - list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { 438 + list_for_each_entry_rcu(temp_opp, &opp_table->opp_list, node) { 518 439 if (temp_opp->available) { 519 440 /* go to the next node, before choosing prev */ 520 441 if (temp_opp->rate > *freq) ··· 530 451 } 531 452 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); 532 453 533 - /* List-dev Helpers */ 534 - static void _kfree_list_dev_rcu(struct rcu_head *head) 454 + /* 455 + * The caller needs to ensure that opp_table (and hence the clk) isn't freed, 456 + * while clk returned here is used. 
457 + */ 458 + static struct clk *_get_opp_clk(struct device *dev) 535 459 { 536 - struct device_list_opp *list_dev; 460 + struct opp_table *opp_table; 461 + struct clk *clk; 537 462 538 - list_dev = container_of(head, struct device_list_opp, rcu_head); 539 - kfree_rcu(list_dev, rcu_head); 463 + rcu_read_lock(); 464 + 465 + opp_table = _find_opp_table(dev); 466 + if (IS_ERR(opp_table)) { 467 + dev_err(dev, "%s: device opp doesn't exist\n", __func__); 468 + clk = ERR_CAST(opp_table); 469 + goto unlock; 470 + } 471 + 472 + clk = opp_table->clk; 473 + if (IS_ERR(clk)) 474 + dev_err(dev, "%s: No clock available for the device\n", 475 + __func__); 476 + 477 + unlock: 478 + rcu_read_unlock(); 479 + return clk; 540 480 } 541 481 542 - static void _remove_list_dev(struct device_list_opp *list_dev, 543 - struct device_opp *dev_opp) 482 + static int _set_opp_voltage(struct device *dev, struct regulator *reg, 483 + unsigned long u_volt, unsigned long u_volt_min, 484 + unsigned long u_volt_max) 544 485 { 545 - opp_debug_unregister(list_dev, dev_opp); 546 - list_del(&list_dev->node); 547 - call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head, 548 - _kfree_list_dev_rcu); 549 - } 550 - 551 - struct device_list_opp *_add_list_dev(const struct device *dev, 552 - struct device_opp *dev_opp) 553 - { 554 - struct device_list_opp *list_dev; 555 486 int ret; 556 487 557 - list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL); 558 - if (!list_dev) 488 + /* Regulator not available for device */ 489 + if (IS_ERR(reg)) { 490 + dev_dbg(dev, "%s: regulator not available: %ld\n", __func__, 491 + PTR_ERR(reg)); 492 + return 0; 493 + } 494 + 495 + dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min, 496 + u_volt, u_volt_max); 497 + 498 + ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt, 499 + u_volt_max); 500 + if (ret) 501 + dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n", 502 + __func__, u_volt_min, u_volt, u_volt_max, ret); 503 + 504 + return ret; 
505 + } 506 + 507 + /** 508 + * dev_pm_opp_set_rate() - Configure new OPP based on frequency 509 + * @dev: device for which we do this operation 510 + * @target_freq: frequency to achieve 511 + * 512 + * This configures the power-supplies and clock source to the levels specified 513 + * by the OPP corresponding to the target_freq. 514 + * 515 + * Locking: This function takes rcu_read_lock(). 516 + */ 517 + int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) 518 + { 519 + struct opp_table *opp_table; 520 + struct dev_pm_opp *old_opp, *opp; 521 + struct regulator *reg; 522 + struct clk *clk; 523 + unsigned long freq, old_freq; 524 + unsigned long u_volt, u_volt_min, u_volt_max; 525 + unsigned long ou_volt, ou_volt_min, ou_volt_max; 526 + int ret; 527 + 528 + if (unlikely(!target_freq)) { 529 + dev_err(dev, "%s: Invalid target frequency %lu\n", __func__, 530 + target_freq); 531 + return -EINVAL; 532 + } 533 + 534 + clk = _get_opp_clk(dev); 535 + if (IS_ERR(clk)) 536 + return PTR_ERR(clk); 537 + 538 + freq = clk_round_rate(clk, target_freq); 539 + if ((long)freq <= 0) 540 + freq = target_freq; 541 + 542 + old_freq = clk_get_rate(clk); 543 + 544 + /* Return early if nothing to do */ 545 + if (old_freq == freq) { 546 + dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", 547 + __func__, freq); 548 + return 0; 549 + } 550 + 551 + rcu_read_lock(); 552 + 553 + opp_table = _find_opp_table(dev); 554 + if (IS_ERR(opp_table)) { 555 + dev_err(dev, "%s: device opp doesn't exist\n", __func__); 556 + rcu_read_unlock(); 557 + return PTR_ERR(opp_table); 558 + } 559 + 560 + old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq); 561 + if (!IS_ERR(old_opp)) { 562 + ou_volt = old_opp->u_volt; 563 + ou_volt_min = old_opp->u_volt_min; 564 + ou_volt_max = old_opp->u_volt_max; 565 + } else { 566 + dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", 567 + __func__, old_freq, PTR_ERR(old_opp)); 568 + } 569 + 570 + opp = 
dev_pm_opp_find_freq_ceil(dev, &freq); 571 + if (IS_ERR(opp)) { 572 + ret = PTR_ERR(opp); 573 + dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", 574 + __func__, freq, ret); 575 + rcu_read_unlock(); 576 + return ret; 577 + } 578 + 579 + u_volt = opp->u_volt; 580 + u_volt_min = opp->u_volt_min; 581 + u_volt_max = opp->u_volt_max; 582 + 583 + reg = opp_table->regulator; 584 + 585 + rcu_read_unlock(); 586 + 587 + /* Scaling up? Scale voltage before frequency */ 588 + if (freq > old_freq) { 589 + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, 590 + u_volt_max); 591 + if (ret) 592 + goto restore_voltage; 593 + } 594 + 595 + /* Change frequency */ 596 + 597 + dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", 598 + __func__, old_freq, freq); 599 + 600 + ret = clk_set_rate(clk, freq); 601 + if (ret) { 602 + dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, 603 + ret); 604 + goto restore_voltage; 605 + } 606 + 607 + /* Scaling down? Scale voltage after frequency */ 608 + if (freq < old_freq) { 609 + ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min, 610 + u_volt_max); 611 + if (ret) 612 + goto restore_freq; 613 + } 614 + 615 + return 0; 616 + 617 + restore_freq: 618 + if (clk_set_rate(clk, old_freq)) 619 + dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", 620 + __func__, old_freq); 621 + restore_voltage: 622 + /* This shouldn't harm even if the voltages weren't updated earlier */ 623 + if (!IS_ERR(old_opp)) 624 + _set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max); 625 + 626 + return ret; 627 + } 628 + EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); 629 + 630 + /* OPP-dev Helpers */ 631 + static void _kfree_opp_dev_rcu(struct rcu_head *head) 632 + { 633 + struct opp_device *opp_dev; 634 + 635 + opp_dev = container_of(head, struct opp_device, rcu_head); 636 + kfree_rcu(opp_dev, rcu_head); 637 + } 638 + 639 + static void _remove_opp_dev(struct opp_device *opp_dev, 640 + struct opp_table *opp_table) 641 + { 642 + 
opp_debug_unregister(opp_dev, opp_table); 643 + list_del(&opp_dev->node); 644 + call_srcu(&opp_table->srcu_head.srcu, &opp_dev->rcu_head, 645 + _kfree_opp_dev_rcu); 646 + } 647 + 648 + struct opp_device *_add_opp_dev(const struct device *dev, 649 + struct opp_table *opp_table) 650 + { 651 + struct opp_device *opp_dev; 652 + int ret; 653 + 654 + opp_dev = kzalloc(sizeof(*opp_dev), GFP_KERNEL); 655 + if (!opp_dev) 559 656 return NULL; 560 657 561 - /* Initialize list-dev */ 562 - list_dev->dev = dev; 563 - list_add_rcu(&list_dev->node, &dev_opp->dev_list); 658 + /* Initialize opp-dev */ 659 + opp_dev->dev = dev; 660 + list_add_rcu(&opp_dev->node, &opp_table->dev_list); 564 661 565 - /* Create debugfs entries for the dev_opp */ 566 - ret = opp_debug_register(list_dev, dev_opp); 662 + /* Create debugfs entries for the opp_table */ 663 + ret = opp_debug_register(opp_dev, opp_table); 567 664 if (ret) 568 665 dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", 569 666 __func__, ret); 570 667 571 - return list_dev; 668 + return opp_dev; 572 669 } 573 670 574 671 /** 575 - * _add_device_opp() - Find device OPP table or allocate a new one 672 + * _add_opp_table() - Find OPP table or allocate a new one 576 673 * @dev: device for which we do this operation 577 674 * 578 675 * It tries to find an existing table first, if it couldn't find one, it 579 676 * allocates a new OPP table and returns that. 580 677 * 581 - * Return: valid device_opp pointer if success, else NULL. 678 + * Return: valid opp_table pointer if success, else NULL. 
582 679 */ 583 - static struct device_opp *_add_device_opp(struct device *dev) 680 + static struct opp_table *_add_opp_table(struct device *dev) 584 681 { 585 - struct device_opp *dev_opp; 586 - struct device_list_opp *list_dev; 682 + struct opp_table *opp_table; 683 + struct opp_device *opp_dev; 684 + struct device_node *np; 685 + int ret; 587 686 588 - /* Check for existing list for 'dev' first */ 589 - dev_opp = _find_device_opp(dev); 590 - if (!IS_ERR(dev_opp)) 591 - return dev_opp; 687 + /* Check for existing table for 'dev' first */ 688 + opp_table = _find_opp_table(dev); 689 + if (!IS_ERR(opp_table)) 690 + return opp_table; 592 691 593 692 /* 594 - * Allocate a new device OPP table. In the infrequent case where a new 693 + * Allocate a new OPP table. In the infrequent case where a new 595 694 * device is needed to be added, we pay this penalty. 596 695 */ 597 - dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL); 598 - if (!dev_opp) 696 + opp_table = kzalloc(sizeof(*opp_table), GFP_KERNEL); 697 + if (!opp_table) 599 698 return NULL; 600 699 601 - INIT_LIST_HEAD(&dev_opp->dev_list); 700 + INIT_LIST_HEAD(&opp_table->dev_list); 602 701 603 - list_dev = _add_list_dev(dev, dev_opp); 604 - if (!list_dev) { 605 - kfree(dev_opp); 702 + opp_dev = _add_opp_dev(dev, opp_table); 703 + if (!opp_dev) { 704 + kfree(opp_table); 606 705 return NULL; 607 706 } 608 707 609 - srcu_init_notifier_head(&dev_opp->srcu_head); 610 - INIT_LIST_HEAD(&dev_opp->opp_list); 708 + /* 709 + * Only required for backward compatibility with v1 bindings, but isn't 710 + * harmful for other cases. And so we do it unconditionally. 
711 + */ 712 + np = of_node_get(dev->of_node); 713 + if (np) { 714 + u32 val; 611 715 612 - /* Secure the device list modification */ 613 - list_add_rcu(&dev_opp->node, &dev_opp_list); 614 - return dev_opp; 716 + if (!of_property_read_u32(np, "clock-latency", &val)) 717 + opp_table->clock_latency_ns_max = val; 718 + of_property_read_u32(np, "voltage-tolerance", 719 + &opp_table->voltage_tolerance_v1); 720 + of_node_put(np); 721 + } 722 + 723 + /* Set regulator to a non-NULL error value */ 724 + opp_table->regulator = ERR_PTR(-ENXIO); 725 + 726 + /* Find clk for the device */ 727 + opp_table->clk = clk_get(dev, NULL); 728 + if (IS_ERR(opp_table->clk)) { 729 + ret = PTR_ERR(opp_table->clk); 730 + if (ret != -EPROBE_DEFER) 731 + dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, 732 + ret); 733 + } 734 + 735 + srcu_init_notifier_head(&opp_table->srcu_head); 736 + INIT_LIST_HEAD(&opp_table->opp_list); 737 + 738 + /* Secure the device table modification */ 739 + list_add_rcu(&opp_table->node, &opp_tables); 740 + return opp_table; 615 741 } 616 742 617 743 /** 618 - * _kfree_device_rcu() - Free device_opp RCU handler 744 + * _kfree_device_rcu() - Free opp_table RCU handler 619 745 * @head: RCU head 620 746 */ 621 747 static void _kfree_device_rcu(struct rcu_head *head) 622 748 { 623 - struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head); 749 + struct opp_table *opp_table = container_of(head, struct opp_table, 750 + rcu_head); 624 751 625 - kfree_rcu(device_opp, rcu_head); 752 + kfree_rcu(opp_table, rcu_head); 626 753 } 627 754 628 755 /** 629 - * _remove_device_opp() - Removes a device OPP table 630 - * @dev_opp: device OPP table to be removed. 756 + * _remove_opp_table() - Removes a OPP table 757 + * @opp_table: OPP table to be removed. 631 758 * 632 - * Removes/frees device OPP table it it doesn't contain any OPPs. 759 + * Removes/frees OPP table if it doesn't contain any OPPs. 
633 760 */ 634 - static void _remove_device_opp(struct device_opp *dev_opp) 761 + static void _remove_opp_table(struct opp_table *opp_table) 635 762 { 636 - struct device_list_opp *list_dev; 763 + struct opp_device *opp_dev; 637 764 638 - if (!list_empty(&dev_opp->opp_list)) 765 + if (!list_empty(&opp_table->opp_list)) 639 766 return; 640 767 641 - if (dev_opp->supported_hw) 768 + if (opp_table->supported_hw) 642 769 return; 643 770 644 - if (dev_opp->prop_name) 771 + if (opp_table->prop_name) 645 772 return; 646 773 647 - list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, 648 - node); 774 + if (!IS_ERR(opp_table->regulator)) 775 + return; 649 776 650 - _remove_list_dev(list_dev, dev_opp); 777 + /* Release clk */ 778 + if (!IS_ERR(opp_table->clk)) 779 + clk_put(opp_table->clk); 780 + 781 + opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device, 782 + node); 783 + 784 + _remove_opp_dev(opp_dev, opp_table); 651 785 652 786 /* dev_list must be empty now */ 653 - WARN_ON(!list_empty(&dev_opp->dev_list)); 787 + WARN_ON(!list_empty(&opp_table->dev_list)); 654 788 655 - list_del_rcu(&dev_opp->node); 656 - call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head, 789 + list_del_rcu(&opp_table->node); 790 + call_srcu(&opp_table->srcu_head.srcu, &opp_table->rcu_head, 657 791 _kfree_device_rcu); 658 792 } 659 793 ··· 883 591 884 592 /** 885 593 * _opp_remove() - Remove an OPP from a table definition 886 - * @dev_opp: points back to the device_opp struct this opp belongs to 594 + * @opp_table: points back to the opp_table struct this opp belongs to 887 595 * @opp: pointer to the OPP to remove 888 596 * @notify: OPP_EVENT_REMOVE notification should be sent or not 889 597 * 890 - * This function removes an opp definition from the opp list. 598 + * This function removes an opp definition from the opp table. 891 599 * 892 - * Locking: The internal device_opp and opp structures are RCU protected. 
600 + * Locking: The internal opp_table and opp structures are RCU protected. 893 601 * It is assumed that the caller holds required mutex for an RCU updater 894 602 * strategy. 895 603 */ 896 - static void _opp_remove(struct device_opp *dev_opp, 604 + static void _opp_remove(struct opp_table *opp_table, 897 605 struct dev_pm_opp *opp, bool notify) 898 606 { 899 607 /* ··· 901 609 * frequency/voltage list. 902 610 */ 903 611 if (notify) 904 - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp); 612 + srcu_notifier_call_chain(&opp_table->srcu_head, 613 + OPP_EVENT_REMOVE, opp); 905 614 opp_debug_remove_one(opp); 906 615 list_del_rcu(&opp->node); 907 - call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); 616 + call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); 908 617 909 - _remove_device_opp(dev_opp); 618 + _remove_opp_table(opp_table); 910 619 } 911 620 912 621 /** 913 - * dev_pm_opp_remove() - Remove an OPP from OPP list 622 + * dev_pm_opp_remove() - Remove an OPP from OPP table 914 623 * @dev: device for which we do this operation 915 624 * @freq: OPP to remove with matching 'freq' 916 625 * 917 - * This function removes an opp from the opp list. 626 + * This function removes an opp from the opp table. 918 627 * 919 - * Locking: The internal device_opp and opp structures are RCU protected. 628 + * Locking: The internal opp_table and opp structures are RCU protected. 920 629 * Hence this function internally uses RCU updater strategy with mutex locks 921 630 * to keep the integrity of the internal data structures. 
Callers should ensure 922 631 * that this function is *NOT* called under RCU protection or in contexts where ··· 926 633 void dev_pm_opp_remove(struct device *dev, unsigned long freq) 927 634 { 928 635 struct dev_pm_opp *opp; 929 - struct device_opp *dev_opp; 636 + struct opp_table *opp_table; 930 637 bool found = false; 931 638 932 - /* Hold our list modification lock here */ 933 - mutex_lock(&dev_opp_list_lock); 639 + /* Hold our table modification lock here */ 640 + mutex_lock(&opp_table_lock); 934 641 935 - dev_opp = _find_device_opp(dev); 936 - if (IS_ERR(dev_opp)) 642 + opp_table = _find_opp_table(dev); 643 + if (IS_ERR(opp_table)) 937 644 goto unlock; 938 645 939 - list_for_each_entry(opp, &dev_opp->opp_list, node) { 646 + list_for_each_entry(opp, &opp_table->opp_list, node) { 940 647 if (opp->rate == freq) { 941 648 found = true; 942 649 break; ··· 949 656 goto unlock; 950 657 } 951 658 952 - _opp_remove(dev_opp, opp, true); 659 + _opp_remove(opp_table, opp, true); 953 660 unlock: 954 - mutex_unlock(&dev_opp_list_lock); 661 + mutex_unlock(&opp_table_lock); 955 662 } 956 663 EXPORT_SYMBOL_GPL(dev_pm_opp_remove); 957 664 958 665 static struct dev_pm_opp *_allocate_opp(struct device *dev, 959 - struct device_opp **dev_opp) 666 + struct opp_table **opp_table) 960 667 { 961 668 struct dev_pm_opp *opp; 962 669 ··· 967 674 968 675 INIT_LIST_HEAD(&opp->node); 969 676 970 - *dev_opp = _add_device_opp(dev); 971 - if (!*dev_opp) { 677 + *opp_table = _add_opp_table(dev); 678 + if (!*opp_table) { 972 679 kfree(opp); 973 680 return NULL; 974 681 } ··· 976 683 return opp; 977 684 } 978 685 686 + static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, 687 + struct opp_table *opp_table) 688 + { 689 + struct regulator *reg = opp_table->regulator; 690 + 691 + if (!IS_ERR(reg) && 692 + !regulator_is_supported_voltage(reg, opp->u_volt_min, 693 + opp->u_volt_max)) { 694 + pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n", 695 + __func__, 
opp->u_volt_min, opp->u_volt_max); 696 + return false; 697 + } 698 + 699 + return true; 700 + } 701 + 979 702 static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, 980 - struct device_opp *dev_opp) 703 + struct opp_table *opp_table) 981 704 { 982 705 struct dev_pm_opp *opp; 983 - struct list_head *head = &dev_opp->opp_list; 706 + struct list_head *head = &opp_table->opp_list; 984 707 int ret; 985 708 986 709 /* 987 710 * Insert new OPP in order of increasing frequency and discard if 988 711 * already present. 989 712 * 990 - * Need to use &dev_opp->opp_list in the condition part of the 'for' 713 + * Need to use &opp_table->opp_list in the condition part of the 'for' 991 714 * loop, don't replace it with head otherwise it will become an infinite 992 715 * loop. 993 716 */ 994 - list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { 717 + list_for_each_entry_rcu(opp, &opp_table->opp_list, node) { 995 718 if (new_opp->rate > opp->rate) { 996 719 head = &opp->node; 997 720 continue; ··· 1025 716 0 : -EEXIST; 1026 717 } 1027 718 1028 - new_opp->dev_opp = dev_opp; 719 + new_opp->opp_table = opp_table; 1029 720 list_add_rcu(&new_opp->node, head); 1030 721 1031 - ret = opp_debug_create_one(new_opp, dev_opp); 722 + ret = opp_debug_create_one(new_opp, opp_table); 1032 723 if (ret) 1033 724 dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", 1034 725 __func__, ret); 726 + 727 + if (!_opp_supported_by_regulators(new_opp, opp_table)) { 728 + new_opp->available = false; 729 + dev_warn(dev, "%s: OPP not supported by regulators (%lu)\n", 730 + __func__, new_opp->rate); 731 + } 1035 732 1036 733 return 0; 1037 734 } ··· 1049 734 * @u_volt: Voltage in uVolts for this OPP 1050 735 * @dynamic: Dynamically added OPPs. 1051 736 * 1052 - * This function adds an opp definition to the opp list and returns status. 737 + * This function adds an opp definition to the opp table and returns status. 
1053 738 * The opp is made available by default and it can be controlled using 1054 739 * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove. 1055 740 * 1056 741 * NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table 1057 742 * and freed by dev_pm_opp_of_remove_table. 1058 743 * 1059 - * Locking: The internal device_opp and opp structures are RCU protected. 744 + * Locking: The internal opp_table and opp structures are RCU protected. 1060 745 * Hence this function internally uses RCU updater strategy with mutex locks 1061 746 * to keep the integrity of the internal data structures. Callers should ensure 1062 747 * that this function is *NOT* called under RCU protection or in contexts where ··· 1072 757 static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, 1073 758 bool dynamic) 1074 759 { 1075 - struct device_opp *dev_opp; 760 + struct opp_table *opp_table; 1076 761 struct dev_pm_opp *new_opp; 762 + unsigned long tol; 1077 763 int ret; 1078 764 1079 - /* Hold our list modification lock here */ 1080 - mutex_lock(&dev_opp_list_lock); 765 + /* Hold our table modification lock here */ 766 + mutex_lock(&opp_table_lock); 1081 767 1082 - new_opp = _allocate_opp(dev, &dev_opp); 768 + new_opp = _allocate_opp(dev, &opp_table); 1083 769 if (!new_opp) { 1084 770 ret = -ENOMEM; 1085 771 goto unlock; ··· 1088 772 1089 773 /* populate the opp table */ 1090 774 new_opp->rate = freq; 775 + tol = u_volt * opp_table->voltage_tolerance_v1 / 100; 1091 776 new_opp->u_volt = u_volt; 777 + new_opp->u_volt_min = u_volt - tol; 778 + new_opp->u_volt_max = u_volt + tol; 1092 779 new_opp->available = true; 1093 780 new_opp->dynamic = dynamic; 1094 781 1095 - ret = _opp_add(dev, new_opp, dev_opp); 782 + ret = _opp_add(dev, new_opp, opp_table); 1096 783 if (ret) 1097 784 goto free_opp; 1098 785 1099 - mutex_unlock(&dev_opp_list_lock); 786 + mutex_unlock(&opp_table_lock); 1100 787 1101 788 /* 1102 789 * Notify the changes in 
the availability of the operable 1103 790 * frequency/voltage list. 1104 791 */ 1105 - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); 792 + srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp); 1106 793 return 0; 1107 794 1108 795 free_opp: 1109 - _opp_remove(dev_opp, new_opp, false); 796 + _opp_remove(opp_table, new_opp, false); 1110 797 unlock: 1111 - mutex_unlock(&dev_opp_list_lock); 798 + mutex_unlock(&opp_table_lock); 1112 799 return ret; 1113 800 } 1114 801 1115 802 /* TODO: Support multiple regulators */ 1116 803 static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, 1117 - struct device_opp *dev_opp) 804 + struct opp_table *opp_table) 1118 805 { 1119 806 u32 microvolt[3] = {0}; 1120 807 u32 val; ··· 1126 807 char name[NAME_MAX]; 1127 808 1128 809 /* Search for "opp-microvolt-<name>" */ 1129 - if (dev_opp->prop_name) { 810 + if (opp_table->prop_name) { 1130 811 snprintf(name, sizeof(name), "opp-microvolt-%s", 1131 - dev_opp->prop_name); 812 + opp_table->prop_name); 1132 813 prop = of_find_property(opp->np, name, NULL); 1133 814 } 1134 815 ··· 1163 844 } 1164 845 1165 846 opp->u_volt = microvolt[0]; 1166 - opp->u_volt_min = microvolt[1]; 1167 - opp->u_volt_max = microvolt[2]; 847 + 848 + if (count == 1) { 849 + opp->u_volt_min = opp->u_volt; 850 + opp->u_volt_max = opp->u_volt; 851 + } else { 852 + opp->u_volt_min = microvolt[1]; 853 + opp->u_volt_max = microvolt[2]; 854 + } 1168 855 1169 856 /* Search for "opp-microamp-<name>" */ 1170 857 prop = NULL; 1171 - if (dev_opp->prop_name) { 858 + if (opp_table->prop_name) { 1172 859 snprintf(name, sizeof(name), "opp-microamp-%s", 1173 - dev_opp->prop_name); 860 + opp_table->prop_name); 1174 861 prop = of_find_property(opp->np, name, NULL); 1175 862 } 1176 863 ··· 1203 878 * OPPs, which are available for those versions, based on its 'opp-supported-hw' 1204 879 * property. 
1205 880 * 1206 - * Locking: The internal device_opp and opp structures are RCU protected. 881 + * Locking: The internal opp_table and opp structures are RCU protected. 1207 882 * Hence this function internally uses RCU updater strategy with mutex locks 1208 883 * to keep the integrity of the internal data structures. Callers should ensure 1209 884 * that this function is *NOT* called under RCU protection or in contexts where ··· 1212 887 int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, 1213 888 unsigned int count) 1214 889 { 1215 - struct device_opp *dev_opp; 890 + struct opp_table *opp_table; 1216 891 int ret = 0; 1217 892 1218 - /* Hold our list modification lock here */ 1219 - mutex_lock(&dev_opp_list_lock); 893 + /* Hold our table modification lock here */ 894 + mutex_lock(&opp_table_lock); 1220 895 1221 - dev_opp = _add_device_opp(dev); 1222 - if (!dev_opp) { 896 + opp_table = _add_opp_table(dev); 897 + if (!opp_table) { 1223 898 ret = -ENOMEM; 1224 899 goto unlock; 1225 900 } 1226 901 1227 - /* Make sure there are no concurrent readers while updating dev_opp */ 1228 - WARN_ON(!list_empty(&dev_opp->opp_list)); 902 + /* Make sure there are no concurrent readers while updating opp_table */ 903 + WARN_ON(!list_empty(&opp_table->opp_list)); 1229 904 1230 - /* Do we already have a version hierarchy associated with dev_opp? */ 1231 - if (dev_opp->supported_hw) { 905 + /* Do we already have a version hierarchy associated with opp_table? 
*/ 906 + if (opp_table->supported_hw) { 1232 907 dev_err(dev, "%s: Already have supported hardware list\n", 1233 908 __func__); 1234 909 ret = -EBUSY; 1235 910 goto err; 1236 911 } 1237 912 1238 - dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions), 913 + opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions), 1239 914 GFP_KERNEL); 1240 - if (!dev_opp->supported_hw) { 915 + if (!opp_table->supported_hw) { 1241 916 ret = -ENOMEM; 1242 917 goto err; 1243 918 } 1244 919 1245 - dev_opp->supported_hw_count = count; 1246 - mutex_unlock(&dev_opp_list_lock); 920 + opp_table->supported_hw_count = count; 921 + mutex_unlock(&opp_table_lock); 1247 922 return 0; 1248 923 1249 924 err: 1250 - _remove_device_opp(dev_opp); 925 + _remove_opp_table(opp_table); 1251 926 unlock: 1252 - mutex_unlock(&dev_opp_list_lock); 927 + mutex_unlock(&opp_table_lock); 1253 928 1254 929 return ret; 1255 930 } ··· 1257 932 1258 933 /** 1259 934 * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw 1260 - * @dev: Device for which supported-hw has to be set. 935 + * @dev: Device for which supported-hw has to be put. 1261 936 * 1262 937 * This is required only for the V2 bindings, and is called for a matching 1263 - * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure 938 + * dev_pm_opp_set_supported_hw(). Until this is called, the opp_table structure 1264 939 * will not be freed. 1265 940 * 1266 - * Locking: The internal device_opp and opp structures are RCU protected. 941 + * Locking: The internal opp_table and opp structures are RCU protected. 1267 942 * Hence this function internally uses RCU updater strategy with mutex locks 1268 943 * to keep the integrity of the internal data structures. 
Callers should ensure 1269 944 * that this function is *NOT* called under RCU protection or in contexts where ··· 1271 946 */ 1272 947 void dev_pm_opp_put_supported_hw(struct device *dev) 1273 948 { 1274 - struct device_opp *dev_opp; 949 + struct opp_table *opp_table; 1275 950 1276 - /* Hold our list modification lock here */ 1277 - mutex_lock(&dev_opp_list_lock); 951 + /* Hold our table modification lock here */ 952 + mutex_lock(&opp_table_lock); 1278 953 1279 - /* Check for existing list for 'dev' first */ 1280 - dev_opp = _find_device_opp(dev); 1281 - if (IS_ERR(dev_opp)) { 1282 - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); 954 + /* Check for existing table for 'dev' first */ 955 + opp_table = _find_opp_table(dev); 956 + if (IS_ERR(opp_table)) { 957 + dev_err(dev, "Failed to find opp_table: %ld\n", 958 + PTR_ERR(opp_table)); 1283 959 goto unlock; 1284 960 } 1285 961 1286 - /* Make sure there are no concurrent readers while updating dev_opp */ 1287 - WARN_ON(!list_empty(&dev_opp->opp_list)); 962 + /* Make sure there are no concurrent readers while updating opp_table */ 963 + WARN_ON(!list_empty(&opp_table->opp_list)); 1288 964 1289 - if (!dev_opp->supported_hw) { 965 + if (!opp_table->supported_hw) { 1290 966 dev_err(dev, "%s: Doesn't have supported hardware list\n", 1291 967 __func__); 1292 968 goto unlock; 1293 969 } 1294 970 1295 - kfree(dev_opp->supported_hw); 1296 - dev_opp->supported_hw = NULL; 1297 - dev_opp->supported_hw_count = 0; 971 + kfree(opp_table->supported_hw); 972 + opp_table->supported_hw = NULL; 973 + opp_table->supported_hw_count = 0; 1298 974 1299 - /* Try freeing device_opp if this was the last blocking resource */ 1300 - _remove_device_opp(dev_opp); 975 + /* Try freeing opp_table if this was the last blocking resource */ 976 + _remove_opp_table(opp_table); 1301 977 1302 978 unlock: 1303 - mutex_unlock(&dev_opp_list_lock); 979 + mutex_unlock(&opp_table_lock); 1304 980 } 1305 981 
EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); 1306 982 1307 983 /** 1308 984 * dev_pm_opp_set_prop_name() - Set prop-extn name 1309 - * @dev: Device for which the regulator has to be set. 985 + * @dev: Device for which the prop-name has to be set. 1310 986 * @name: name to postfix to properties. 1311 987 * 1312 988 * This is required only for the V2 bindings, and it enables a platform to ··· 1315 989 * which the extension will apply are opp-microvolt and opp-microamp. OPP core 1316 990 * should postfix the property name with -<name> while looking for them. 1317 991 * 1318 - * Locking: The internal device_opp and opp structures are RCU protected. 992 + * Locking: The internal opp_table and opp structures are RCU protected. 1319 993 * Hence this function internally uses RCU updater strategy with mutex locks 1320 994 * to keep the integrity of the internal data structures. Callers should ensure 1321 995 * that this function is *NOT* called under RCU protection or in contexts where ··· 1323 997 */ 1324 998 int dev_pm_opp_set_prop_name(struct device *dev, const char *name) 1325 999 { 1326 - struct device_opp *dev_opp; 1000 + struct opp_table *opp_table; 1327 1001 int ret = 0; 1328 1002 1329 - /* Hold our list modification lock here */ 1330 - mutex_lock(&dev_opp_list_lock); 1003 + /* Hold our table modification lock here */ 1004 + mutex_lock(&opp_table_lock); 1331 1005 1332 - dev_opp = _add_device_opp(dev); 1333 - if (!dev_opp) { 1006 + opp_table = _add_opp_table(dev); 1007 + if (!opp_table) { 1334 1008 ret = -ENOMEM; 1335 1009 goto unlock; 1336 1010 } 1337 1011 1338 - /* Make sure there are no concurrent readers while updating dev_opp */ 1339 - WARN_ON(!list_empty(&dev_opp->opp_list)); 1012 + /* Make sure there are no concurrent readers while updating opp_table */ 1013 + WARN_ON(!list_empty(&opp_table->opp_list)); 1340 1014 1341 - /* Do we already have a prop-name associated with dev_opp? 
*/ 1342 - if (dev_opp->prop_name) { 1015 + /* Do we already have a prop-name associated with opp_table? */ 1016 + if (opp_table->prop_name) { 1343 1017 dev_err(dev, "%s: Already have prop-name %s\n", __func__, 1344 - dev_opp->prop_name); 1018 + opp_table->prop_name); 1345 1019 ret = -EBUSY; 1346 1020 goto err; 1347 1021 } 1348 1022 1349 - dev_opp->prop_name = kstrdup(name, GFP_KERNEL); 1350 - if (!dev_opp->prop_name) { 1023 + opp_table->prop_name = kstrdup(name, GFP_KERNEL); 1024 + if (!opp_table->prop_name) { 1351 1025 ret = -ENOMEM; 1352 1026 goto err; 1353 1027 } 1354 1028 1355 - mutex_unlock(&dev_opp_list_lock); 1029 + mutex_unlock(&opp_table_lock); 1356 1030 return 0; 1357 1031 1358 1032 err: 1359 - _remove_device_opp(dev_opp); 1033 + _remove_opp_table(opp_table); 1360 1034 unlock: 1361 - mutex_unlock(&dev_opp_list_lock); 1035 + mutex_unlock(&opp_table_lock); 1362 1036 1363 1037 return ret; 1364 1038 } ··· 1366 1040 1367 1041 /** 1368 1042 * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name 1369 - * @dev: Device for which the regulator has to be set. 1043 + * @dev: Device for which the prop-name has to be put. 1370 1044 * 1371 1045 * This is required only for the V2 bindings, and is called for a matching 1372 - * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure 1046 + * dev_pm_opp_set_prop_name(). Until this is called, the opp_table structure 1373 1047 * will not be freed. 1374 1048 * 1375 - * Locking: The internal device_opp and opp structures are RCU protected. 1049 + * Locking: The internal opp_table and opp structures are RCU protected. 1376 1050 * Hence this function internally uses RCU updater strategy with mutex locks 1377 1051 * to keep the integrity of the internal data structures. 
Callers should ensure 1378 1052 * that this function is *NOT* called under RCU protection or in contexts where ··· 1380 1054 */ 1381 1055 void dev_pm_opp_put_prop_name(struct device *dev) 1382 1056 { 1383 - struct device_opp *dev_opp; 1057 + struct opp_table *opp_table; 1384 1058 1385 - /* Hold our list modification lock here */ 1386 - mutex_lock(&dev_opp_list_lock); 1059 + /* Hold our table modification lock here */ 1060 + mutex_lock(&opp_table_lock); 1387 1061 1388 - /* Check for existing list for 'dev' first */ 1389 - dev_opp = _find_device_opp(dev); 1390 - if (IS_ERR(dev_opp)) { 1391 - dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); 1062 + /* Check for existing table for 'dev' first */ 1063 + opp_table = _find_opp_table(dev); 1064 + if (IS_ERR(opp_table)) { 1065 + dev_err(dev, "Failed to find opp_table: %ld\n", 1066 + PTR_ERR(opp_table)); 1392 1067 goto unlock; 1393 1068 } 1394 1069 1395 - /* Make sure there are no concurrent readers while updating dev_opp */ 1396 - WARN_ON(!list_empty(&dev_opp->opp_list)); 1070 + /* Make sure there are no concurrent readers while updating opp_table */ 1071 + WARN_ON(!list_empty(&opp_table->opp_list)); 1397 1072 1398 - if (!dev_opp->prop_name) { 1073 + if (!opp_table->prop_name) { 1399 1074 dev_err(dev, "%s: Doesn't have a prop-name\n", __func__); 1400 1075 goto unlock; 1401 1076 } 1402 1077 1403 - kfree(dev_opp->prop_name); 1404 - dev_opp->prop_name = NULL; 1078 + kfree(opp_table->prop_name); 1079 + opp_table->prop_name = NULL; 1405 1080 1406 - /* Try freeing device_opp if this was the last blocking resource */ 1407 - _remove_device_opp(dev_opp); 1081 + /* Try freeing opp_table if this was the last blocking resource */ 1082 + _remove_opp_table(opp_table); 1408 1083 1409 1084 unlock: 1410 - mutex_unlock(&dev_opp_list_lock); 1085 + mutex_unlock(&opp_table_lock); 1411 1086 } 1412 1087 EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); 1413 1088 1414 - static bool _opp_is_supported(struct device *dev, struct device_opp 
*dev_opp, 1089 + /** 1090 + * dev_pm_opp_set_regulator() - Set regulator name for the device 1091 + * @dev: Device for which regulator name is being set. 1092 + * @name: Name of the regulator. 1093 + * 1094 + * In order to support OPP switching, OPP layer needs to know the name of the 1095 + * device's regulator, as the core would be required to switch voltages as well. 1096 + * 1097 + * This must be called before any OPPs are initialized for the device. 1098 + * 1099 + * Locking: The internal opp_table and opp structures are RCU protected. 1100 + * Hence this function internally uses RCU updater strategy with mutex locks 1101 + * to keep the integrity of the internal data structures. Callers should ensure 1102 + * that this function is *NOT* called under RCU protection or in contexts where 1103 + * mutex cannot be locked. 1104 + */ 1105 + int dev_pm_opp_set_regulator(struct device *dev, const char *name) 1106 + { 1107 + struct opp_table *opp_table; 1108 + struct regulator *reg; 1109 + int ret; 1110 + 1111 + mutex_lock(&opp_table_lock); 1112 + 1113 + opp_table = _add_opp_table(dev); 1114 + if (!opp_table) { 1115 + ret = -ENOMEM; 1116 + goto unlock; 1117 + } 1118 + 1119 + /* This should be called before OPPs are initialized */ 1120 + if (WARN_ON(!list_empty(&opp_table->opp_list))) { 1121 + ret = -EBUSY; 1122 + goto err; 1123 + } 1124 + 1125 + /* Already have a regulator set */ 1126 + if (WARN_ON(!IS_ERR(opp_table->regulator))) { 1127 + ret = -EBUSY; 1128 + goto err; 1129 + } 1130 + /* Allocate the regulator */ 1131 + reg = regulator_get_optional(dev, name); 1132 + if (IS_ERR(reg)) { 1133 + ret = PTR_ERR(reg); 1134 + if (ret != -EPROBE_DEFER) 1135 + dev_err(dev, "%s: no regulator (%s) found: %d\n", 1136 + __func__, name, ret); 1137 + goto err; 1138 + } 1139 + 1140 + opp_table->regulator = reg; 1141 + 1142 + mutex_unlock(&opp_table_lock); 1143 + return 0; 1144 + 1145 + err: 1146 + _remove_opp_table(opp_table); 1147 + unlock: 1148 + mutex_unlock(&opp_table_lock); 1149 
+ 1150 + return ret; 1151 + } 1152 + EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator); 1153 + 1154 + /** 1155 + * dev_pm_opp_put_regulator() - Releases resources blocked for regulator 1156 + * @dev: Device for which regulator was set. 1157 + * 1158 + * Locking: The internal opp_table and opp structures are RCU protected. 1159 + * Hence this function internally uses RCU updater strategy with mutex locks 1160 + * to keep the integrity of the internal data structures. Callers should ensure 1161 + * that this function is *NOT* called under RCU protection or in contexts where 1162 + * mutex cannot be locked. 1163 + */ 1164 + void dev_pm_opp_put_regulator(struct device *dev) 1165 + { 1166 + struct opp_table *opp_table; 1167 + 1168 + mutex_lock(&opp_table_lock); 1169 + 1170 + /* Check for existing table for 'dev' first */ 1171 + opp_table = _find_opp_table(dev); 1172 + if (IS_ERR(opp_table)) { 1173 + dev_err(dev, "Failed to find opp_table: %ld\n", 1174 + PTR_ERR(opp_table)); 1175 + goto unlock; 1176 + } 1177 + 1178 + if (IS_ERR(opp_table->regulator)) { 1179 + dev_err(dev, "%s: Doesn't have regulator set\n", __func__); 1180 + goto unlock; 1181 + } 1182 + 1183 + /* Make sure there are no concurrent readers while updating opp_table */ 1184 + WARN_ON(!list_empty(&opp_table->opp_list)); 1185 + 1186 + regulator_put(opp_table->regulator); 1187 + opp_table->regulator = ERR_PTR(-ENXIO); 1188 + 1189 + /* Try freeing opp_table if this was the last blocking resource */ 1190 + _remove_opp_table(opp_table); 1191 + 1192 + unlock: 1193 + mutex_unlock(&opp_table_lock); 1194 + } 1195 + EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator); 1196 + 1197 + static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, 1415 1198 struct device_node *np) 1416 1199 { 1417 - unsigned int count = dev_opp->supported_hw_count; 1200 + unsigned int count = opp_table->supported_hw_count; 1418 1201 u32 version; 1419 1202 int ret; 1420 1203 1421 - if (!dev_opp->supported_hw) 1204 + if 
(!opp_table->supported_hw) 1422 1205 return true; 1423 1206 1424 1207 while (count--) { ··· 1540 1105 } 1541 1106 1542 1107 /* Both of these are bitwise masks of the versions */ 1543 - if (!(version & dev_opp->supported_hw[count])) 1108 + if (!(version & opp_table->supported_hw[count])) 1544 1109 return false; 1545 1110 } 1546 1111 ··· 1552 1117 * @dev: device for which we do this operation 1553 1118 * @np: device node 1554 1119 * 1555 - * This function adds an opp definition to the opp list and returns status. The 1120 + * This function adds an opp definition to the opp table and returns status. The 1556 1121 * opp can be controlled using dev_pm_opp_enable/disable functions and may be 1557 1122 * removed by dev_pm_opp_remove. 1558 1123 * 1559 - * Locking: The internal device_opp and opp structures are RCU protected. 1124 + * Locking: The internal opp_table and opp structures are RCU protected. 1560 1125 * Hence this function internally uses RCU updater strategy with mutex locks 1561 1126 * to keep the integrity of the internal data structures. 
Callers should ensure 1562 1127 * that this function is *NOT* called under RCU protection or in contexts where ··· 1572 1137 */ 1573 1138 static int _opp_add_static_v2(struct device *dev, struct device_node *np) 1574 1139 { 1575 - struct device_opp *dev_opp; 1140 + struct opp_table *opp_table; 1576 1141 struct dev_pm_opp *new_opp; 1577 1142 u64 rate; 1578 1143 u32 val; 1579 1144 int ret; 1580 1145 1581 - /* Hold our list modification lock here */ 1582 - mutex_lock(&dev_opp_list_lock); 1146 + /* Hold our table modification lock here */ 1147 + mutex_lock(&opp_table_lock); 1583 1148 1584 - new_opp = _allocate_opp(dev, &dev_opp); 1149 + new_opp = _allocate_opp(dev, &opp_table); 1585 1150 if (!new_opp) { 1586 1151 ret = -ENOMEM; 1587 1152 goto unlock; ··· 1594 1159 } 1595 1160 1596 1161 /* Check if the OPP supports hardware's hierarchy of versions or not */ 1597 - if (!_opp_is_supported(dev, dev_opp, np)) { 1162 + if (!_opp_is_supported(dev, opp_table, np)) { 1598 1163 dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); 1599 1164 goto free_opp; 1600 1165 } ··· 1614 1179 if (!of_property_read_u32(np, "clock-latency-ns", &val)) 1615 1180 new_opp->clock_latency_ns = val; 1616 1181 1617 - ret = opp_parse_supplies(new_opp, dev, dev_opp); 1182 + ret = opp_parse_supplies(new_opp, dev, opp_table); 1618 1183 if (ret) 1619 1184 goto free_opp; 1620 1185 1621 - ret = _opp_add(dev, new_opp, dev_opp); 1186 + ret = _opp_add(dev, new_opp, opp_table); 1622 1187 if (ret) 1623 1188 goto free_opp; 1624 1189 1625 1190 /* OPP to select on device suspend */ 1626 1191 if (of_property_read_bool(np, "opp-suspend")) { 1627 - if (dev_opp->suspend_opp) { 1192 + if (opp_table->suspend_opp) { 1628 1193 dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n", 1629 - __func__, dev_opp->suspend_opp->rate, 1194 + __func__, opp_table->suspend_opp->rate, 1630 1195 new_opp->rate); 1631 1196 } else { 1632 1197 new_opp->suspend = true; 1633 - dev_opp->suspend_opp = new_opp; 1198 + 
opp_table->suspend_opp = new_opp; 1634 1199 } 1635 1200 } 1636 1201 1637 - if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max) 1638 - dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns; 1202 + if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max) 1203 + opp_table->clock_latency_ns_max = new_opp->clock_latency_ns; 1639 1204 1640 - mutex_unlock(&dev_opp_list_lock); 1205 + mutex_unlock(&opp_table_lock); 1641 1206 1642 1207 pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n", 1643 1208 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt, ··· 1648 1213 * Notify the changes in the availability of the operable 1649 1214 * frequency/voltage list. 1650 1215 */ 1651 - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp); 1216 + srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp); 1652 1217 return 0; 1653 1218 1654 1219 free_opp: 1655 - _opp_remove(dev_opp, new_opp, false); 1220 + _opp_remove(opp_table, new_opp, false); 1656 1221 unlock: 1657 - mutex_unlock(&dev_opp_list_lock); 1222 + mutex_unlock(&opp_table_lock); 1658 1223 return ret; 1659 1224 } 1660 1225 ··· 1664 1229 * @freq: Frequency in Hz for this OPP 1665 1230 * @u_volt: Voltage in uVolts for this OPP 1666 1231 * 1667 - * This function adds an opp definition to the opp list and returns status. 1232 + * This function adds an opp definition to the opp table and returns status. 1668 1233 * The opp is made available by default and it can be controlled using 1669 1234 * dev_pm_opp_enable/disable functions. 1670 1235 * 1671 - * Locking: The internal device_opp and opp structures are RCU protected. 1236 + * Locking: The internal opp_table and opp structures are RCU protected. 1672 1237 * Hence this function internally uses RCU updater strategy with mutex locks 1673 1238 * to keep the integrity of the internal data structures. 
Callers should ensure 1674 1239 * that this function is *NOT* called under RCU protection or in contexts where ··· 1700 1265 * copy operation, returns 0 if no modification was done OR modification was 1701 1266 * successful. 1702 1267 * 1703 - * Locking: The internal device_opp and opp structures are RCU protected. 1268 + * Locking: The internal opp_table and opp structures are RCU protected. 1704 1269 * Hence this function internally uses RCU updater strategy with mutex locks to 1705 1270 * keep the integrity of the internal data structures. Callers should ensure 1706 1271 * that this function is *NOT* called under RCU protection or in contexts where ··· 1709 1274 static int _opp_set_availability(struct device *dev, unsigned long freq, 1710 1275 bool availability_req) 1711 1276 { 1712 - struct device_opp *dev_opp; 1277 + struct opp_table *opp_table; 1713 1278 struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); 1714 1279 int r = 0; 1715 1280 ··· 1718 1283 if (!new_opp) 1719 1284 return -ENOMEM; 1720 1285 1721 - mutex_lock(&dev_opp_list_lock); 1286 + mutex_lock(&opp_table_lock); 1722 1287 1723 - /* Find the device_opp */ 1724 - dev_opp = _find_device_opp(dev); 1725 - if (IS_ERR(dev_opp)) { 1726 - r = PTR_ERR(dev_opp); 1288 + /* Find the opp_table */ 1289 + opp_table = _find_opp_table(dev); 1290 + if (IS_ERR(opp_table)) { 1291 + r = PTR_ERR(opp_table); 1727 1292 dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); 1728 1293 goto unlock; 1729 1294 } 1730 1295 1731 1296 /* Do we have the frequency? 
*/ 1732 - list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) { 1297 + list_for_each_entry(tmp_opp, &opp_table->opp_list, node) { 1733 1298 if (tmp_opp->rate == freq) { 1734 1299 opp = tmp_opp; 1735 1300 break; ··· 1750 1315 new_opp->available = availability_req; 1751 1316 1752 1317 list_replace_rcu(&opp->node, &new_opp->node); 1753 - mutex_unlock(&dev_opp_list_lock); 1754 - call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); 1318 + mutex_unlock(&opp_table_lock); 1319 + call_srcu(&opp_table->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); 1755 1320 1756 1321 /* Notify the change of the OPP availability */ 1757 1322 if (availability_req) 1758 - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE, 1759 - new_opp); 1323 + srcu_notifier_call_chain(&opp_table->srcu_head, 1324 + OPP_EVENT_ENABLE, new_opp); 1760 1325 else 1761 - srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE, 1762 - new_opp); 1326 + srcu_notifier_call_chain(&opp_table->srcu_head, 1327 + OPP_EVENT_DISABLE, new_opp); 1763 1328 1764 1329 return 0; 1765 1330 1766 1331 unlock: 1767 - mutex_unlock(&dev_opp_list_lock); 1332 + mutex_unlock(&opp_table_lock); 1768 1333 kfree(new_opp); 1769 1334 return r; 1770 1335 } ··· 1778 1343 * corresponding error value. It is meant to be used for users an OPP available 1779 1344 * after being temporarily made unavailable with dev_pm_opp_disable. 1780 1345 * 1781 - * Locking: The internal device_opp and opp structures are RCU protected. 1346 + * Locking: The internal opp_table and opp structures are RCU protected. 1782 1347 * Hence this function indirectly uses RCU and mutex locks to keep the 1783 1348 * integrity of the internal data structures. Callers should ensure that 1784 1349 * this function is *NOT* called under RCU protection or in contexts where ··· 1804 1369 * control by users to make this OPP not available until the circumstances are 1805 1370 * right to make it available again (with a call to dev_pm_opp_enable). 
1806 1371 * 1807 - * Locking: The internal device_opp and opp structures are RCU protected. 1372 + * Locking: The internal opp_table and opp structures are RCU protected. 1808 1373 * Hence this function indirectly uses RCU and mutex locks to keep the 1809 1374 * integrity of the internal data structures. Callers should ensure that 1810 1375 * this function is *NOT* called under RCU protection or in contexts where ··· 1822 1387 1823 1388 /** 1824 1389 * dev_pm_opp_get_notifier() - find notifier_head of the device with opp 1825 - * @dev: device pointer used to lookup device OPPs. 1390 + * @dev: device pointer used to lookup OPP table. 1826 1391 * 1827 1392 * Return: pointer to notifier head if found, otherwise -ENODEV or 1828 1393 * -EINVAL based on type of error casted as pointer. value must be checked 1829 1394 * with IS_ERR to determine valid pointer or error result. 1830 1395 * 1831 - * Locking: This function must be called under rcu_read_lock(). dev_opp is a RCU 1832 - * protected pointer. The reason for the same is that the opp pointer which is 1833 - * returned will remain valid for use with opp_get_{voltage, freq} only while 1396 + * Locking: This function must be called under rcu_read_lock(). opp_table is a 1397 + * RCU protected pointer. The reason for the same is that the opp pointer which 1398 + * is returned will remain valid for use with opp_get_{voltage, freq} only while 1834 1399 * under the locked area. The pointer returned must be used prior to unlocking 1835 1400 * with rcu_read_unlock() to maintain the integrity of the pointer. 
1836 1401 */ 1837 1402 struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev) 1838 1403 { 1839 - struct device_opp *dev_opp = _find_device_opp(dev); 1404 + struct opp_table *opp_table = _find_opp_table(dev); 1840 1405 1841 - if (IS_ERR(dev_opp)) 1842 - return ERR_CAST(dev_opp); /* matching type */ 1406 + if (IS_ERR(opp_table)) 1407 + return ERR_CAST(opp_table); /* matching type */ 1843 1408 1844 - return &dev_opp->srcu_head; 1409 + return &opp_table->srcu_head; 1845 1410 } 1846 1411 EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier); 1847 1412 ··· 1849 1414 /** 1850 1415 * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT 1851 1416 * entries 1852 - * @dev: device pointer used to lookup device OPPs. 1417 + * @dev: device pointer used to lookup OPP table. 1853 1418 * 1854 1419 * Free OPPs created using static entries present in DT. 1855 1420 * 1856 - * Locking: The internal device_opp and opp structures are RCU protected. 1421 + * Locking: The internal opp_table and opp structures are RCU protected. 1857 1422 * Hence this function indirectly uses RCU updater strategy with mutex locks 1858 1423 * to keep the integrity of the internal data structures. 
Callers should ensure 1859 1424 * that this function is *NOT* called under RCU protection or in contexts where ··· 1861 1426 */ 1862 1427 void dev_pm_opp_of_remove_table(struct device *dev) 1863 1428 { 1864 - struct device_opp *dev_opp; 1429 + struct opp_table *opp_table; 1865 1430 struct dev_pm_opp *opp, *tmp; 1866 1431 1867 - /* Hold our list modification lock here */ 1868 - mutex_lock(&dev_opp_list_lock); 1432 + /* Hold our table modification lock here */ 1433 + mutex_lock(&opp_table_lock); 1869 1434 1870 - /* Check for existing list for 'dev' */ 1871 - dev_opp = _find_device_opp(dev); 1872 - if (IS_ERR(dev_opp)) { 1873 - int error = PTR_ERR(dev_opp); 1435 + /* Check for existing table for 'dev' */ 1436 + opp_table = _find_opp_table(dev); 1437 + if (IS_ERR(opp_table)) { 1438 + int error = PTR_ERR(opp_table); 1874 1439 1875 1440 if (error != -ENODEV) 1876 - WARN(1, "%s: dev_opp: %d\n", 1441 + WARN(1, "%s: opp_table: %d\n", 1877 1442 IS_ERR_OR_NULL(dev) ? 1878 1443 "Invalid device" : dev_name(dev), 1879 1444 error); 1880 1445 goto unlock; 1881 1446 } 1882 1447 1883 - /* Find if dev_opp manages a single device */ 1884 - if (list_is_singular(&dev_opp->dev_list)) { 1448 + /* Find if opp_table manages a single device */ 1449 + if (list_is_singular(&opp_table->dev_list)) { 1885 1450 /* Free static OPPs */ 1886 - list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) { 1451 + list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) { 1887 1452 if (!opp->dynamic) 1888 - _opp_remove(dev_opp, opp, true); 1453 + _opp_remove(opp_table, opp, true); 1889 1454 } 1890 1455 } else { 1891 - _remove_list_dev(_find_list_dev(dev, dev_opp), dev_opp); 1456 + _remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table); 1892 1457 } 1893 1458 1894 1459 unlock: 1895 - mutex_unlock(&dev_opp_list_lock); 1460 + mutex_unlock(&opp_table_lock); 1896 1461 } 1897 1462 EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); 1898 1463 ··· 1913 1478 static int _of_add_opp_table_v2(struct device 
*dev, struct device_node *opp_np) 1914 1479 { 1915 1480 struct device_node *np; 1916 - struct device_opp *dev_opp; 1481 + struct opp_table *opp_table; 1917 1482 int ret = 0, count = 0; 1918 1483 1919 - mutex_lock(&dev_opp_list_lock); 1484 + mutex_lock(&opp_table_lock); 1920 1485 1921 - dev_opp = _managed_opp(opp_np); 1922 - if (dev_opp) { 1486 + opp_table = _managed_opp(opp_np); 1487 + if (opp_table) { 1923 1488 /* OPPs are already managed */ 1924 - if (!_add_list_dev(dev, dev_opp)) 1489 + if (!_add_opp_dev(dev, opp_table)) 1925 1490 ret = -ENOMEM; 1926 - mutex_unlock(&dev_opp_list_lock); 1491 + mutex_unlock(&opp_table_lock); 1927 1492 return ret; 1928 1493 } 1929 - mutex_unlock(&dev_opp_list_lock); 1494 + mutex_unlock(&opp_table_lock); 1930 1495 1931 - /* We have opp-list node now, iterate over it and add OPPs */ 1496 + /* We have opp-table node now, iterate over it and add OPPs */ 1932 1497 for_each_available_child_of_node(opp_np, np) { 1933 1498 count++; 1934 1499 ··· 1944 1509 if (WARN_ON(!count)) 1945 1510 return -ENOENT; 1946 1511 1947 - mutex_lock(&dev_opp_list_lock); 1512 + mutex_lock(&opp_table_lock); 1948 1513 1949 - dev_opp = _find_device_opp(dev); 1950 - if (WARN_ON(IS_ERR(dev_opp))) { 1951 - ret = PTR_ERR(dev_opp); 1952 - mutex_unlock(&dev_opp_list_lock); 1514 + opp_table = _find_opp_table(dev); 1515 + if (WARN_ON(IS_ERR(opp_table))) { 1516 + ret = PTR_ERR(opp_table); 1517 + mutex_unlock(&opp_table_lock); 1953 1518 goto free_table; 1954 1519 } 1955 1520 1956 - dev_opp->np = opp_np; 1957 - dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared"); 1521 + opp_table->np = opp_np; 1522 + opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared"); 1958 1523 1959 - mutex_unlock(&dev_opp_list_lock); 1524 + mutex_unlock(&opp_table_lock); 1960 1525 1961 1526 return 0; 1962 1527 ··· 1985 1550 */ 1986 1551 nr = prop->length / sizeof(u32); 1987 1552 if (nr % 2) { 1988 - dev_err(dev, "%s: Invalid OPP list\n", __func__); 1553 + dev_err(dev, "%s: 
Invalid OPP table\n", __func__); 1989 1554 return -EINVAL; 1990 1555 } 1991 1556 ··· 2005 1570 2006 1571 /** 2007 1572 * dev_pm_opp_of_add_table() - Initialize opp table from device tree 2008 - * @dev: device pointer used to lookup device OPPs. 1573 + * @dev: device pointer used to lookup OPP table. 2009 1574 * 2010 1575 * Register the initial OPP table with the OPP library for given device. 2011 1576 * 2012 - * Locking: The internal device_opp and opp structures are RCU protected. 1577 + * Locking: The internal opp_table and opp structures are RCU protected. 2013 1578 * Hence this function indirectly uses RCU updater strategy with mutex locks 2014 1579 * to keep the integrity of the internal data structures. Callers should ensure 2015 1580 * that this function is *NOT* called under RCU protection or in contexts where

+11 -11

drivers/base/power/opp/cpu.c

··· 31 31 * @table: Cpufreq table returned back to caller 32 32 * 33 33 * Generate a cpufreq table for a provided device- this assumes that the 34 - * opp list is already initialized and ready for usage. 34 + * opp table is already initialized and ready for usage. 35 35 * 36 36 * This function allocates required memory for the cpufreq table. It is 37 37 * expected that the caller does the required maintenance such as freeing ··· 44 44 * WARNING: It is important for the callers to ensure refreshing their copy of 45 45 * the table if any of the mentioned functions have been invoked in the interim. 46 46 * 47 - * Locking: The internal device_opp and opp structures are RCU protected. 47 + * Locking: The internal opp_table and opp structures are RCU protected. 48 48 * Since we just use the regular accessor functions to access the internal data 49 49 * structures, we use RCU read lock inside this function. As a result, users of 50 50 * this function DONOT need to use explicit locks for invoking. 
··· 122 122 /* Required only for V1 bindings, as v2 can manage it from DT itself */ 123 123 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) 124 124 { 125 - struct device_list_opp *list_dev; 126 - struct device_opp *dev_opp; 125 + struct opp_device *opp_dev; 126 + struct opp_table *opp_table; 127 127 struct device *dev; 128 128 int cpu, ret = 0; 129 129 130 - mutex_lock(&dev_opp_list_lock); 130 + mutex_lock(&opp_table_lock); 131 131 132 - dev_opp = _find_device_opp(cpu_dev); 133 - if (IS_ERR(dev_opp)) { 132 + opp_table = _find_opp_table(cpu_dev); 133 + if (IS_ERR(opp_table)) { 134 134 ret = -EINVAL; 135 135 goto unlock; 136 136 } ··· 146 146 continue; 147 147 } 148 148 149 - list_dev = _add_list_dev(dev, dev_opp); 150 - if (!list_dev) { 151 - dev_err(dev, "%s: failed to add list-dev for cpu%d device\n", 149 + opp_dev = _add_opp_dev(dev, opp_table); 150 + if (!opp_dev) { 151 + dev_err(dev, "%s: failed to add opp-dev for cpu%d device\n", 152 152 __func__, cpu); 153 153 continue; 154 154 } 155 155 } 156 156 unlock: 157 - mutex_unlock(&dev_opp_list_lock); 157 + mutex_unlock(&opp_table_lock); 158 158 159 159 return ret; 160 160 }

+42 -43

drivers/base/power/opp/debugfs.c

··· 34 34 debugfs_remove_recursive(opp->dentry); 35 35 } 36 36 37 - int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) 37 + int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) 38 38 { 39 - struct dentry *pdentry = dev_opp->dentry; 39 + struct dentry *pdentry = opp_table->dentry; 40 40 struct dentry *d; 41 41 char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ 42 42 ··· 83 83 return 0; 84 84 } 85 85 86 - static int device_opp_debug_create_dir(struct device_list_opp *list_dev, 87 - struct device_opp *dev_opp) 86 + static int opp_list_debug_create_dir(struct opp_device *opp_dev, 87 + struct opp_table *opp_table) 88 88 { 89 - const struct device *dev = list_dev->dev; 89 + const struct device *dev = opp_dev->dev; 90 90 struct dentry *d; 91 91 92 - opp_set_dev_name(dev, dev_opp->dentry_name); 92 + opp_set_dev_name(dev, opp_table->dentry_name); 93 93 94 94 /* Create device specific directory */ 95 - d = debugfs_create_dir(dev_opp->dentry_name, rootdir); 95 + d = debugfs_create_dir(opp_table->dentry_name, rootdir); 96 96 if (!d) { 97 97 dev_err(dev, "%s: Failed to create debugfs dir\n", __func__); 98 98 return -ENOMEM; 99 99 } 100 100 101 - list_dev->dentry = d; 102 - dev_opp->dentry = d; 101 + opp_dev->dentry = d; 102 + opp_table->dentry = d; 103 103 104 104 return 0; 105 105 } 106 106 107 - static int device_opp_debug_create_link(struct device_list_opp *list_dev, 108 - struct device_opp *dev_opp) 107 + static int opp_list_debug_create_link(struct opp_device *opp_dev, 108 + struct opp_table *opp_table) 109 109 { 110 - const struct device *dev = list_dev->dev; 110 + const struct device *dev = opp_dev->dev; 111 111 char name[NAME_MAX]; 112 112 struct dentry *d; 113 113 114 - opp_set_dev_name(list_dev->dev, name); 114 + opp_set_dev_name(opp_dev->dev, name); 115 115 116 116 /* Create device specific directory link */ 117 - d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name); 117 + d = 
debugfs_create_symlink(name, rootdir, opp_table->dentry_name); 118 118 if (!d) { 119 119 dev_err(dev, "%s: Failed to create link\n", __func__); 120 120 return -ENOMEM; 121 121 } 122 122 123 - list_dev->dentry = d; 123 + opp_dev->dentry = d; 124 124 125 125 return 0; 126 126 } 127 127 128 128 /** 129 129 * opp_debug_register - add a device opp node to the debugfs 'opp' directory 130 - * @list_dev: list-dev pointer for device 131 - * @dev_opp: the device-opp being added 130 + * @opp_dev: opp-dev pointer for device 131 + * @opp_table: the device-opp being added 132 132 * 133 133 * Dynamically adds device specific directory in debugfs 'opp' directory. If the 134 134 * device-opp is shared with other devices, then links will be created for all ··· 136 136 * 137 137 * Return: 0 on success, otherwise negative error. 138 138 */ 139 - int opp_debug_register(struct device_list_opp *list_dev, 140 - struct device_opp *dev_opp) 139 + int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table) 141 140 { 142 141 if (!rootdir) { 143 142 pr_debug("%s: Uninitialized rootdir\n", __func__); 144 143 return -EINVAL; 145 144 } 146 145 147 - if (dev_opp->dentry) 148 - return device_opp_debug_create_link(list_dev, dev_opp); 146 + if (opp_table->dentry) 147 + return opp_list_debug_create_link(opp_dev, opp_table); 149 148 150 - return device_opp_debug_create_dir(list_dev, dev_opp); 149 + return opp_list_debug_create_dir(opp_dev, opp_table); 151 150 } 152 151 153 - static void opp_migrate_dentry(struct device_list_opp *list_dev, 154 - struct device_opp *dev_opp) 152 + static void opp_migrate_dentry(struct opp_device *opp_dev, 153 + struct opp_table *opp_table) 155 154 { 156 - struct device_list_opp *new_dev; 155 + struct opp_device *new_dev; 157 156 const struct device *dev; 158 157 struct dentry *dentry; 159 158 160 - /* Look for next list-dev */ 161 - list_for_each_entry(new_dev, &dev_opp->dev_list, node) 162 - if (new_dev != list_dev) 159 + /* Look for next opp-dev */ 
160 + list_for_each_entry(new_dev, &opp_table->dev_list, node) 161 + if (new_dev != opp_dev) 163 162 break; 164 163 165 164 /* new_dev is guaranteed to be valid here */ 166 165 dev = new_dev->dev; 167 166 debugfs_remove_recursive(new_dev->dentry); 168 167 169 - opp_set_dev_name(dev, dev_opp->dentry_name); 168 + opp_set_dev_name(dev, opp_table->dentry_name); 170 169 171 - dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir, 172 - dev_opp->dentry_name); 170 + dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir, 171 + opp_table->dentry_name); 173 172 if (!dentry) { 174 173 dev_err(dev, "%s: Failed to rename link from: %s to %s\n", 175 - __func__, dev_name(list_dev->dev), dev_name(dev)); 174 + __func__, dev_name(opp_dev->dev), dev_name(dev)); 176 175 return; 177 176 } 178 177 179 178 new_dev->dentry = dentry; 180 - dev_opp->dentry = dentry; 179 + opp_table->dentry = dentry; 181 180 } 182 181 183 182 /** 184 183 * opp_debug_unregister - remove a device opp node from debugfs opp directory 185 - * @list_dev: list-dev pointer for device 186 - * @dev_opp: the device-opp being removed 184 + * @opp_dev: opp-dev pointer for device 185 + * @opp_table: the device-opp being removed 187 186 * 188 187 * Dynamically removes device specific directory from debugfs 'opp' directory. 
189 188 */ 190 - void opp_debug_unregister(struct device_list_opp *list_dev, 191 - struct device_opp *dev_opp) 189 + void opp_debug_unregister(struct opp_device *opp_dev, 190 + struct opp_table *opp_table) 192 191 { 193 - if (list_dev->dentry == dev_opp->dentry) { 192 + if (opp_dev->dentry == opp_table->dentry) { 194 193 /* Move the real dentry object under another device */ 195 - if (!list_is_singular(&dev_opp->dev_list)) { 196 - opp_migrate_dentry(list_dev, dev_opp); 194 + if (!list_is_singular(&opp_table->dev_list)) { 195 + opp_migrate_dentry(opp_dev, opp_table); 197 196 goto out; 198 197 } 199 - dev_opp->dentry = NULL; 198 + opp_table->dentry = NULL; 200 199 } 201 200 202 - debugfs_remove_recursive(list_dev->dentry); 201 + debugfs_remove_recursive(opp_dev->dentry); 203 202 204 203 out: 205 - list_dev->dentry = NULL; 204 + opp_dev->dentry = NULL; 206 205 } 207 206 208 207 static int __init opp_debug_init(void)

+42 -32

drivers/base/power/opp/opp.h

··· 22 22 #include <linux/rculist.h> 23 23 #include <linux/rcupdate.h> 24 24 25 + struct clk; 26 + struct regulator; 27 + 25 28 /* Lock to allow exclusive modification to the device and opp lists */ 26 - extern struct mutex dev_opp_list_lock; 29 + extern struct mutex opp_table_lock; 27 30 28 31 /* 29 32 * Internal data structure organization with the OPP layer library is as 30 33 * follows: 31 - * dev_opp_list (root) 34 + * opp_tables (root) 32 35 * |- device 1 (represents voltage domain 1) 33 36 * | |- opp 1 (availability, freq, voltage) 34 37 * | |- opp 2 .. ··· 40 37 * |- device 2 (represents the next voltage domain) 41 38 * ... 42 39 * `- device m (represents mth voltage domain) 43 - * device 1, 2.. are represented by dev_opp structure while each opp 40 + * device 1, 2.. are represented by opp_table structure while each opp 44 41 * is represented by the opp structure. 45 42 */ 46 43 47 44 /** 48 45 * struct dev_pm_opp - Generic OPP description structure 49 - * @node: opp list node. The nodes are maintained throughout the lifetime 46 + * @node: opp table node. The nodes are maintained throughout the lifetime 50 47 * of boot. It is expected only an optimal set of OPPs are 51 48 * added to the library by the SoC framework. 52 - * RCU usage: opp list is traversed with RCU locks. node 49 + * RCU usage: opp table is traversed with RCU locks. node 53 50 * modification is possible realtime, hence the modifications 54 - * are protected by the dev_opp_list_lock for integrity. 51 + * are protected by the opp_table_lock for integrity. 55 52 * IMPORTANT: the opp nodes should be maintained in increasing 56 53 * order. 57 54 * @available: true/false - marks if this OPP as available or not ··· 65 62 * @u_amp: Maximum current drawn by the device in microamperes 66 63 * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's 67 64 * frequency from any other OPP's frequency. 
68 - * @dev_opp: points back to the device_opp struct this opp belongs to 65 + * @opp_table: points back to the opp_table struct this opp belongs to 69 66 * @rcu_head: RCU callback head used for deferred freeing 70 67 * @np: OPP's device node. 71 68 * @dentry: debugfs dentry pointer (per opp) ··· 87 84 unsigned long u_amp; 88 85 unsigned long clock_latency_ns; 89 86 90 - struct device_opp *dev_opp; 87 + struct opp_table *opp_table; 91 88 struct rcu_head rcu_head; 92 89 93 90 struct device_node *np; ··· 98 95 }; 99 96 100 97 /** 101 - * struct device_list_opp - devices managed by 'struct device_opp' 98 + * struct opp_device - devices managed by 'struct opp_table' 102 99 * @node: list node 103 100 * @dev: device to which the struct object belongs 104 101 * @rcu_head: RCU callback head used for deferred freeing 105 102 * @dentry: debugfs dentry pointer (per device) 106 103 * 107 - * This is an internal data structure maintaining the list of devices that are 108 - * managed by 'struct device_opp'. 104 + * This is an internal data structure maintaining the devices that are managed 105 + * by 'struct opp_table'. 109 106 */ 110 - struct device_list_opp { 107 + struct opp_device { 111 108 struct list_head node; 112 109 const struct device *dev; 113 110 struct rcu_head rcu_head; ··· 118 115 }; 119 116 120 117 /** 121 - * struct device_opp - Device opp structure 122 - * @node: list node - contains the devices with OPPs that 118 + * struct opp_table - Device opp structure 119 + * @node: table node - contains the devices with OPPs that 123 120 * have been registered. Nodes once added are not modified in this 124 - * list. 125 - * RCU usage: nodes are not modified in the list of device_opp, 126 - * however addition is possible and is secured by dev_opp_list_lock 121 + * table. 
122 + * RCU usage: nodes are not modified in the table of opp_table, 123 + * however addition is possible and is secured by opp_table_lock 127 124 * @srcu_head: notifier head to notify the OPP availability changes. 128 125 * @rcu_head: RCU callback head used for deferred freeing 129 126 * @dev_list: list of devices that share these OPPs 130 - * @opp_list: list of opps 127 + * @opp_list: table of opps 131 128 * @np: struct device_node pointer for opp's DT node. 132 129 * @clock_latency_ns_max: Max clock latency in nanoseconds. 133 130 * @shared_opp: OPP is shared between multiple devices. ··· 135 132 * @supported_hw: Array of version number to support. 136 133 * @supported_hw_count: Number of elements in supported_hw array. 137 134 * @prop_name: A name to postfix to many DT properties, while parsing them. 135 + * @clk: Device's clock handle 136 + * @regulator: Supply regulator 138 137 * @dentry: debugfs dentry pointer of the real device directory (not links). 139 138 * @dentry_name: Name of the real dentry. 139 + * 140 + * @voltage_tolerance_v1: In percentage, for v1 bindings only. 140 141 * 141 142 * This is an internal data structure maintaining the link to opps attached to 142 143 * a device. This structure is not meant to be shared to users as it is ··· 150 143 * need to wait for the grace period of both of them before freeing any 151 144 * resources. And so we have used kfree_rcu() from within call_srcu() handlers. 
152 145 */ 153 - struct device_opp { 146 + struct opp_table { 154 147 struct list_head node; 155 148 156 149 struct srcu_notifier_head srcu_head; ··· 160 153 161 154 struct device_node *np; 162 155 unsigned long clock_latency_ns_max; 156 + 157 + /* For backward compatibility with v1 bindings */ 158 + unsigned int voltage_tolerance_v1; 159 + 163 160 bool shared_opp; 164 161 struct dev_pm_opp *suspend_opp; 165 162 166 163 unsigned int *supported_hw; 167 164 unsigned int supported_hw_count; 168 165 const char *prop_name; 166 + struct clk *clk; 167 + struct regulator *regulator; 169 168 170 169 #ifdef CONFIG_DEBUG_FS 171 170 struct dentry *dentry; ··· 180 167 }; 181 168 182 169 /* Routines internal to opp core */ 183 - struct device_opp *_find_device_opp(struct device *dev); 184 - struct device_list_opp *_add_list_dev(const struct device *dev, 185 - struct device_opp *dev_opp); 170 + struct opp_table *_find_opp_table(struct device *dev); 171 + struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); 186 172 struct device_node *_of_get_opp_desc_node(struct device *dev); 187 173 188 174 #ifdef CONFIG_DEBUG_FS 189 175 void opp_debug_remove_one(struct dev_pm_opp *opp); 190 - int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp); 191 - int opp_debug_register(struct device_list_opp *list_dev, 192 - struct device_opp *dev_opp); 193 - void opp_debug_unregister(struct device_list_opp *list_dev, 194 - struct device_opp *dev_opp); 176 + int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table); 177 + int opp_debug_register(struct opp_device *opp_dev, struct opp_table *opp_table); 178 + void opp_debug_unregister(struct opp_device *opp_dev, struct opp_table *opp_table); 195 179 #else 196 180 static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {} 197 181 198 182 static inline int opp_debug_create_one(struct dev_pm_opp *opp, 199 - struct device_opp *dev_opp) 183 + struct opp_table *opp_table) 200 
184 { return 0; } 201 - static inline int opp_debug_register(struct device_list_opp *list_dev, 202 - struct device_opp *dev_opp) 185 + static inline int opp_debug_register(struct opp_device *opp_dev, 186 + struct opp_table *opp_table) 203 187 { return 0; } 204 188 205 - static inline void opp_debug_unregister(struct device_list_opp *list_dev, 206 - struct device_opp *dev_opp) 189 + static inline void opp_debug_unregister(struct opp_device *opp_dev, 190 + struct opp_table *opp_table) 207 191 { } 208 192 #endif /* DEBUG_FS */ 209 193

+2 -2

drivers/base/power/trace.c

··· 166 166 } 167 167 EXPORT_SYMBOL(generate_pm_trace); 168 168 169 - extern char __tracedata_start, __tracedata_end; 169 + extern char __tracedata_start[], __tracedata_end[]; 170 170 static int show_file_hash(unsigned int value) 171 171 { 172 172 int match; 173 173 char *tracedata; 174 174 175 175 match = 0; 176 - for (tracedata = &__tracedata_start ; tracedata < &__tracedata_end ; 176 + for (tracedata = __tracedata_start ; tracedata < __tracedata_end ; 177 177 tracedata += 2 + sizeof(unsigned long)) { 178 178 unsigned short lineno = *(unsigned short *)tracedata; 179 179 const char *file = *(const char **)(tracedata + 2);

+17 -8

drivers/base/property.c

··· 218 218 bool ret; 219 219 220 220 ret = __fwnode_property_present(fwnode, propname); 221 - if (ret == false && fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) 221 + if (ret == false && !IS_ERR_OR_NULL(fwnode) && 222 + !IS_ERR_OR_NULL(fwnode->secondary)) 222 223 ret = __fwnode_property_present(fwnode->secondary, propname); 223 224 return ret; 224 225 } ··· 424 423 int _ret_; \ 425 424 _ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_, \ 426 425 _val_, _nval_); \ 427 - if (_ret_ == -EINVAL && _fwnode_ && !IS_ERR_OR_NULL(_fwnode_->secondary)) \ 426 + if (_ret_ == -EINVAL && !IS_ERR_OR_NULL(_fwnode_) && \ 427 + !IS_ERR_OR_NULL(_fwnode_->secondary)) \ 428 428 _ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_, \ 429 429 _proptype_, _val_, _nval_); \ 430 430 _ret_; \ ··· 595 593 int ret; 596 594 597 595 ret = __fwnode_property_read_string_array(fwnode, propname, val, nval); 598 - if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) 596 + if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && 597 + !IS_ERR_OR_NULL(fwnode->secondary)) 599 598 ret = __fwnode_property_read_string_array(fwnode->secondary, 600 599 propname, val, nval); 601 600 return ret; ··· 624 621 int ret; 625 622 626 623 ret = __fwnode_property_read_string(fwnode, propname, val); 627 - if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) 624 + if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && 625 + !IS_ERR_OR_NULL(fwnode->secondary)) 628 626 ret = __fwnode_property_read_string(fwnode->secondary, 629 627 propname, val); 630 628 return ret; ··· 824 820 * the pset. If there is no real firmware node (ACPI/DT) primary 825 821 * will hold the pset. 
826 822 */ 827 - if (!is_pset_node(fwnode)) 828 - fwnode = fwnode->secondary; 829 - if (!IS_ERR(fwnode) && is_pset_node(fwnode)) 823 + if (is_pset_node(fwnode)) { 824 + set_primary_fwnode(dev, NULL); 830 825 pset_free_set(to_pset_node(fwnode)); 831 - set_secondary_fwnode(dev, NULL); 826 + } else { 827 + fwnode = fwnode->secondary; 828 + if (!IS_ERR(fwnode) && is_pset_node(fwnode)) { 829 + set_secondary_fwnode(dev, NULL); 830 + pset_free_set(to_pset_node(fwnode)); 831 + } 832 + } 832 833 } 833 834 EXPORT_SYMBOL_GPL(device_remove_property_set); 834 835

+1

drivers/cpufreq/Kconfig

··· 19 19 if CPU_FREQ 20 20 21 21 config CPU_FREQ_GOV_COMMON 22 + select IRQ_WORK 22 23 bool 23 24 24 25 config CPU_FREQ_BOOST_SW

+97 -117

drivers/cpufreq/acpi-cpufreq.c

··· 70 70 unsigned int cpu_feature; 71 71 unsigned int acpi_perf_cpu; 72 72 cpumask_var_t freqdomain_cpus; 73 + void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); 74 + u32 (*cpu_freq_read)(struct acpi_pct_register *reg); 73 75 }; 74 76 75 77 /* acpi_perf_data is a pointer to percpu data. */ ··· 245 243 } 246 244 } 247 245 248 - struct msr_addr { 249 - u32 reg; 250 - }; 246 + u32 cpu_freq_read_intel(struct acpi_pct_register *not_used) 247 + { 248 + u32 val, dummy; 251 249 252 - struct io_addr { 253 - u16 port; 254 - u8 bit_width; 255 - }; 250 + rdmsr(MSR_IA32_PERF_CTL, val, dummy); 251 + return val; 252 + } 253 + 254 + void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val) 255 + { 256 + u32 lo, hi; 257 + 258 + rdmsr(MSR_IA32_PERF_CTL, lo, hi); 259 + lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE); 260 + wrmsr(MSR_IA32_PERF_CTL, lo, hi); 261 + } 262 + 263 + u32 cpu_freq_read_amd(struct acpi_pct_register *not_used) 264 + { 265 + u32 val, dummy; 266 + 267 + rdmsr(MSR_AMD_PERF_CTL, val, dummy); 268 + return val; 269 + } 270 + 271 + void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val) 272 + { 273 + wrmsr(MSR_AMD_PERF_CTL, val, 0); 274 + } 275 + 276 + u32 cpu_freq_read_io(struct acpi_pct_register *reg) 277 + { 278 + u32 val; 279 + 280 + acpi_os_read_port(reg->address, &val, reg->bit_width); 281 + return val; 282 + } 283 + 284 + void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val) 285 + { 286 + acpi_os_write_port(reg->address, val, reg->bit_width); 287 + } 256 288 257 289 struct drv_cmd { 258 - unsigned int type; 259 - const struct cpumask *mask; 260 - union { 261 - struct msr_addr msr; 262 - struct io_addr io; 263 - } addr; 290 + struct acpi_pct_register *reg; 264 291 u32 val; 292 + union { 293 + void (*write)(struct acpi_pct_register *reg, u32 val); 294 + u32 (*read)(struct acpi_pct_register *reg); 295 + } func; 265 296 }; 266 297 267 298 /* Called via smp_call_function_single(), on the target CPU */ 268 299 
static void do_drv_read(void *_cmd) 269 300 { 270 301 struct drv_cmd *cmd = _cmd; 271 - u32 h; 272 302 273 - switch (cmd->type) { 274 - case SYSTEM_INTEL_MSR_CAPABLE: 275 - case SYSTEM_AMD_MSR_CAPABLE: 276 - rdmsr(cmd->addr.msr.reg, cmd->val, h); 277 - break; 278 - case SYSTEM_IO_CAPABLE: 279 - acpi_os_read_port((acpi_io_address)cmd->addr.io.port, 280 - &cmd->val, 281 - (u32)cmd->addr.io.bit_width); 282 - break; 283 - default: 284 - break; 285 - } 303 + cmd->val = cmd->func.read(cmd->reg); 304 + } 305 + 306 + static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask) 307 + { 308 + struct acpi_processor_performance *perf = to_perf_data(data); 309 + struct drv_cmd cmd = { 310 + .reg = &perf->control_register, 311 + .func.read = data->cpu_freq_read, 312 + }; 313 + int err; 314 + 315 + err = smp_call_function_any(mask, do_drv_read, &cmd, 1); 316 + WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ 317 + return cmd.val; 286 318 } 287 319 288 320 /* Called via smp_call_function_many(), on the target CPUs */ 289 321 static void do_drv_write(void *_cmd) 290 322 { 291 323 struct drv_cmd *cmd = _cmd; 292 - u32 lo, hi; 293 324 294 - switch (cmd->type) { 295 - case SYSTEM_INTEL_MSR_CAPABLE: 296 - rdmsr(cmd->addr.msr.reg, lo, hi); 297 - lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); 298 - wrmsr(cmd->addr.msr.reg, lo, hi); 299 - break; 300 - case SYSTEM_AMD_MSR_CAPABLE: 301 - wrmsr(cmd->addr.msr.reg, cmd->val, 0); 302 - break; 303 - case SYSTEM_IO_CAPABLE: 304 - acpi_os_write_port((acpi_io_address)cmd->addr.io.port, 305 - cmd->val, 306 - (u32)cmd->addr.io.bit_width); 307 - break; 308 - default: 309 - break; 310 - } 325 + cmd->func.write(cmd->reg, cmd->val); 311 326 } 312 327 313 - static void drv_read(struct drv_cmd *cmd) 328 + static void drv_write(struct acpi_cpufreq_data *data, 329 + const struct cpumask *mask, u32 val) 314 330 { 315 - int err; 316 - cmd->val = 0; 317 - 318 - err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 
1); 319 - WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ 320 - } 321 - 322 - static void drv_write(struct drv_cmd *cmd) 323 - { 331 + struct acpi_processor_performance *perf = to_perf_data(data); 332 + struct drv_cmd cmd = { 333 + .reg = &perf->control_register, 334 + .val = val, 335 + .func.write = data->cpu_freq_write, 336 + }; 324 337 int this_cpu; 325 338 326 339 this_cpu = get_cpu(); 327 - if (cpumask_test_cpu(this_cpu, cmd->mask)) 328 - do_drv_write(cmd); 329 - smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); 340 + if (cpumask_test_cpu(this_cpu, mask)) 341 + do_drv_write(&cmd); 342 + 343 + smp_call_function_many(mask, do_drv_write, &cmd, 1); 330 344 put_cpu(); 331 345 } 332 346 333 - static u32 334 - get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) 347 + static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) 335 348 { 336 - struct acpi_processor_performance *perf; 337 - struct drv_cmd cmd; 349 + u32 val; 338 350 339 351 if (unlikely(cpumask_empty(mask))) 340 352 return 0; 341 353 342 - switch (data->cpu_feature) { 343 - case SYSTEM_INTEL_MSR_CAPABLE: 344 - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 345 - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; 346 - break; 347 - case SYSTEM_AMD_MSR_CAPABLE: 348 - cmd.type = SYSTEM_AMD_MSR_CAPABLE; 349 - cmd.addr.msr.reg = MSR_AMD_PERF_CTL; 350 - break; 351 - case SYSTEM_IO_CAPABLE: 352 - cmd.type = SYSTEM_IO_CAPABLE; 353 - perf = to_perf_data(data); 354 - cmd.addr.io.port = perf->control_register.address; 355 - cmd.addr.io.bit_width = perf->control_register.bit_width; 356 - break; 357 - default: 358 - return 0; 359 - } 354 + val = drv_read(data, mask); 360 355 361 - cmd.mask = mask; 362 - drv_read(&cmd); 356 + pr_debug("get_cur_val = %u\n", val); 363 357 364 - pr_debug("get_cur_val = %u\n", cmd.val); 365 - 366 - return cmd.val; 358 + return val; 367 359 } 368 360 369 361 static unsigned int get_cur_freq_on_cpu(unsigned int cpu) ··· 412 416 { 413 417 struct 
acpi_cpufreq_data *data = policy->driver_data; 414 418 struct acpi_processor_performance *perf; 415 - struct drv_cmd cmd; 419 + const struct cpumask *mask; 416 420 unsigned int next_perf_state = 0; /* Index into perf table */ 417 421 int result = 0; 418 422 ··· 430 434 } else { 431 435 pr_debug("Already at target state (P%d)\n", 432 436 next_perf_state); 433 - goto out; 437 + return 0; 434 438 } 435 439 } 436 440 437 - switch (data->cpu_feature) { 438 - case SYSTEM_INTEL_MSR_CAPABLE: 439 - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 440 - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; 441 - cmd.val = (u32) perf->states[next_perf_state].control; 442 - break; 443 - case SYSTEM_AMD_MSR_CAPABLE: 444 - cmd.type = SYSTEM_AMD_MSR_CAPABLE; 445 - cmd.addr.msr.reg = MSR_AMD_PERF_CTL; 446 - cmd.val = (u32) perf->states[next_perf_state].control; 447 - break; 448 - case SYSTEM_IO_CAPABLE: 449 - cmd.type = SYSTEM_IO_CAPABLE; 450 - cmd.addr.io.port = perf->control_register.address; 451 - cmd.addr.io.bit_width = perf->control_register.bit_width; 452 - cmd.val = (u32) perf->states[next_perf_state].control; 453 - break; 454 - default: 455 - result = -ENODEV; 456 - goto out; 457 - } 441 + /* 442 + * The core won't allow CPUs to go away until the governor has been 443 + * stopped, so we can rely on the stability of policy->cpus. 444 + */ 445 + mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ? 
446 + cpumask_of(policy->cpu) : policy->cpus; 458 447 459 - /* cpufreq holds the hotplug lock, so we are safe from here on */ 460 - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) 461 - cmd.mask = policy->cpus; 462 - else 463 - cmd.mask = cpumask_of(policy->cpu); 464 - 465 - drv_write(&cmd); 448 + drv_write(data, mask, perf->states[next_perf_state].control); 466 449 467 450 if (acpi_pstate_strict) { 468 - if (!check_freqs(cmd.mask, data->freq_table[index].frequency, 451 + if (!check_freqs(mask, data->freq_table[index].frequency, 469 452 data)) { 470 453 pr_debug("acpi_cpufreq_target failed (%d)\n", 471 454 policy->cpu); ··· 455 480 if (!result) 456 481 perf->state = next_perf_state; 457 482 458 - out: 459 483 return result; 460 484 } 461 485 ··· 714 740 } 715 741 pr_debug("SYSTEM IO addr space\n"); 716 742 data->cpu_feature = SYSTEM_IO_CAPABLE; 743 + data->cpu_freq_read = cpu_freq_read_io; 744 + data->cpu_freq_write = cpu_freq_write_io; 717 745 break; 718 746 case ACPI_ADR_SPACE_FIXED_HARDWARE: 719 747 pr_debug("HARDWARE addr space\n"); 720 748 if (check_est_cpu(cpu)) { 721 749 data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; 750 + data->cpu_freq_read = cpu_freq_read_intel; 751 + data->cpu_freq_write = cpu_freq_write_intel; 722 752 break; 723 753 } 724 754 if (check_amd_hwpstate_cpu(cpu)) { 725 755 data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; 756 + data->cpu_freq_read = cpu_freq_read_amd; 757 + data->cpu_freq_write = cpu_freq_write_amd; 726 758 break; 727 759 } 728 760 result = -ENODEV;

+4 -4

drivers/cpufreq/amd_freq_sensitivity.c

··· 21 21 #include <asm/msr.h> 22 22 #include <asm/cpufeature.h> 23 23 24 - #include "cpufreq_governor.h" 24 + #include "cpufreq_ondemand.h" 25 25 26 26 #define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080 27 27 #define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081 ··· 45 45 long d_actual, d_reference; 46 46 struct msr actual, reference; 47 47 struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu); 48 - struct dbs_data *od_data = policy->governor_data; 48 + struct policy_dbs_info *policy_dbs = policy->governor_data; 49 + struct dbs_data *od_data = policy_dbs->dbs_data; 49 50 struct od_dbs_tuners *od_tuners = od_data->tuners; 50 - struct od_cpu_dbs_info_s *od_info = 51 - od_data->cdata->get_cpu_dbs_info_s(policy->cpu); 51 + struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); 52 52 53 53 if (!od_info->freq_table) 54 54 return freq_next;

+116 -206

drivers/cpufreq/cpufreq-dt.c

··· 31 31 32 32 struct private_data { 33 33 struct device *cpu_dev; 34 - struct regulator *cpu_reg; 35 34 struct thermal_cooling_device *cdev; 36 - unsigned int voltage_tolerance; /* in percentage */ 35 + const char *reg_name; 37 36 }; 38 37 39 38 static struct freq_attr *cpufreq_dt_attr[] = { ··· 43 44 44 45 static int set_target(struct cpufreq_policy *policy, unsigned int index) 45 46 { 46 - struct dev_pm_opp *opp; 47 - struct cpufreq_frequency_table *freq_table = policy->freq_table; 48 - struct clk *cpu_clk = policy->clk; 49 47 struct private_data *priv = policy->driver_data; 50 - struct device *cpu_dev = priv->cpu_dev; 51 - struct regulator *cpu_reg = priv->cpu_reg; 52 - unsigned long volt = 0, tol = 0; 53 - int volt_old = 0; 54 - unsigned int old_freq, new_freq; 55 - long freq_Hz, freq_exact; 56 - int ret; 57 48 58 - freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000); 59 - if (freq_Hz <= 0) 60 - freq_Hz = freq_table[index].frequency * 1000; 61 - 62 - freq_exact = freq_Hz; 63 - new_freq = freq_Hz / 1000; 64 - old_freq = clk_get_rate(cpu_clk) / 1000; 65 - 66 - if (!IS_ERR(cpu_reg)) { 67 - unsigned long opp_freq; 68 - 69 - rcu_read_lock(); 70 - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz); 71 - if (IS_ERR(opp)) { 72 - rcu_read_unlock(); 73 - dev_err(cpu_dev, "failed to find OPP for %ld\n", 74 - freq_Hz); 75 - return PTR_ERR(opp); 76 - } 77 - volt = dev_pm_opp_get_voltage(opp); 78 - opp_freq = dev_pm_opp_get_freq(opp); 79 - rcu_read_unlock(); 80 - tol = volt * priv->voltage_tolerance / 100; 81 - volt_old = regulator_get_voltage(cpu_reg); 82 - dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n", 83 - opp_freq / 1000, volt); 84 - } 85 - 86 - dev_dbg(cpu_dev, "%u MHz, %d mV --> %u MHz, %ld mV\n", 87 - old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1, 88 - new_freq / 1000, volt ? volt / 1000 : -1); 89 - 90 - /* scaling up? 
scale voltage before frequency */ 91 - if (!IS_ERR(cpu_reg) && new_freq > old_freq) { 92 - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); 93 - if (ret) { 94 - dev_err(cpu_dev, "failed to scale voltage up: %d\n", 95 - ret); 96 - return ret; 97 - } 98 - } 99 - 100 - ret = clk_set_rate(cpu_clk, freq_exact); 101 - if (ret) { 102 - dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); 103 - if (!IS_ERR(cpu_reg) && volt_old > 0) 104 - regulator_set_voltage_tol(cpu_reg, volt_old, tol); 105 - return ret; 106 - } 107 - 108 - /* scaling down? scale voltage after frequency */ 109 - if (!IS_ERR(cpu_reg) && new_freq < old_freq) { 110 - ret = regulator_set_voltage_tol(cpu_reg, volt, tol); 111 - if (ret) { 112 - dev_err(cpu_dev, "failed to scale voltage down: %d\n", 113 - ret); 114 - clk_set_rate(cpu_clk, old_freq * 1000); 115 - } 116 - } 117 - 118 - return ret; 49 + return dev_pm_opp_set_rate(priv->cpu_dev, 50 + policy->freq_table[index].frequency * 1000); 119 51 } 120 52 121 - static int allocate_resources(int cpu, struct device **cdev, 122 - struct regulator **creg, struct clk **cclk) 53 + /* 54 + * An earlier version of opp-v1 bindings used to name the regulator 55 + * "cpu0-supply", we still need to handle that for backwards compatibility. 
56 + */ 57 + static const char *find_supply_name(struct device *dev) 58 + { 59 + struct device_node *np; 60 + struct property *pp; 61 + int cpu = dev->id; 62 + const char *name = NULL; 63 + 64 + np = of_node_get(dev->of_node); 65 + 66 + /* This must be valid for sure */ 67 + if (WARN_ON(!np)) 68 + return NULL; 69 + 70 + /* Try "cpu0" for older DTs */ 71 + if (!cpu) { 72 + pp = of_find_property(np, "cpu0-supply", NULL); 73 + if (pp) { 74 + name = "cpu0"; 75 + goto node_put; 76 + } 77 + } 78 + 79 + pp = of_find_property(np, "cpu-supply", NULL); 80 + if (pp) { 81 + name = "cpu"; 82 + goto node_put; 83 + } 84 + 85 + dev_dbg(dev, "no regulator for cpu%d\n", cpu); 86 + node_put: 87 + of_node_put(np); 88 + return name; 89 + } 90 + 91 + static int resources_available(void) 123 92 { 124 93 struct device *cpu_dev; 125 94 struct regulator *cpu_reg; 126 95 struct clk *cpu_clk; 127 96 int ret = 0; 128 - char *reg_cpu0 = "cpu0", *reg_cpu = "cpu", *reg; 97 + const char *name; 129 98 130 - cpu_dev = get_cpu_device(cpu); 99 + cpu_dev = get_cpu_device(0); 131 100 if (!cpu_dev) { 132 - pr_err("failed to get cpu%d device\n", cpu); 101 + pr_err("failed to get cpu0 device\n"); 133 102 return -ENODEV; 134 103 } 135 104 136 - /* Try "cpu0" for older DTs */ 137 - if (!cpu) 138 - reg = reg_cpu0; 139 - else 140 - reg = reg_cpu; 105 + cpu_clk = clk_get(cpu_dev, NULL); 106 + ret = PTR_ERR_OR_ZERO(cpu_clk); 107 + if (ret) { 108 + /* 109 + * If cpu's clk node is present, but clock is not yet 110 + * registered, we should try defering probe. 
111 + */ 112 + if (ret == -EPROBE_DEFER) 113 + dev_dbg(cpu_dev, "clock not ready, retry\n"); 114 + else 115 + dev_err(cpu_dev, "failed to get clock: %d\n", ret); 141 116 142 - try_again: 143 - cpu_reg = regulator_get_optional(cpu_dev, reg); 117 + return ret; 118 + } 119 + 120 + clk_put(cpu_clk); 121 + 122 + name = find_supply_name(cpu_dev); 123 + /* Platform doesn't require regulator */ 124 + if (!name) 125 + return 0; 126 + 127 + cpu_reg = regulator_get_optional(cpu_dev, name); 144 128 ret = PTR_ERR_OR_ZERO(cpu_reg); 145 129 if (ret) { 146 130 /* 147 131 * If cpu's regulator supply node is present, but regulator is 148 132 * not yet registered, we should try defering probe. 149 133 */ 150 - if (ret == -EPROBE_DEFER) { 151 - dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n", 152 - cpu); 153 - return ret; 154 - } 155 - 156 - /* Try with "cpu-supply" */ 157 - if (reg == reg_cpu0) { 158 - reg = reg_cpu; 159 - goto try_again; 160 - } 161 - 162 - dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret); 163 - } 164 - 165 - cpu_clk = clk_get(cpu_dev, NULL); 166 - ret = PTR_ERR_OR_ZERO(cpu_clk); 167 - if (ret) { 168 - /* put regulator */ 169 - if (!IS_ERR(cpu_reg)) 170 - regulator_put(cpu_reg); 171 - 172 - /* 173 - * If cpu's clk node is present, but clock is not yet 174 - * registered, we should try defering probe. 
175 - */ 176 134 if (ret == -EPROBE_DEFER) 177 - dev_dbg(cpu_dev, "cpu%d clock not ready, retry\n", cpu); 135 + dev_dbg(cpu_dev, "cpu0 regulator not ready, retry\n"); 178 136 else 179 - dev_err(cpu_dev, "failed to get cpu%d clock: %d\n", cpu, 180 - ret); 181 - } else { 182 - *cdev = cpu_dev; 183 - *creg = cpu_reg; 184 - *cclk = cpu_clk; 137 + dev_dbg(cpu_dev, "no regulator for cpu0: %d\n", ret); 138 + 139 + return ret; 185 140 } 186 141 187 - return ret; 142 + regulator_put(cpu_reg); 143 + return 0; 188 144 } 189 145 190 146 static int cpufreq_init(struct cpufreq_policy *policy) 191 147 { 192 148 struct cpufreq_frequency_table *freq_table; 193 - struct device_node *np; 194 149 struct private_data *priv; 195 150 struct device *cpu_dev; 196 - struct regulator *cpu_reg; 197 151 struct clk *cpu_clk; 198 152 struct dev_pm_opp *suspend_opp; 199 - unsigned long min_uV = ~0, max_uV = 0; 200 153 unsigned int transition_latency; 201 - bool need_update = false; 154 + bool opp_v1 = false; 155 + const char *name; 202 156 int ret; 203 157 204 - ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk); 205 - if (ret) { 206 - pr_err("%s: Failed to allocate resources: %d\n", __func__, ret); 207 - return ret; 158 + cpu_dev = get_cpu_device(policy->cpu); 159 + if (!cpu_dev) { 160 + pr_err("failed to get cpu%d device\n", policy->cpu); 161 + return -ENODEV; 208 162 } 209 163 210 - np = of_node_get(cpu_dev->of_node); 211 - if (!np) { 212 - dev_err(cpu_dev, "failed to find cpu%d node\n", policy->cpu); 213 - ret = -ENOENT; 214 - goto out_put_reg_clk; 164 + cpu_clk = clk_get(cpu_dev, NULL); 165 + if (IS_ERR(cpu_clk)) { 166 + ret = PTR_ERR(cpu_clk); 167 + dev_err(cpu_dev, "%s: failed to get clk: %d\n", __func__, ret); 168 + return ret; 215 169 } 216 170 217 171 /* Get OPP-sharing information from "operating-points-v2" bindings */ ··· 175 223 * finding shared-OPPs for backward compatibility. 
176 224 */ 177 225 if (ret == -ENOENT) 178 - need_update = true; 226 + opp_v1 = true; 179 227 else 180 - goto out_node_put; 228 + goto out_put_clk; 229 + } 230 + 231 + /* 232 + * OPP layer will be taking care of regulators now, but it needs to know 233 + * the name of the regulator first. 234 + */ 235 + name = find_supply_name(cpu_dev); 236 + if (name) { 237 + ret = dev_pm_opp_set_regulator(cpu_dev, name); 238 + if (ret) { 239 + dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n", 240 + policy->cpu, ret); 241 + goto out_put_clk; 242 + } 181 243 } 182 244 183 245 /* ··· 212 246 */ 213 247 ret = dev_pm_opp_get_opp_count(cpu_dev); 214 248 if (ret <= 0) { 215 - pr_debug("OPP table is not ready, deferring probe\n"); 249 + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); 216 250 ret = -EPROBE_DEFER; 217 251 goto out_free_opp; 218 252 } 219 253 220 - if (need_update) { 254 + if (opp_v1) { 221 255 struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data(); 222 256 223 257 if (!pd || !pd->independent_clocks) ··· 231 265 if (ret) 232 266 dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", 233 267 __func__, ret); 234 - 235 - of_property_read_u32(np, "clock-latency", &transition_latency); 236 - } else { 237 - transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev); 238 268 } 239 269 240 270 priv = kzalloc(sizeof(*priv), GFP_KERNEL); ··· 239 277 goto out_free_opp; 240 278 } 241 279 242 - of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance); 243 - 244 - if (!transition_latency) 245 - transition_latency = CPUFREQ_ETERNAL; 246 - 247 - if (!IS_ERR(cpu_reg)) { 248 - unsigned long opp_freq = 0; 249 - 250 - /* 251 - * Disable any OPPs where the connected regulator isn't able to 252 - * provide the specified voltage and record minimum and maximum 253 - * voltage levels. 
254 - */ 255 - while (1) { 256 - struct dev_pm_opp *opp; 257 - unsigned long opp_uV, tol_uV; 258 - 259 - rcu_read_lock(); 260 - opp = dev_pm_opp_find_freq_ceil(cpu_dev, &opp_freq); 261 - if (IS_ERR(opp)) { 262 - rcu_read_unlock(); 263 - break; 264 - } 265 - opp_uV = dev_pm_opp_get_voltage(opp); 266 - rcu_read_unlock(); 267 - 268 - tol_uV = opp_uV * priv->voltage_tolerance / 100; 269 - if (regulator_is_supported_voltage(cpu_reg, 270 - opp_uV - tol_uV, 271 - opp_uV + tol_uV)) { 272 - if (opp_uV < min_uV) 273 - min_uV = opp_uV; 274 - if (opp_uV > max_uV) 275 - max_uV = opp_uV; 276 - } else { 277 - dev_pm_opp_disable(cpu_dev, opp_freq); 278 - } 279 - 280 - opp_freq++; 281 - } 282 - 283 - ret = regulator_set_voltage_time(cpu_reg, min_uV, max_uV); 284 - if (ret > 0) 285 - transition_latency += ret * 1000; 286 - } 280 + priv->reg_name = name; 287 281 288 282 ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); 289 283 if (ret) { 290 - pr_err("failed to init cpufreq table: %d\n", ret); 284 + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); 291 285 goto out_free_priv; 292 286 } 293 287 294 288 priv->cpu_dev = cpu_dev; 295 - priv->cpu_reg = cpu_reg; 296 289 policy->driver_data = priv; 297 - 298 290 policy->clk = cpu_clk; 299 291 300 292 rcu_read_lock(); ··· 273 357 cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; 274 358 } 275 359 276 - policy->cpuinfo.transition_latency = transition_latency; 360 + transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev); 361 + if (!transition_latency) 362 + transition_latency = CPUFREQ_ETERNAL; 277 363 278 - of_node_put(np); 364 + policy->cpuinfo.transition_latency = transition_latency; 279 365 280 366 return 0; 281 367 ··· 287 369 kfree(priv); 288 370 out_free_opp: 289 371 dev_pm_opp_of_cpumask_remove_table(policy->cpus); 290 - out_node_put: 291 - of_node_put(np); 292 - out_put_reg_clk: 372 + if (name) 373 + dev_pm_opp_put_regulator(cpu_dev); 374 + out_put_clk: 293 375 clk_put(cpu_clk); 294 - if 
(!IS_ERR(cpu_reg)) 295 - regulator_put(cpu_reg); 296 376 297 377 return ret; 298 378 } ··· 302 386 cpufreq_cooling_unregister(priv->cdev); 303 387 dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); 304 388 dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); 389 + if (priv->reg_name) 390 + dev_pm_opp_put_regulator(priv->cpu_dev); 391 + 305 392 clk_put(policy->clk); 306 - if (!IS_ERR(priv->cpu_reg)) 307 - regulator_put(priv->cpu_reg); 308 393 kfree(priv); 309 394 310 395 return 0; ··· 358 441 359 442 static int dt_cpufreq_probe(struct platform_device *pdev) 360 443 { 361 - struct device *cpu_dev; 362 - struct regulator *cpu_reg; 363 - struct clk *cpu_clk; 364 444 int ret; 365 445 366 446 /* ··· 367 453 * 368 454 * FIXME: Is checking this only for CPU0 sufficient ? 369 455 */ 370 - ret = allocate_resources(0, &cpu_dev, &cpu_reg, &cpu_clk); 456 + ret = resources_available(); 371 457 if (ret) 372 458 return ret; 373 - 374 - clk_put(cpu_clk); 375 - if (!IS_ERR(cpu_reg)) 376 - regulator_put(cpu_reg); 377 459 378 460 dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); 379 461 380 462 ret = cpufreq_register_driver(&dt_cpufreq_driver); 381 463 if (ret) 382 - dev_err(cpu_dev, "failed register driver: %d\n", ret); 464 + dev_err(&pdev->dev, "failed register driver: %d\n", ret); 383 465 384 466 return ret; 385 467 }

+129 -210

drivers/cpufreq/cpufreq.c

··· 38 38 return cpumask_empty(policy->cpus); 39 39 } 40 40 41 - static bool suitable_policy(struct cpufreq_policy *policy, bool active) 42 - { 43 - return active == !policy_is_inactive(policy); 44 - } 45 - 46 - /* Finds Next Acive/Inactive policy */ 47 - static struct cpufreq_policy *next_policy(struct cpufreq_policy *policy, 48 - bool active) 49 - { 50 - do { 51 - /* No more policies in the list */ 52 - if (list_is_last(&policy->policy_list, &cpufreq_policy_list)) 53 - return NULL; 54 - 55 - policy = list_next_entry(policy, policy_list); 56 - } while (!suitable_policy(policy, active)); 57 - 58 - return policy; 59 - } 60 - 61 - static struct cpufreq_policy *first_policy(bool active) 62 - { 63 - struct cpufreq_policy *policy; 64 - 65 - /* No policies in the list */ 66 - if (list_empty(&cpufreq_policy_list)) 67 - return NULL; 68 - 69 - policy = list_first_entry(&cpufreq_policy_list, typeof(*policy), 70 - policy_list); 71 - 72 - if (!suitable_policy(policy, active)) 73 - policy = next_policy(policy, active); 74 - 75 - return policy; 76 - } 77 - 78 41 /* Macros to iterate over CPU policies */ 79 - #define for_each_suitable_policy(__policy, __active) \ 80 - for (__policy = first_policy(__active); \ 81 - __policy; \ 82 - __policy = next_policy(__policy, __active)) 42 + #define for_each_suitable_policy(__policy, __active) \ 43 + list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ 44 + if ((__active) == !policy_is_inactive(__policy)) 83 45 84 46 #define for_each_active_policy(__policy) \ 85 47 for_each_suitable_policy(__policy, true) ··· 64 102 static struct cpufreq_driver *cpufreq_driver; 65 103 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); 66 104 static DEFINE_RWLOCK(cpufreq_driver_lock); 67 - DEFINE_MUTEX(cpufreq_governor_lock); 68 105 69 106 /* Flag to suspend/resume CPUFreq governors */ 70 107 static bool cpufreq_suspended; ··· 74 113 } 75 114 76 115 /* internal prototypes */ 77 - static int __cpufreq_governor(struct cpufreq_policy 
*policy, 78 - unsigned int event); 116 + static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); 79 117 static unsigned int __cpufreq_get(struct cpufreq_policy *policy); 80 - static void handle_update(struct work_struct *work); 81 118 82 119 /** 83 120 * Two notifier lists: the "policy" list is involved in the ··· 777 818 ssize_t ret; 778 819 779 820 down_read(&policy->rwsem); 780 - 781 - if (fattr->show) 782 - ret = fattr->show(policy, buf); 783 - else 784 - ret = -EIO; 785 - 821 + ret = fattr->show(policy, buf); 786 822 up_read(&policy->rwsem); 787 823 788 824 return ret; ··· 792 838 793 839 get_online_cpus(); 794 840 795 - if (!cpu_online(policy->cpu)) 796 - goto unlock; 797 - 798 - down_write(&policy->rwsem); 799 - 800 - if (fattr->store) 841 + if (cpu_online(policy->cpu)) { 842 + down_write(&policy->rwsem); 801 843 ret = fattr->store(policy, buf, count); 802 - else 803 - ret = -EIO; 844 + up_write(&policy->rwsem); 845 + } 804 846 805 - up_write(&policy->rwsem); 806 - unlock: 807 847 put_online_cpus(); 808 848 809 849 return ret; ··· 907 959 return cpufreq_add_dev_symlink(policy); 908 960 } 909 961 962 + __weak struct cpufreq_governor *cpufreq_default_governor(void) 963 + { 964 + return NULL; 965 + } 966 + 910 967 static int cpufreq_init_policy(struct cpufreq_policy *policy) 911 968 { 912 969 struct cpufreq_governor *gov = NULL; ··· 921 968 922 969 /* Update governor of new_policy to the governor used before hotplug */ 923 970 gov = find_governor(policy->last_governor); 924 - if (gov) 971 + if (gov) { 925 972 pr_debug("Restoring governor %s for cpu %d\n", 926 973 policy->governor->name, policy->cpu); 927 - else 928 - gov = CPUFREQ_DEFAULT_GOVERNOR; 974 + } else { 975 + gov = cpufreq_default_governor(); 976 + if (!gov) 977 + return -ENODATA; 978 + } 929 979 930 980 new_policy.governor = gov; 931 981 ··· 952 996 if (cpumask_test_cpu(cpu, policy->cpus)) 953 997 return 0; 954 998 999 + down_write(&policy->rwsem); 955 1000 if (has_target()) { 
956 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); 1001 + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); 957 1002 if (ret) { 958 1003 pr_err("%s: Failed to stop governor\n", __func__); 959 - return ret; 1004 + goto unlock; 960 1005 } 961 1006 } 962 1007 963 - down_write(&policy->rwsem); 964 1008 cpumask_set_cpu(cpu, policy->cpus); 965 - up_write(&policy->rwsem); 966 1009 967 1010 if (has_target()) { 968 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); 1011 + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); 969 1012 if (!ret) 970 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 1013 + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 971 1014 972 - if (ret) { 1015 + if (ret) 973 1016 pr_err("%s: Failed to start governor\n", __func__); 974 - return ret; 975 - } 976 1017 } 977 1018 978 - return 0; 1019 + unlock: 1020 + up_write(&policy->rwsem); 1021 + return ret; 1022 + } 1023 + 1024 + static void handle_update(struct work_struct *work) 1025 + { 1026 + struct cpufreq_policy *policy = 1027 + container_of(work, struct cpufreq_policy, update); 1028 + unsigned int cpu = policy->cpu; 1029 + pr_debug("handle_update for cpu %u called\n", cpu); 1030 + cpufreq_update_policy(cpu); 979 1031 } 980 1032 981 1033 static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) 982 1034 { 983 1035 struct device *dev = get_cpu_device(cpu); 984 1036 struct cpufreq_policy *policy; 1037 + int ret; 985 1038 986 1039 if (WARN_ON(!dev)) 987 1040 return NULL; ··· 1008 1043 if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) 1009 1044 goto err_free_rcpumask; 1010 1045 1011 - kobject_init(&policy->kobj, &ktype_cpufreq); 1046 + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, 1047 + cpufreq_global_kobject, "policy%u", cpu); 1048 + if (ret) { 1049 + pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); 1050 + goto err_free_real_cpus; 1051 + } 1052 + 1012 1053 INIT_LIST_HEAD(&policy->policy_list); 1013 1054 init_rwsem(&policy->rwsem); 1014 
1055 spin_lock_init(&policy->transition_lock); ··· 1025 1054 policy->cpu = cpu; 1026 1055 return policy; 1027 1056 1057 + err_free_real_cpus: 1058 + free_cpumask_var(policy->real_cpus); 1028 1059 err_free_rcpumask: 1029 1060 free_cpumask_var(policy->related_cpus); 1030 1061 err_free_cpumask: ··· 1131 1158 cpumask_copy(policy->related_cpus, policy->cpus); 1132 1159 /* Remember CPUs present at the policy creation time. */ 1133 1160 cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); 1134 - 1135 - /* Name and add the kobject */ 1136 - ret = kobject_add(&policy->kobj, cpufreq_global_kobject, 1137 - "policy%u", 1138 - cpumask_first(policy->related_cpus)); 1139 - if (ret) { 1140 - pr_err("%s: failed to add policy->kobj: %d\n", __func__, 1141 - ret); 1142 - goto out_exit_policy; 1143 - } 1144 1161 } 1145 1162 1146 1163 /* ··· 1272 1309 return ret; 1273 1310 } 1274 1311 1275 - static void cpufreq_offline_prepare(unsigned int cpu) 1312 + static void cpufreq_offline(unsigned int cpu) 1276 1313 { 1277 1314 struct cpufreq_policy *policy; 1315 + int ret; 1278 1316 1279 1317 pr_debug("%s: unregistering CPU %u\n", __func__, cpu); 1280 1318 ··· 1285 1321 return; 1286 1322 } 1287 1323 1324 + down_write(&policy->rwsem); 1288 1325 if (has_target()) { 1289 - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); 1326 + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); 1290 1327 if (ret) 1291 1328 pr_err("%s: Failed to stop governor\n", __func__); 1292 1329 } 1293 1330 1294 - down_write(&policy->rwsem); 1295 1331 cpumask_clear_cpu(cpu, policy->cpus); 1296 1332 1297 1333 if (policy_is_inactive(policy)) { ··· 1304 1340 /* Nominate new CPU */ 1305 1341 policy->cpu = cpumask_any(policy->cpus); 1306 1342 } 1307 - up_write(&policy->rwsem); 1308 1343 1309 1344 /* Start governor again for active policy */ 1310 1345 if (!policy_is_inactive(policy)) { 1311 1346 if (has_target()) { 1312 - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); 1347 + ret = cpufreq_governor(policy, 
CPUFREQ_GOV_START); 1313 1348 if (!ret) 1314 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 1349 + ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 1315 1350 1316 1351 if (ret) 1317 1352 pr_err("%s: Failed to start governor\n", __func__); 1318 1353 } 1319 - } else if (cpufreq_driver->stop_cpu) { 1354 + 1355 + goto unlock; 1356 + } 1357 + 1358 + if (cpufreq_driver->stop_cpu) 1320 1359 cpufreq_driver->stop_cpu(policy); 1321 - } 1322 - } 1323 - 1324 - static void cpufreq_offline_finish(unsigned int cpu) 1325 - { 1326 - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); 1327 - 1328 - if (!policy) { 1329 - pr_debug("%s: No cpu_data found\n", __func__); 1330 - return; 1331 - } 1332 - 1333 - /* Only proceed for inactive policies */ 1334 - if (!policy_is_inactive(policy)) 1335 - return; 1336 1360 1337 1361 /* If cpu is last user of policy, free policy */ 1338 1362 if (has_target()) { 1339 - int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); 1363 + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); 1340 1364 if (ret) 1341 1365 pr_err("%s: Failed to exit governor\n", __func__); 1342 1366 } ··· 1338 1386 cpufreq_driver->exit(policy); 1339 1387 policy->freq_table = NULL; 1340 1388 } 1389 + 1390 + unlock: 1391 + up_write(&policy->rwsem); 1341 1392 } 1342 1393 1343 1394 /** ··· 1356 1401 if (!policy) 1357 1402 return; 1358 1403 1359 - if (cpu_online(cpu)) { 1360 - cpufreq_offline_prepare(cpu); 1361 - cpufreq_offline_finish(cpu); 1362 - } 1404 + if (cpu_online(cpu)) 1405 + cpufreq_offline(cpu); 1363 1406 1364 1407 cpumask_clear_cpu(cpu, policy->real_cpus); 1365 1408 remove_cpu_dev_symlink(policy, cpu); 1366 1409 1367 1410 if (cpumask_empty(policy->real_cpus)) 1368 1411 cpufreq_policy_free(policy, true); 1369 - } 1370 - 1371 - static void handle_update(struct work_struct *work) 1372 - { 1373 - struct cpufreq_policy *policy = 1374 - container_of(work, struct cpufreq_policy, update); 1375 - unsigned int cpu = policy->cpu; 1376 - 
pr_debug("handle_update for cpu %u called\n", cpu); 1377 - cpufreq_update_policy(cpu); 1378 1412 } 1379 1413 1380 1414 /** ··· 1528 1584 void cpufreq_suspend(void) 1529 1585 { 1530 1586 struct cpufreq_policy *policy; 1587 + int ret; 1531 1588 1532 1589 if (!cpufreq_driver) 1533 1590 return; ··· 1539 1594 pr_debug("%s: Suspending Governors\n", __func__); 1540 1595 1541 1596 for_each_active_policy(policy) { 1542 - if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP)) 1597 + down_write(&policy->rwsem); 1598 + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); 1599 + up_write(&policy->rwsem); 1600 + 1601 + if (ret) 1543 1602 pr_err("%s: Failed to stop governor for policy: %p\n", 1544 1603 __func__, policy); 1545 1604 else if (cpufreq_driver->suspend ··· 1565 1616 void cpufreq_resume(void) 1566 1617 { 1567 1618 struct cpufreq_policy *policy; 1619 + int ret; 1568 1620 1569 1621 if (!cpufreq_driver) 1570 1622 return; ··· 1578 1628 pr_debug("%s: Resuming Governors\n", __func__); 1579 1629 1580 1630 for_each_active_policy(policy) { 1581 - if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) 1631 + if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) { 1582 1632 pr_err("%s: Failed to resume driver: %p\n", __func__, 1583 1633 policy); 1584 - else if (__cpufreq_governor(policy, CPUFREQ_GOV_START) 1585 - || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) 1586 - pr_err("%s: Failed to start governor for policy: %p\n", 1587 - __func__, policy); 1634 + } else { 1635 + down_write(&policy->rwsem); 1636 + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); 1637 + if (!ret) 1638 + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 1639 + up_write(&policy->rwsem); 1640 + 1641 + if (ret) 1642 + pr_err("%s: Failed to start governor for policy: %p\n", 1643 + __func__, policy); 1644 + } 1588 1645 } 1589 1646 1590 1647 /* ··· 1803 1846 unsigned int relation) 1804 1847 { 1805 1848 unsigned int old_target_freq = target_freq; 1806 - int retval = -EINVAL; 1849 + struct 
cpufreq_frequency_table *freq_table; 1850 + int index, retval; 1807 1851 1808 1852 if (cpufreq_disabled()) 1809 1853 return -ENODEV; ··· 1831 1873 policy->restore_freq = policy->cur; 1832 1874 1833 1875 if (cpufreq_driver->target) 1834 - retval = cpufreq_driver->target(policy, target_freq, relation); 1835 - else if (cpufreq_driver->target_index) { 1836 - struct cpufreq_frequency_table *freq_table; 1837 - int index; 1876 + return cpufreq_driver->target(policy, target_freq, relation); 1838 1877 1839 - freq_table = cpufreq_frequency_get_table(policy->cpu); 1840 - if (unlikely(!freq_table)) { 1841 - pr_err("%s: Unable to find freq_table\n", __func__); 1842 - goto out; 1843 - } 1878 + if (!cpufreq_driver->target_index) 1879 + return -EINVAL; 1844 1880 1845 - retval = cpufreq_frequency_table_target(policy, freq_table, 1846 - target_freq, relation, &index); 1847 - if (unlikely(retval)) { 1848 - pr_err("%s: Unable to find matching freq\n", __func__); 1849 - goto out; 1850 - } 1851 - 1852 - if (freq_table[index].frequency == policy->cur) { 1853 - retval = 0; 1854 - goto out; 1855 - } 1856 - 1857 - retval = __target_index(policy, freq_table, index); 1881 + freq_table = cpufreq_frequency_get_table(policy->cpu); 1882 + if (unlikely(!freq_table)) { 1883 + pr_err("%s: Unable to find freq_table\n", __func__); 1884 + return -EINVAL; 1858 1885 } 1859 1886 1860 - out: 1861 - return retval; 1887 + retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, 1888 + relation, &index); 1889 + if (unlikely(retval)) { 1890 + pr_err("%s: Unable to find matching freq\n", __func__); 1891 + return retval; 1892 + } 1893 + 1894 + if (freq_table[index].frequency == policy->cur) 1895 + return 0; 1896 + 1897 + return __target_index(policy, freq_table, index); 1862 1898 } 1863 1899 EXPORT_SYMBOL_GPL(__cpufreq_driver_target); 1864 1900 ··· 1872 1920 } 1873 1921 EXPORT_SYMBOL_GPL(cpufreq_driver_target); 1874 1922 1875 - static int __cpufreq_governor(struct cpufreq_policy *policy, 1876 - 
unsigned int event) 1923 + __weak struct cpufreq_governor *cpufreq_fallback_governor(void) 1924 + { 1925 + return NULL; 1926 + } 1927 + 1928 + static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) 1877 1929 { 1878 1930 int ret; 1879 - 1880 - /* Only must be defined when default governor is known to have latency 1881 - restrictions, like e.g. conservative or ondemand. 1882 - That this is the case is already ensured in Kconfig 1883 - */ 1884 - #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE 1885 - struct cpufreq_governor *gov = &cpufreq_gov_performance; 1886 - #else 1887 - struct cpufreq_governor *gov = NULL; 1888 - #endif 1889 1931 1890 1932 /* Don't start any governor operations if we are entering suspend */ 1891 1933 if (cpufreq_suspended) ··· 1894 1948 if (policy->governor->max_transition_latency && 1895 1949 policy->cpuinfo.transition_latency > 1896 1950 policy->governor->max_transition_latency) { 1897 - if (!gov) 1898 - return -EINVAL; 1899 - else { 1951 + struct cpufreq_governor *gov = cpufreq_fallback_governor(); 1952 + 1953 + if (gov) { 1900 1954 pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", 1901 1955 policy->governor->name, gov->name); 1902 1956 policy->governor = gov; 1957 + } else { 1958 + return -EINVAL; 1903 1959 } 1904 1960 } 1905 1961 ··· 1911 1963 1912 1964 pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); 1913 1965 1914 - mutex_lock(&cpufreq_governor_lock); 1915 - if ((policy->governor_enabled && event == CPUFREQ_GOV_START) 1916 - || (!policy->governor_enabled 1917 - && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { 1918 - mutex_unlock(&cpufreq_governor_lock); 1919 - return -EBUSY; 1920 - } 1921 - 1922 - if (event == CPUFREQ_GOV_STOP) 1923 - policy->governor_enabled = false; 1924 - else if (event == CPUFREQ_GOV_START) 1925 - policy->governor_enabled = true; 1926 - 1927 - mutex_unlock(&cpufreq_governor_lock); 1928 - 1929 1966 ret = 
policy->governor->governor(policy, event); 1930 1967 1931 1968 if (!ret) { ··· 1918 1985 policy->governor->initialized++; 1919 1986 else if (event == CPUFREQ_GOV_POLICY_EXIT) 1920 1987 policy->governor->initialized--; 1921 - } else { 1922 - /* Restore original values */ 1923 - mutex_lock(&cpufreq_governor_lock); 1924 - if (event == CPUFREQ_GOV_STOP) 1925 - policy->governor_enabled = true; 1926 - else if (event == CPUFREQ_GOV_START) 1927 - policy->governor_enabled = false; 1928 - mutex_unlock(&cpufreq_governor_lock); 1929 1988 } 1930 1989 1931 1990 if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) || ··· 2072 2147 old_gov = policy->governor; 2073 2148 /* end old governor */ 2074 2149 if (old_gov) { 2075 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); 2150 + ret = cpufreq_governor(policy, CPUFREQ_GOV_STOP); 2076 2151 if (ret) { 2077 2152 /* This can happen due to race with other operations */ 2078 2153 pr_debug("%s: Failed to Stop Governor: %s (%d)\n", ··· 2080 2155 return ret; 2081 2156 } 2082 2157 2083 - up_write(&policy->rwsem); 2084 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); 2085 - down_write(&policy->rwsem); 2086 - 2158 + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); 2087 2159 if (ret) { 2088 2160 pr_err("%s: Failed to Exit Governor: %s (%d)\n", 2089 2161 __func__, old_gov->name, ret); ··· 2090 2168 2091 2169 /* start new governor */ 2092 2170 policy->governor = new_policy->governor; 2093 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); 2171 + ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); 2094 2172 if (!ret) { 2095 - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); 2173 + ret = cpufreq_governor(policy, CPUFREQ_GOV_START); 2096 2174 if (!ret) 2097 2175 goto out; 2098 2176 2099 - up_write(&policy->rwsem); 2100 - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); 2101 - down_write(&policy->rwsem); 2177 + cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); 2102 2178 } 2103 2179 2104 2180 /* new governor 
failed, so re-start old one */ 2105 2181 pr_debug("starting governor %s failed\n", policy->governor->name); 2106 2182 if (old_gov) { 2107 2183 policy->governor = old_gov; 2108 - if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) 2184 + if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) 2109 2185 policy->governor = NULL; 2110 2186 else 2111 - __cpufreq_governor(policy, CPUFREQ_GOV_START); 2187 + cpufreq_governor(policy, CPUFREQ_GOV_START); 2112 2188 } 2113 2189 2114 2190 return ret; 2115 2191 2116 2192 out: 2117 2193 pr_debug("governor: change or update limits\n"); 2118 - return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 2194 + return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 2119 2195 } 2120 2196 2121 2197 /** ··· 2180 2260 break; 2181 2261 2182 2262 case CPU_DOWN_PREPARE: 2183 - cpufreq_offline_prepare(cpu); 2184 - break; 2185 - 2186 - case CPU_POST_DEAD: 2187 - cpufreq_offline_finish(cpu); 2263 + cpufreq_offline(cpu); 2188 2264 break; 2189 2265 2190 2266 case CPU_DOWN_FAILED: ··· 2213 2297 __func__); 2214 2298 break; 2215 2299 } 2300 + 2301 + down_write(&policy->rwsem); 2216 2302 policy->user_policy.max = policy->max; 2217 - __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 2303 + cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); 2304 + up_write(&policy->rwsem); 2218 2305 } 2219 2306 } 2220 2307 ··· 2303 2384 * submitted by the CPU Frequency driver. 2304 2385 * 2305 2386 * Registers a CPU Frequency driver to this core code. This code 2306 - * returns zero on success, -EBUSY when another driver got here first 2387 + * returns zero on success, -EEXIST when another driver got here first 2307 2388 * (and isn't unregistered in the meantime). 2308 2389 * 2309 2390 */

+128 -154

drivers/cpufreq/cpufreq_conservative.c

··· 14 14 #include <linux/slab.h> 15 15 #include "cpufreq_governor.h" 16 16 17 + struct cs_policy_dbs_info { 18 + struct policy_dbs_info policy_dbs; 19 + unsigned int down_skip; 20 + unsigned int requested_freq; 21 + }; 22 + 23 + static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) 24 + { 25 + return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs); 26 + } 27 + 28 + struct cs_dbs_tuners { 29 + unsigned int down_threshold; 30 + unsigned int freq_step; 31 + }; 32 + 17 33 /* Conservative governor macros */ 18 34 #define DEF_FREQUENCY_UP_THRESHOLD (80) 19 35 #define DEF_FREQUENCY_DOWN_THRESHOLD (20) 20 36 #define DEF_FREQUENCY_STEP (5) 21 37 #define DEF_SAMPLING_DOWN_FACTOR (1) 22 38 #define MAX_SAMPLING_DOWN_FACTOR (10) 23 - 24 - static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); 25 - 26 - static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, 27 - unsigned int event); 28 - 29 - #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 30 - static 31 - #endif 32 - struct cpufreq_governor cpufreq_gov_conservative = { 33 - .name = "conservative", 34 - .governor = cs_cpufreq_governor_dbs, 35 - .max_transition_latency = TRANSITION_LATENCY_LIMIT, 36 - .owner = THIS_MODULE, 37 - }; 38 39 39 40 static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, 40 41 struct cpufreq_policy *policy) ··· 58 57 * Any frequency increase takes it to the maximum frequency. 
Frequency reduction 59 58 * happens at minimum steps of 5% (default) of maximum frequency 60 59 */ 61 - static void cs_check_cpu(int cpu, unsigned int load) 60 + static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) 62 61 { 63 - struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); 64 - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; 65 - struct dbs_data *dbs_data = policy->governor_data; 62 + struct policy_dbs_info *policy_dbs = policy->governor_data; 63 + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 64 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 66 65 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 66 + unsigned int load = dbs_update(policy); 67 67 68 68 /* 69 69 * break out if we 'cannot' reduce the speed as the user might 70 70 * want freq_step to be zero 71 71 */ 72 72 if (cs_tuners->freq_step == 0) 73 - return; 73 + goto out; 74 74 75 75 /* Check for frequency increase */ 76 - if (load > cs_tuners->up_threshold) { 76 + if (load > dbs_data->up_threshold) { 77 77 dbs_info->down_skip = 0; 78 78 79 79 /* if we are already at full speed then break out early */ 80 80 if (dbs_info->requested_freq == policy->max) 81 - return; 81 + goto out; 82 82 83 83 dbs_info->requested_freq += get_freq_target(cs_tuners, policy); 84 84 ··· 88 86 89 87 __cpufreq_driver_target(policy, dbs_info->requested_freq, 90 88 CPUFREQ_RELATION_H); 91 - return; 89 + goto out; 92 90 } 93 91 94 92 /* if sampling_down_factor is active break out early */ 95 - if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) 96 - return; 93 + if (++dbs_info->down_skip < dbs_data->sampling_down_factor) 94 + goto out; 97 95 dbs_info->down_skip = 0; 98 96 99 97 /* Check for frequency decrease */ ··· 103 101 * if we cannot reduce the frequency anymore, break out early 104 102 */ 105 103 if (policy->cur == policy->min) 106 - return; 104 + goto out; 107 105 108 106 freq_target = get_freq_target(cs_tuners, policy); 109 107 if 
(dbs_info->requested_freq > freq_target) ··· 113 111 114 112 __cpufreq_driver_target(policy, dbs_info->requested_freq, 115 113 CPUFREQ_RELATION_L); 116 - return; 117 114 } 118 - } 119 115 120 - static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) 121 - { 122 - struct dbs_data *dbs_data = policy->governor_data; 123 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 124 - 125 - if (modify_all) 126 - dbs_check_cpu(dbs_data, policy->cpu); 127 - 128 - return delay_for_sampling_rate(cs_tuners->sampling_rate); 116 + out: 117 + return dbs_data->sampling_rate; 129 118 } 130 119 131 120 static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 132 - void *data) 133 - { 134 - struct cpufreq_freqs *freq = data; 135 - struct cs_cpu_dbs_info_s *dbs_info = 136 - &per_cpu(cs_cpu_dbs_info, freq->cpu); 137 - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); 138 - 139 - if (!policy) 140 - return 0; 141 - 142 - /* policy isn't governed by conservative governor */ 143 - if (policy->governor != &cpufreq_gov_conservative) 144 - return 0; 145 - 146 - /* 147 - * we only care if our internally tracked freq moves outside the 'valid' 148 - * ranges of frequency available to us otherwise we do not change it 149 - */ 150 - if (dbs_info->requested_freq > policy->max 151 - || dbs_info->requested_freq < policy->min) 152 - dbs_info->requested_freq = freq->new; 153 - 154 - return 0; 155 - } 121 + void *data); 156 122 157 123 static struct notifier_block cs_cpufreq_notifier_block = { 158 124 .notifier_call = dbs_cpufreq_notifier, 159 125 }; 160 126 161 127 /************************** sysfs interface ************************/ 162 - static struct common_dbs_data cs_dbs_cdata; 128 + static struct dbs_governor cs_dbs_gov; 163 129 164 130 static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, 165 131 const char *buf, size_t count) 166 132 { 167 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 168 133 unsigned int input; 
169 134 int ret; 170 135 ret = sscanf(buf, "%u", &input); ··· 139 170 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 140 171 return -EINVAL; 141 172 142 - cs_tuners->sampling_down_factor = input; 143 - return count; 144 - } 145 - 146 - static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, 147 - size_t count) 148 - { 149 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 150 - unsigned int input; 151 - int ret; 152 - ret = sscanf(buf, "%u", &input); 153 - 154 - if (ret != 1) 155 - return -EINVAL; 156 - 157 - cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); 173 + dbs_data->sampling_down_factor = input; 158 174 return count; 159 175 } 160 176 ··· 154 200 if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) 155 201 return -EINVAL; 156 202 157 - cs_tuners->up_threshold = input; 203 + dbs_data->up_threshold = input; 158 204 return count; 159 205 } 160 206 ··· 168 214 169 215 /* cannot be lower than 11 otherwise freq will not fall */ 170 216 if (ret != 1 || input < 11 || input > 100 || 171 - input >= cs_tuners->up_threshold) 217 + input >= dbs_data->up_threshold) 172 218 return -EINVAL; 173 219 174 220 cs_tuners->down_threshold = input; ··· 178 224 static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, 179 225 const char *buf, size_t count) 180 226 { 181 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 182 - unsigned int input, j; 227 + unsigned int input; 183 228 int ret; 184 229 185 230 ret = sscanf(buf, "%u", &input); ··· 188 235 if (input > 1) 189 236 input = 1; 190 237 191 - if (input == cs_tuners->ignore_nice_load) /* nothing to do */ 238 + if (input == dbs_data->ignore_nice_load) /* nothing to do */ 192 239 return count; 193 240 194 - cs_tuners->ignore_nice_load = input; 241 + dbs_data->ignore_nice_load = input; 195 242 196 243 /* we need to re-evaluate prev_cpu_idle */ 197 - for_each_online_cpu(j) { 198 - struct cs_cpu_dbs_info_s *dbs_info; 199 - dbs_info = 
&per_cpu(cs_cpu_dbs_info, j); 200 - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, 201 - &dbs_info->cdbs.prev_cpu_wall, 0); 202 - if (cs_tuners->ignore_nice_load) 203 - dbs_info->cdbs.prev_cpu_nice = 204 - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 205 - } 244 + gov_update_cpu_data(dbs_data); 245 + 206 246 return count; 207 247 } 208 248 ··· 221 275 return count; 222 276 } 223 277 224 - show_store_one(cs, sampling_rate); 225 - show_store_one(cs, sampling_down_factor); 226 - show_store_one(cs, up_threshold); 227 - show_store_one(cs, down_threshold); 228 - show_store_one(cs, ignore_nice_load); 229 - show_store_one(cs, freq_step); 230 - declare_show_sampling_rate_min(cs); 278 + gov_show_one_common(sampling_rate); 279 + gov_show_one_common(sampling_down_factor); 280 + gov_show_one_common(up_threshold); 281 + gov_show_one_common(ignore_nice_load); 282 + gov_show_one_common(min_sampling_rate); 283 + gov_show_one(cs, down_threshold); 284 + gov_show_one(cs, freq_step); 231 285 232 - gov_sys_pol_attr_rw(sampling_rate); 233 - gov_sys_pol_attr_rw(sampling_down_factor); 234 - gov_sys_pol_attr_rw(up_threshold); 235 - gov_sys_pol_attr_rw(down_threshold); 236 - gov_sys_pol_attr_rw(ignore_nice_load); 237 - gov_sys_pol_attr_rw(freq_step); 238 - gov_sys_pol_attr_ro(sampling_rate_min); 286 + gov_attr_rw(sampling_rate); 287 + gov_attr_rw(sampling_down_factor); 288 + gov_attr_rw(up_threshold); 289 + gov_attr_rw(ignore_nice_load); 290 + gov_attr_ro(min_sampling_rate); 291 + gov_attr_rw(down_threshold); 292 + gov_attr_rw(freq_step); 239 293 240 - static struct attribute *dbs_attributes_gov_sys[] = { 241 - &sampling_rate_min_gov_sys.attr, 242 - &sampling_rate_gov_sys.attr, 243 - &sampling_down_factor_gov_sys.attr, 244 - &up_threshold_gov_sys.attr, 245 - &down_threshold_gov_sys.attr, 246 - &ignore_nice_load_gov_sys.attr, 247 - &freq_step_gov_sys.attr, 294 + static struct attribute *cs_attributes[] = { 295 + &min_sampling_rate.attr, 296 + &sampling_rate.attr, 297 + 
&sampling_down_factor.attr, 298 + &up_threshold.attr, 299 + &down_threshold.attr, 300 + &ignore_nice_load.attr, 301 + &freq_step.attr, 248 302 NULL 249 - }; 250 - 251 - static struct attribute_group cs_attr_group_gov_sys = { 252 - .attrs = dbs_attributes_gov_sys, 253 - .name = "conservative", 254 - }; 255 - 256 - static struct attribute *dbs_attributes_gov_pol[] = { 257 - &sampling_rate_min_gov_pol.attr, 258 - &sampling_rate_gov_pol.attr, 259 - &sampling_down_factor_gov_pol.attr, 260 - &up_threshold_gov_pol.attr, 261 - &down_threshold_gov_pol.attr, 262 - &ignore_nice_load_gov_pol.attr, 263 - &freq_step_gov_pol.attr, 264 - NULL 265 - }; 266 - 267 - static struct attribute_group cs_attr_group_gov_pol = { 268 - .attrs = dbs_attributes_gov_pol, 269 - .name = "conservative", 270 303 }; 271 304 272 305 /************************** sysfs end ************************/ 306 + 307 + static struct policy_dbs_info *cs_alloc(void) 308 + { 309 + struct cs_policy_dbs_info *dbs_info; 310 + 311 + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); 312 + return dbs_info ? 
&dbs_info->policy_dbs : NULL; 313 + } 314 + 315 + static void cs_free(struct policy_dbs_info *policy_dbs) 316 + { 317 + kfree(to_dbs_info(policy_dbs)); 318 + } 273 319 274 320 static int cs_init(struct dbs_data *dbs_data, bool notify) 275 321 { ··· 273 335 return -ENOMEM; 274 336 } 275 337 276 - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; 277 338 tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; 278 - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; 279 - tuners->ignore_nice_load = 0; 280 339 tuners->freq_step = DEF_FREQUENCY_STEP; 340 + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; 341 + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; 342 + dbs_data->ignore_nice_load = 0; 281 343 282 344 dbs_data->tuners = tuners; 283 345 dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * ··· 299 361 kfree(dbs_data->tuners); 300 362 } 301 363 302 - define_get_cpu_dbs_routines(cs_cpu_dbs_info); 364 + static void cs_start(struct cpufreq_policy *policy) 365 + { 366 + struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); 303 367 304 - static struct common_dbs_data cs_dbs_cdata = { 305 - .governor = GOV_CONSERVATIVE, 306 - .attr_group_gov_sys = &cs_attr_group_gov_sys, 307 - .attr_group_gov_pol = &cs_attr_group_gov_pol, 308 - .get_cpu_cdbs = get_cpu_cdbs, 309 - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, 368 + dbs_info->down_skip = 0; 369 + dbs_info->requested_freq = policy->cur; 370 + } 371 + 372 + static struct dbs_governor cs_dbs_gov = { 373 + .gov = { 374 + .name = "conservative", 375 + .governor = cpufreq_governor_dbs, 376 + .max_transition_latency = TRANSITION_LATENCY_LIMIT, 377 + .owner = THIS_MODULE, 378 + }, 379 + .kobj_type = { .default_attrs = cs_attributes }, 310 380 .gov_dbs_timer = cs_dbs_timer, 311 - .gov_check_cpu = cs_check_cpu, 381 + .alloc = cs_alloc, 382 + .free = cs_free, 312 383 .init = cs_init, 313 384 .exit = cs_exit, 314 - .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), 385 + .start = cs_start, 315 
386 }; 316 387 317 - static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, 318 - unsigned int event) 388 + #define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) 389 + 390 + static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 391 + void *data) 319 392 { 320 - return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); 393 + struct cpufreq_freqs *freq = data; 394 + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); 395 + struct cs_policy_dbs_info *dbs_info; 396 + 397 + if (!policy) 398 + return 0; 399 + 400 + /* policy isn't governed by conservative governor */ 401 + if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) 402 + return 0; 403 + 404 + dbs_info = to_dbs_info(policy->governor_data); 405 + /* 406 + * we only care if our internally tracked freq moves outside the 'valid' 407 + * ranges of frequency available to us otherwise we do not change it 408 + */ 409 + if (dbs_info->requested_freq > policy->max 410 + || dbs_info->requested_freq < policy->min) 411 + dbs_info->requested_freq = freq->new; 412 + 413 + return 0; 321 414 } 322 415 323 416 static int __init cpufreq_gov_dbs_init(void) 324 417 { 325 - return cpufreq_register_governor(&cpufreq_gov_conservative); 418 + return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE); 326 419 } 327 420 328 421 static void __exit cpufreq_gov_dbs_exit(void) 329 422 { 330 - cpufreq_unregister_governor(&cpufreq_gov_conservative); 423 + cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE); 331 424 } 332 425 333 426 MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); ··· 368 399 MODULE_LICENSE("GPL"); 369 400 370 401 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 402 + struct cpufreq_governor *cpufreq_default_governor(void) 403 + { 404 + return CPU_FREQ_GOV_CONSERVATIVE; 405 + } 406 + 371 407 fs_initcall(cpufreq_gov_dbs_init); 372 408 #else 373 409 module_init(cpufreq_gov_dbs_init);

+442 -402

drivers/cpufreq/cpufreq_governor.c

··· 18 18 19 19 #include <linux/export.h> 20 20 #include <linux/kernel_stat.h> 21 + #include <linux/sched.h> 21 22 #include <linux/slab.h> 22 23 23 24 #include "cpufreq_governor.h" 24 25 25 - static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 26 + static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); 27 + 28 + static DEFINE_MUTEX(gov_dbs_data_mutex); 29 + 30 + /* Common sysfs tunables */ 31 + /** 32 + * store_sampling_rate - update sampling rate effective immediately if needed. 33 + * 34 + * If new rate is smaller than the old, simply updating 35 + * dbs.sampling_rate might not be appropriate. For example, if the 36 + * original sampling_rate was 1 second and the requested new sampling rate is 10 37 + * ms because the user needs immediate reaction from ondemand governor, but not 38 + * sure if higher frequency will be required or not, then, the governor may 39 + * change the sampling rate too late; up to 1 second later. Thus, if we are 40 + * reducing the sampling rate, we need to make the new value effective 41 + * immediately. 42 + * 43 + * This must be called with dbs_data->mutex held, otherwise traversing 44 + * policy_dbs_list isn't safe. 45 + */ 46 + ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, 47 + size_t count) 26 48 { 27 - if (have_governor_per_policy()) 28 - return dbs_data->cdata->attr_group_gov_pol; 29 - else 30 - return dbs_data->cdata->attr_group_gov_sys; 49 + struct policy_dbs_info *policy_dbs; 50 + unsigned int rate; 51 + int ret; 52 + ret = sscanf(buf, "%u", &rate); 53 + if (ret != 1) 54 + return -EINVAL; 55 + 56 + dbs_data->sampling_rate = max(rate, dbs_data->min_sampling_rate); 57 + 58 + /* 59 + * We are operating under dbs_data->mutex and so the list and its 60 + * entries can't be freed concurrently. 
61 + */ 62 + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { 63 + mutex_lock(&policy_dbs->timer_mutex); 64 + /* 65 + * On 32-bit architectures this may race with the 66 + * sample_delay_ns read in dbs_update_util_handler(), but that 67 + * really doesn't matter. If the read returns a value that's 68 + * too big, the sample will be skipped, but the next invocation 69 + * of dbs_update_util_handler() (when the update has been 70 + * completed) will take a sample. 71 + * 72 + * If this runs in parallel with dbs_work_handler(), we may end 73 + * up overwriting the sample_delay_ns value that it has just 74 + * written, but it will be corrected next time a sample is 75 + * taken, so it shouldn't be significant. 76 + */ 77 + gov_update_sample_delay(policy_dbs, 0); 78 + mutex_unlock(&policy_dbs->timer_mutex); 79 + } 80 + 81 + return count; 82 + } 83 + EXPORT_SYMBOL_GPL(store_sampling_rate); 84 + 85 + /** 86 + * gov_update_cpu_data - Update CPU load data. 87 + * @dbs_data: Top-level governor data pointer. 88 + * 89 + * Update CPU load data for all CPUs in the domain governed by @dbs_data 90 + * (that may be a single policy or a bunch of them if governor tunables are 91 + * system-wide). 92 + * 93 + * Call under the @dbs_data mutex. 
94 + */ 95 + void gov_update_cpu_data(struct dbs_data *dbs_data) 96 + { 97 + struct policy_dbs_info *policy_dbs; 98 + 99 + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { 100 + unsigned int j; 101 + 102 + for_each_cpu(j, policy_dbs->policy->cpus) { 103 + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); 104 + 105 + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, 106 + dbs_data->io_is_busy); 107 + if (dbs_data->ignore_nice_load) 108 + j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 109 + } 110 + } 111 + } 112 + EXPORT_SYMBOL_GPL(gov_update_cpu_data); 113 + 114 + static inline struct dbs_data *to_dbs_data(struct kobject *kobj) 115 + { 116 + return container_of(kobj, struct dbs_data, kobj); 31 117 } 32 118 33 - void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 119 + static inline struct governor_attr *to_gov_attr(struct attribute *attr) 34 120 { 35 - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 36 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 37 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 38 - struct cpufreq_policy *policy = cdbs->shared->policy; 39 - unsigned int sampling_rate; 121 + return container_of(attr, struct governor_attr, attr); 122 + } 123 + 124 + static ssize_t governor_show(struct kobject *kobj, struct attribute *attr, 125 + char *buf) 126 + { 127 + struct dbs_data *dbs_data = to_dbs_data(kobj); 128 + struct governor_attr *gattr = to_gov_attr(attr); 129 + 130 + return gattr->show(dbs_data, buf); 131 + } 132 + 133 + static ssize_t governor_store(struct kobject *kobj, struct attribute *attr, 134 + const char *buf, size_t count) 135 + { 136 + struct dbs_data *dbs_data = to_dbs_data(kobj); 137 + struct governor_attr *gattr = to_gov_attr(attr); 138 + int ret = -EBUSY; 139 + 140 + mutex_lock(&dbs_data->mutex); 141 + 142 + if (dbs_data->usage_count) 143 + ret = gattr->store(dbs_data, buf, count); 144 + 145 + mutex_unlock(&dbs_data->mutex); 146 + 147 + return ret; 
148 + } 149 + 150 + /* 151 + * Sysfs Ops for accessing governor attributes. 152 + * 153 + * All show/store invocations for governor specific sysfs attributes, will first 154 + * call the below show/store callbacks and the attribute specific callback will 155 + * be called from within it. 156 + */ 157 + static const struct sysfs_ops governor_sysfs_ops = { 158 + .show = governor_show, 159 + .store = governor_store, 160 + }; 161 + 162 + unsigned int dbs_update(struct cpufreq_policy *policy) 163 + { 164 + struct policy_dbs_info *policy_dbs = policy->governor_data; 165 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 166 + unsigned int ignore_nice = dbs_data->ignore_nice_load; 40 167 unsigned int max_load = 0; 41 - unsigned int ignore_nice; 42 - unsigned int j; 168 + unsigned int sampling_rate, io_busy, j; 43 169 44 - if (dbs_data->cdata->governor == GOV_ONDEMAND) { 45 - struct od_cpu_dbs_info_s *od_dbs_info = 46 - dbs_data->cdata->get_cpu_dbs_info_s(cpu); 47 - 48 - /* 49 - * Sometimes, the ondemand governor uses an additional 50 - * multiplier to give long delays. So apply this multiplier to 51 - * the 'sampling_rate', so as to keep the wake-up-from-idle 52 - * detection logic a bit conservative. 53 - */ 54 - sampling_rate = od_tuners->sampling_rate; 55 - sampling_rate *= od_dbs_info->rate_mult; 56 - 57 - ignore_nice = od_tuners->ignore_nice_load; 58 - } else { 59 - sampling_rate = cs_tuners->sampling_rate; 60 - ignore_nice = cs_tuners->ignore_nice_load; 61 - } 170 + /* 171 + * Sometimes governors may use an additional multiplier to increase 172 + * sample delays temporarily. Apply that multiplier to sampling_rate 173 + * so as to keep the wake-up-from-idle detection logic a bit 174 + * conservative. 
175 + */ 176 + sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult; 177 + /* 178 + * For the purpose of ondemand, waiting for disk IO is an indication 179 + * that you're performance critical, and not that the system is actually 180 + * idle, so do not add the iowait time to the CPU idle time then. 181 + */ 182 + io_busy = dbs_data->io_is_busy; 62 183 63 184 /* Get Absolute Load */ 64 185 for_each_cpu(j, policy->cpus) { 65 - struct cpu_dbs_info *j_cdbs; 186 + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); 66 187 u64 cur_wall_time, cur_idle_time; 67 188 unsigned int idle_time, wall_time; 68 189 unsigned int load; 69 - int io_busy = 0; 70 190 71 - j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 72 - 73 - /* 74 - * For the purpose of ondemand, waiting for disk IO is 75 - * an indication that you're performance critical, and 76 - * not that the system is actually idle. So do not add 77 - * the iowait time to the cpu idle time. 78 - */ 79 - if (dbs_data->cdata->governor == GOV_ONDEMAND) 80 - io_busy = od_tuners->io_is_busy; 81 191 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 82 192 83 - wall_time = (unsigned int) 84 - (cur_wall_time - j_cdbs->prev_cpu_wall); 193 + wall_time = cur_wall_time - j_cdbs->prev_cpu_wall; 85 194 j_cdbs->prev_cpu_wall = cur_wall_time; 86 195 87 - if (cur_idle_time < j_cdbs->prev_cpu_idle) 88 - cur_idle_time = j_cdbs->prev_cpu_idle; 89 - 90 - idle_time = (unsigned int) 91 - (cur_idle_time - j_cdbs->prev_cpu_idle); 92 - j_cdbs->prev_cpu_idle = cur_idle_time; 196 + if (cur_idle_time <= j_cdbs->prev_cpu_idle) { 197 + idle_time = 0; 198 + } else { 199 + idle_time = cur_idle_time - j_cdbs->prev_cpu_idle; 200 + j_cdbs->prev_cpu_idle = cur_idle_time; 201 + } 93 202 94 203 if (ignore_nice) { 95 - u64 cur_nice; 96 - unsigned long cur_nice_jiffies; 204 + u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 97 205 98 - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 99 - cdbs->prev_cpu_nice; 100 - /* 101 - * Assumption: 
nice time between sampling periods will 102 - * be less than 2^32 jiffies for 32 bit sys 103 - */ 104 - cur_nice_jiffies = (unsigned long) 105 - cputime64_to_jiffies64(cur_nice); 106 - 107 - cdbs->prev_cpu_nice = 108 - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 109 - idle_time += jiffies_to_usecs(cur_nice_jiffies); 206 + idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice); 207 + j_cdbs->prev_cpu_nice = cur_nice; 110 208 } 111 209 112 210 if (unlikely(!wall_time || wall_time < idle_time)) ··· 226 128 * dropped down. So we perform the copy only once, upon the 227 129 * first wake-up from idle.) 228 130 * 229 - * Detecting this situation is easy: the governor's deferrable 230 - * timer would not have fired during CPU-idle periods. Hence 231 - * an unusually large 'wall_time' (as compared to the sampling 232 - * rate) indicates this scenario. 131 + * Detecting this situation is easy: the governor's utilization 132 + * update handler would not have run during CPU-idle periods. 133 + * Hence, an unusually large 'wall_time' (as compared to the 134 + * sampling rate) indicates this scenario. 
233 135 * 234 136 * prev_load can be zero in two cases and we must recalculate it 235 137 * for both cases: ··· 254 156 if (load > max_load) 255 157 max_load = load; 256 158 } 257 - 258 - dbs_data->cdata->gov_check_cpu(cpu, max_load); 159 + return max_load; 259 160 } 260 - EXPORT_SYMBOL_GPL(dbs_check_cpu); 161 + EXPORT_SYMBOL_GPL(dbs_update); 261 162 262 - void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) 163 + static void gov_set_update_util(struct policy_dbs_info *policy_dbs, 164 + unsigned int delay_us) 263 165 { 264 - struct dbs_data *dbs_data = policy->governor_data; 265 - struct cpu_dbs_info *cdbs; 166 + struct cpufreq_policy *policy = policy_dbs->policy; 266 167 int cpu; 267 168 169 + gov_update_sample_delay(policy_dbs, delay_us); 170 + policy_dbs->last_sample_time = 0; 171 + 268 172 for_each_cpu(cpu, policy->cpus) { 269 - cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 270 - cdbs->timer.expires = jiffies + delay; 271 - add_timer_on(&cdbs->timer, cpu); 173 + struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu); 174 + 175 + cpufreq_set_update_util_data(cpu, &cdbs->update_util); 272 176 } 273 177 } 274 - EXPORT_SYMBOL_GPL(gov_add_timers); 275 178 276 - static inline void gov_cancel_timers(struct cpufreq_policy *policy) 179 + static inline void gov_clear_update_util(struct cpufreq_policy *policy) 277 180 { 278 - struct dbs_data *dbs_data = policy->governor_data; 279 - struct cpu_dbs_info *cdbs; 280 181 int i; 281 182 282 - for_each_cpu(i, policy->cpus) { 283 - cdbs = dbs_data->cdata->get_cpu_cdbs(i); 284 - del_timer_sync(&cdbs->timer); 285 - } 183 + for_each_cpu(i, policy->cpus) 184 + cpufreq_set_update_util_data(i, NULL); 185 + 186 + synchronize_sched(); 286 187 } 287 188 288 - void gov_cancel_work(struct cpu_common_dbs_info *shared) 189 + static void gov_cancel_work(struct cpufreq_policy *policy) 289 190 { 290 - /* Tell dbs_timer_handler() to skip queuing up work items. 
*/ 291 - atomic_inc(&shared->skip_work); 292 - /* 293 - * If dbs_timer_handler() is already running, it may not notice the 294 - * incremented skip_work, so wait for it to complete to prevent its work 295 - * item from being queued up after the cancel_work_sync() below. 296 - */ 297 - gov_cancel_timers(shared->policy); 298 - /* 299 - * In case dbs_timer_handler() managed to run and spawn a work item 300 - * before the timers have been canceled, wait for that work item to 301 - * complete and then cancel all of the timers set up by it. If 302 - * dbs_timer_handler() runs again at that point, it will see the 303 - * positive value of skip_work and won't spawn any more work items. 304 - */ 305 - cancel_work_sync(&shared->work); 306 - gov_cancel_timers(shared->policy); 307 - atomic_set(&shared->skip_work, 0); 308 - } 309 - EXPORT_SYMBOL_GPL(gov_cancel_work); 191 + struct policy_dbs_info *policy_dbs = policy->governor_data; 310 192 311 - /* Will return if we need to evaluate cpu load again or not */ 312 - static bool need_load_eval(struct cpu_common_dbs_info *shared, 313 - unsigned int sampling_rate) 314 - { 315 - if (policy_is_shared(shared->policy)) { 316 - ktime_t time_now = ktime_get(); 317 - s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); 318 - 319 - /* Do nothing if we recently have sampled */ 320 - if (delta_us < (s64)(sampling_rate / 2)) 321 - return false; 322 - else 323 - shared->time_stamp = time_now; 324 - } 325 - 326 - return true; 193 + gov_clear_update_util(policy_dbs->policy); 194 + irq_work_sync(&policy_dbs->irq_work); 195 + cancel_work_sync(&policy_dbs->work); 196 + atomic_set(&policy_dbs->work_count, 0); 197 + policy_dbs->work_in_progress = false; 327 198 } 328 199 329 200 static void dbs_work_handler(struct work_struct *work) 330 201 { 331 - struct cpu_common_dbs_info *shared = container_of(work, struct 332 - cpu_common_dbs_info, work); 202 + struct policy_dbs_info *policy_dbs; 333 203 struct cpufreq_policy *policy; 204 + struct 
dbs_governor *gov; 205 + 206 + policy_dbs = container_of(work, struct policy_dbs_info, work); 207 + policy = policy_dbs->policy; 208 + gov = dbs_governor_of(policy); 209 + 210 + /* 211 + * Make sure cpufreq_governor_limits() isn't evaluating load or the 212 + * ondemand governor isn't updating the sampling rate in parallel. 213 + */ 214 + mutex_lock(&policy_dbs->timer_mutex); 215 + gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy)); 216 + mutex_unlock(&policy_dbs->timer_mutex); 217 + 218 + /* Allow the utilization update handler to queue up more work. */ 219 + atomic_set(&policy_dbs->work_count, 0); 220 + /* 221 + * If the update below is reordered with respect to the sample delay 222 + * modification, the utilization update handler may end up using a stale 223 + * sample delay value. 224 + */ 225 + smp_wmb(); 226 + policy_dbs->work_in_progress = false; 227 + } 228 + 229 + static void dbs_irq_work(struct irq_work *irq_work) 230 + { 231 + struct policy_dbs_info *policy_dbs; 232 + 233 + policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work); 234 + schedule_work(&policy_dbs->work); 235 + } 236 + 237 + static void dbs_update_util_handler(struct update_util_data *data, u64 time, 238 + unsigned long util, unsigned long max) 239 + { 240 + struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); 241 + struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; 242 + u64 delta_ns, lst; 243 + 244 + /* 245 + * The work may not be allowed to be queued up right now. 246 + * Possible reasons: 247 + * - Work has already been queued up or is in progress. 248 + * - It is too early (too little time from the previous sample). 249 + */ 250 + if (policy_dbs->work_in_progress) 251 + return; 252 + 253 + /* 254 + * If the reads below are reordered before the check above, the value 255 + * of sample_delay_ns used in the computation may be stale. 
256 + */ 257 + smp_rmb(); 258 + lst = READ_ONCE(policy_dbs->last_sample_time); 259 + delta_ns = time - lst; 260 + if ((s64)delta_ns < policy_dbs->sample_delay_ns) 261 + return; 262 + 263 + /* 264 + * If the policy is not shared, the irq_work may be queued up right away 265 + * at this point. Otherwise, we need to ensure that only one of the 266 + * CPUs sharing the policy will do that. 267 + */ 268 + if (policy_dbs->is_shared) { 269 + if (!atomic_add_unless(&policy_dbs->work_count, 1, 1)) 270 + return; 271 + 272 + /* 273 + * If another CPU updated last_sample_time in the meantime, we 274 + * shouldn't be here, so clear the work counter and bail out. 275 + */ 276 + if (unlikely(lst != READ_ONCE(policy_dbs->last_sample_time))) { 277 + atomic_set(&policy_dbs->work_count, 0); 278 + return; 279 + } 280 + } 281 + 282 + policy_dbs->last_sample_time = time; 283 + policy_dbs->work_in_progress = true; 284 + irq_work_queue(&policy_dbs->irq_work); 285 + } 286 + 287 + static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, 288 + struct dbs_governor *gov) 289 + { 290 + struct policy_dbs_info *policy_dbs; 291 + int j; 292 + 293 + /* Allocate memory for per-policy governor data. 
*/ 294 + policy_dbs = gov->alloc(); 295 + if (!policy_dbs) 296 + return NULL; 297 + 298 + policy_dbs->policy = policy; 299 + mutex_init(&policy_dbs->timer_mutex); 300 + atomic_set(&policy_dbs->work_count, 0); 301 + init_irq_work(&policy_dbs->irq_work, dbs_irq_work); 302 + INIT_WORK(&policy_dbs->work, dbs_work_handler); 303 + 304 + /* Set policy_dbs for all CPUs, online+offline */ 305 + for_each_cpu(j, policy->related_cpus) { 306 + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); 307 + 308 + j_cdbs->policy_dbs = policy_dbs; 309 + j_cdbs->update_util.func = dbs_update_util_handler; 310 + } 311 + return policy_dbs; 312 + } 313 + 314 + static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, 315 + struct dbs_governor *gov) 316 + { 317 + int j; 318 + 319 + mutex_destroy(&policy_dbs->timer_mutex); 320 + 321 + for_each_cpu(j, policy_dbs->policy->related_cpus) { 322 + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); 323 + 324 + j_cdbs->policy_dbs = NULL; 325 + j_cdbs->update_util.func = NULL; 326 + } 327 + gov->free(policy_dbs); 328 + } 329 + 330 + static int cpufreq_governor_init(struct cpufreq_policy *policy) 331 + { 332 + struct dbs_governor *gov = dbs_governor_of(policy); 334 333 struct dbs_data *dbs_data; 335 - unsigned int sampling_rate, delay; 336 - bool eval_load; 337 - 338 - policy = shared->policy; 339 - dbs_data = policy->governor_data; 340 - 341 - /* Kill all timers */ 342 - gov_cancel_timers(policy); 343 - 344 - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 345 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 346 - 347 - sampling_rate = cs_tuners->sampling_rate; 348 - } else { 349 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 350 - 351 - sampling_rate = od_tuners->sampling_rate; 352 - } 353 - 354 - eval_load = need_load_eval(shared, sampling_rate); 355 - 356 - /* 357 - * Make sure cpufreq_governor_limits() isn't evaluating load in 358 - * parallel. 
359 - */ 360 - mutex_lock(&shared->timer_mutex); 361 - delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); 362 - mutex_unlock(&shared->timer_mutex); 363 - 364 - atomic_dec(&shared->skip_work); 365 - 366 - gov_add_timers(policy, delay); 367 - } 368 - 369 - static void dbs_timer_handler(unsigned long data) 370 - { 371 - struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; 372 - struct cpu_common_dbs_info *shared = cdbs->shared; 373 - 374 - /* 375 - * Timer handler may not be allowed to queue the work at the moment, 376 - * because: 377 - * - Another timer handler has done that 378 - * - We are stopping the governor 379 - * - Or we are updating the sampling rate of the ondemand governor 380 - */ 381 - if (atomic_inc_return(&shared->skip_work) > 1) 382 - atomic_dec(&shared->skip_work); 383 - else 384 - queue_work(system_wq, &shared->work); 385 - } 386 - 387 - static void set_sampling_rate(struct dbs_data *dbs_data, 388 - unsigned int sampling_rate) 389 - { 390 - if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 391 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 392 - cs_tuners->sampling_rate = sampling_rate; 393 - } else { 394 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 395 - od_tuners->sampling_rate = sampling_rate; 396 - } 397 - } 398 - 399 - static int alloc_common_dbs_info(struct cpufreq_policy *policy, 400 - struct common_dbs_data *cdata) 401 - { 402 - struct cpu_common_dbs_info *shared; 403 - int j; 404 - 405 - /* Allocate memory for the common information for policy->cpus */ 406 - shared = kzalloc(sizeof(*shared), GFP_KERNEL); 407 - if (!shared) 408 - return -ENOMEM; 409 - 410 - /* Set shared for all CPUs, online+offline */ 411 - for_each_cpu(j, policy->related_cpus) 412 - cdata->get_cpu_cdbs(j)->shared = shared; 413 - 414 - mutex_init(&shared->timer_mutex); 415 - atomic_set(&shared->skip_work, 0); 416 - INIT_WORK(&shared->work, dbs_work_handler); 417 - return 0; 418 - } 419 - 420 - static void free_common_dbs_info(struct 
cpufreq_policy *policy, 421 - struct common_dbs_data *cdata) 422 - { 423 - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 424 - struct cpu_common_dbs_info *shared = cdbs->shared; 425 - int j; 426 - 427 - mutex_destroy(&shared->timer_mutex); 428 - 429 - for_each_cpu(j, policy->cpus) 430 - cdata->get_cpu_cdbs(j)->shared = NULL; 431 - 432 - kfree(shared); 433 - } 434 - 435 - static int cpufreq_governor_init(struct cpufreq_policy *policy, 436 - struct dbs_data *dbs_data, 437 - struct common_dbs_data *cdata) 438 - { 334 + struct policy_dbs_info *policy_dbs; 439 335 unsigned int latency; 440 - int ret; 336 + int ret = 0; 441 337 442 338 /* State should be equivalent to EXIT */ 443 339 if (policy->governor_data) 444 340 return -EBUSY; 445 341 342 + policy_dbs = alloc_policy_dbs_info(policy, gov); 343 + if (!policy_dbs) 344 + return -ENOMEM; 345 + 346 + /* Protect gov->gdbs_data against concurrent updates. */ 347 + mutex_lock(&gov_dbs_data_mutex); 348 + 349 + dbs_data = gov->gdbs_data; 446 350 if (dbs_data) { 447 - if (WARN_ON(have_governor_per_policy())) 448 - return -EINVAL; 351 + if (WARN_ON(have_governor_per_policy())) { 352 + ret = -EINVAL; 353 + goto free_policy_dbs_info; 354 + } 355 + policy_dbs->dbs_data = dbs_data; 356 + policy->governor_data = policy_dbs; 449 357 450 - ret = alloc_common_dbs_info(policy, cdata); 451 - if (ret) 452 - return ret; 453 - 358 + mutex_lock(&dbs_data->mutex); 454 359 dbs_data->usage_count++; 455 - policy->governor_data = dbs_data; 456 - return 0; 360 + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); 361 + mutex_unlock(&dbs_data->mutex); 362 + goto out; 457 363 } 458 364 459 365 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 460 - if (!dbs_data) 461 - return -ENOMEM; 366 + if (!dbs_data) { 367 + ret = -ENOMEM; 368 + goto free_policy_dbs_info; 369 + } 462 370 463 - ret = alloc_common_dbs_info(policy, cdata); 371 + INIT_LIST_HEAD(&dbs_data->policy_dbs_list); 372 + mutex_init(&dbs_data->mutex); 373 + 374 + ret = 
gov->init(dbs_data, !policy->governor->initialized); 464 375 if (ret) 465 - goto free_dbs_data; 466 - 467 - dbs_data->cdata = cdata; 468 - dbs_data->usage_count = 1; 469 - 470 - ret = cdata->init(dbs_data, !policy->governor->initialized); 471 - if (ret) 472 - goto free_common_dbs_info; 376 + goto free_policy_dbs_info; 473 377 474 378 /* policy latency is in ns. Convert it to us first */ 475 379 latency = policy->cpuinfo.transition_latency / 1000; ··· 481 381 /* Bring kernel and HW constraints together */ 482 382 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 483 383 MIN_LATENCY_MULTIPLIER * latency); 484 - set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 485 - latency * LATENCY_MULTIPLIER)); 384 + dbs_data->sampling_rate = max(dbs_data->min_sampling_rate, 385 + LATENCY_MULTIPLIER * latency); 486 386 487 387 if (!have_governor_per_policy()) 488 - cdata->gdbs_data = dbs_data; 388 + gov->gdbs_data = dbs_data; 489 389 490 - policy->governor_data = dbs_data; 390 + policy->governor_data = policy_dbs; 491 391 492 - ret = sysfs_create_group(get_governor_parent_kobj(policy), 493 - get_sysfs_attr(dbs_data)); 494 - if (ret) 495 - goto reset_gdbs_data; 392 + policy_dbs->dbs_data = dbs_data; 393 + dbs_data->usage_count = 1; 394 + list_add(&policy_dbs->list, &dbs_data->policy_dbs_list); 496 395 497 - return 0; 396 + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; 397 + ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, 398 + get_governor_parent_kobj(policy), 399 + "%s", gov->gov.name); 400 + if (!ret) 401 + goto out; 498 402 499 - reset_gdbs_data: 403 + /* Failure, so roll back. 
*/ 404 + pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); 405 + 500 406 policy->governor_data = NULL; 501 407 502 408 if (!have_governor_per_policy()) 503 - cdata->gdbs_data = NULL; 504 - cdata->exit(dbs_data, !policy->governor->initialized); 505 - free_common_dbs_info: 506 - free_common_dbs_info(policy, cdata); 507 - free_dbs_data: 409 + gov->gdbs_data = NULL; 410 + gov->exit(dbs_data, !policy->governor->initialized); 508 411 kfree(dbs_data); 412 + 413 + free_policy_dbs_info: 414 + free_policy_dbs_info(policy_dbs, gov); 415 + 416 + out: 417 + mutex_unlock(&gov_dbs_data_mutex); 509 418 return ret; 510 419 } 511 420 512 - static int cpufreq_governor_exit(struct cpufreq_policy *policy, 513 - struct dbs_data *dbs_data) 421 + static int cpufreq_governor_exit(struct cpufreq_policy *policy) 514 422 { 515 - struct common_dbs_data *cdata = dbs_data->cdata; 516 - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 423 + struct dbs_governor *gov = dbs_governor_of(policy); 424 + struct policy_dbs_info *policy_dbs = policy->governor_data; 425 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 426 + int count; 517 427 518 - /* State should be equivalent to INIT */ 519 - if (!cdbs->shared || cdbs->shared->policy) 520 - return -EBUSY; 428 + /* Protect gov->gdbs_data against concurrent updates. 
*/ 429 + mutex_lock(&gov_dbs_data_mutex); 521 430 522 - if (!--dbs_data->usage_count) { 523 - sysfs_remove_group(get_governor_parent_kobj(policy), 524 - get_sysfs_attr(dbs_data)); 431 + mutex_lock(&dbs_data->mutex); 432 + list_del(&policy_dbs->list); 433 + count = --dbs_data->usage_count; 434 + mutex_unlock(&dbs_data->mutex); 435 + 436 + if (!count) { 437 + kobject_put(&dbs_data->kobj); 525 438 526 439 policy->governor_data = NULL; 527 440 528 441 if (!have_governor_per_policy()) 529 - cdata->gdbs_data = NULL; 442 + gov->gdbs_data = NULL; 530 443 531 - cdata->exit(dbs_data, policy->governor->initialized == 1); 444 + gov->exit(dbs_data, policy->governor->initialized == 1); 445 + mutex_destroy(&dbs_data->mutex); 532 446 kfree(dbs_data); 533 447 } else { 534 448 policy->governor_data = NULL; 535 449 } 536 450 537 - free_common_dbs_info(policy, cdata); 451 + free_policy_dbs_info(policy_dbs, gov); 452 + 453 + mutex_unlock(&gov_dbs_data_mutex); 538 454 return 0; 539 455 } 540 456 541 - static int cpufreq_governor_start(struct cpufreq_policy *policy, 542 - struct dbs_data *dbs_data) 457 + static int cpufreq_governor_start(struct cpufreq_policy *policy) 543 458 { 544 - struct common_dbs_data *cdata = dbs_data->cdata; 545 - unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; 546 - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 547 - struct cpu_common_dbs_info *shared = cdbs->shared; 548 - int io_busy = 0; 459 + struct dbs_governor *gov = dbs_governor_of(policy); 460 + struct policy_dbs_info *policy_dbs = policy->governor_data; 461 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 462 + unsigned int sampling_rate, ignore_nice, j; 463 + unsigned int io_busy; 549 464 550 465 if (!policy->cur) 551 466 return -EINVAL; 552 467 553 - /* State should be equivalent to INIT */ 554 - if (!shared || shared->policy) 555 - return -EBUSY; 468 + policy_dbs->is_shared = policy_is_shared(policy); 469 + policy_dbs->rate_mult = 1; 556 470 557 - if (cdata->governor == 
GOV_CONSERVATIVE) { 558 - struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 559 - 560 - sampling_rate = cs_tuners->sampling_rate; 561 - ignore_nice = cs_tuners->ignore_nice_load; 562 - } else { 563 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 564 - 565 - sampling_rate = od_tuners->sampling_rate; 566 - ignore_nice = od_tuners->ignore_nice_load; 567 - io_busy = od_tuners->io_is_busy; 568 - } 569 - 570 - shared->policy = policy; 571 - shared->time_stamp = ktime_get(); 471 + sampling_rate = dbs_data->sampling_rate; 472 + ignore_nice = dbs_data->ignore_nice_load; 473 + io_busy = dbs_data->io_is_busy; 572 474 573 475 for_each_cpu(j, policy->cpus) { 574 - struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); 476 + struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); 575 477 unsigned int prev_load; 576 478 577 - j_cdbs->prev_cpu_idle = 578 - get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); 479 + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); 579 480 580 - prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - 581 - j_cdbs->prev_cpu_idle); 582 - j_cdbs->prev_load = 100 * prev_load / 583 - (unsigned int)j_cdbs->prev_cpu_wall; 481 + prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle; 482 + j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall; 584 483 585 484 if (ignore_nice) 586 485 j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 587 - 588 - __setup_timer(&j_cdbs->timer, dbs_timer_handler, 589 - (unsigned long)j_cdbs, 590 - TIMER_DEFERRABLE | TIMER_IRQSAFE); 591 486 } 592 487 593 - if (cdata->governor == GOV_CONSERVATIVE) { 594 - struct cs_cpu_dbs_info_s *cs_dbs_info = 595 - cdata->get_cpu_dbs_info_s(cpu); 488 + gov->start(policy); 596 489 597 - cs_dbs_info->down_skip = 0; 598 - cs_dbs_info->requested_freq = policy->cur; 599 - } else { 600 - struct od_ops *od_ops = cdata->gov_ops; 601 - struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); 602 - 603 - 
od_dbs_info->rate_mult = 1; 604 - od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 605 - od_ops->powersave_bias_init_cpu(cpu); 606 - } 607 - 608 - gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); 490 + gov_set_update_util(policy_dbs, sampling_rate); 609 491 return 0; 610 492 } 611 493 612 - static int cpufreq_governor_stop(struct cpufreq_policy *policy, 613 - struct dbs_data *dbs_data) 494 + static int cpufreq_governor_stop(struct cpufreq_policy *policy) 614 495 { 615 - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); 616 - struct cpu_common_dbs_info *shared = cdbs->shared; 496 + gov_cancel_work(policy); 497 + return 0; 498 + } 617 499 618 - /* State should be equivalent to START */ 619 - if (!shared || !shared->policy) 620 - return -EBUSY; 500 + static int cpufreq_governor_limits(struct cpufreq_policy *policy) 501 + { 502 + struct policy_dbs_info *policy_dbs = policy->governor_data; 621 503 622 - gov_cancel_work(shared); 623 - shared->policy = NULL; 504 + mutex_lock(&policy_dbs->timer_mutex); 505 + 506 + if (policy->max < policy->cur) 507 + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); 508 + else if (policy->min > policy->cur) 509 + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); 510 + 511 + gov_update_sample_delay(policy_dbs, 0); 512 + 513 + mutex_unlock(&policy_dbs->timer_mutex); 624 514 625 515 return 0; 626 516 } 627 517 628 - static int cpufreq_governor_limits(struct cpufreq_policy *policy, 629 - struct dbs_data *dbs_data) 518 + int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) 630 519 { 631 - struct common_dbs_data *cdata = dbs_data->cdata; 632 - unsigned int cpu = policy->cpu; 633 - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 634 - 635 - /* State should be equivalent to START */ 636 - if (!cdbs->shared || !cdbs->shared->policy) 637 - return -EBUSY; 638 - 639 - mutex_lock(&cdbs->shared->timer_mutex); 640 - if (policy->max < cdbs->shared->policy->cur) 
641 - __cpufreq_driver_target(cdbs->shared->policy, policy->max, 642 - CPUFREQ_RELATION_H); 643 - else if (policy->min > cdbs->shared->policy->cur) 644 - __cpufreq_driver_target(cdbs->shared->policy, policy->min, 645 - CPUFREQ_RELATION_L); 646 - dbs_check_cpu(dbs_data, cpu); 647 - mutex_unlock(&cdbs->shared->timer_mutex); 648 - 649 - return 0; 650 - } 651 - 652 - int cpufreq_governor_dbs(struct cpufreq_policy *policy, 653 - struct common_dbs_data *cdata, unsigned int event) 654 - { 655 - struct dbs_data *dbs_data; 656 - int ret; 657 - 658 - /* Lock governor to block concurrent initialization of governor */ 659 - mutex_lock(&cdata->mutex); 660 - 661 - if (have_governor_per_policy()) 662 - dbs_data = policy->governor_data; 663 - else 664 - dbs_data = cdata->gdbs_data; 665 - 666 - if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { 667 - ret = -EINVAL; 668 - goto unlock; 520 + if (event == CPUFREQ_GOV_POLICY_INIT) { 521 + return cpufreq_governor_init(policy); 522 + } else if (policy->governor_data) { 523 + switch (event) { 524 + case CPUFREQ_GOV_POLICY_EXIT: 525 + return cpufreq_governor_exit(policy); 526 + case CPUFREQ_GOV_START: 527 + return cpufreq_governor_start(policy); 528 + case CPUFREQ_GOV_STOP: 529 + return cpufreq_governor_stop(policy); 530 + case CPUFREQ_GOV_LIMITS: 531 + return cpufreq_governor_limits(policy); 532 + } 669 533 } 670 - 671 - switch (event) { 672 - case CPUFREQ_GOV_POLICY_INIT: 673 - ret = cpufreq_governor_init(policy, dbs_data, cdata); 674 - break; 675 - case CPUFREQ_GOV_POLICY_EXIT: 676 - ret = cpufreq_governor_exit(policy, dbs_data); 677 - break; 678 - case CPUFREQ_GOV_START: 679 - ret = cpufreq_governor_start(policy, dbs_data); 680 - break; 681 - case CPUFREQ_GOV_STOP: 682 - ret = cpufreq_governor_stop(policy, dbs_data); 683 - break; 684 - case CPUFREQ_GOV_LIMITS: 685 - ret = cpufreq_governor_limits(policy, dbs_data); 686 - break; 687 - default: 688 - ret = -EINVAL; 689 - } 690 - 691 - unlock: 692 - mutex_unlock(&cdata->mutex); 693 - 
694 - return ret; 534 + return -EINVAL; 695 535 } 696 536 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);

+91 -184

drivers/cpufreq/cpufreq_governor.h

··· 18 18 #define _CPUFREQ_GOVERNOR_H 19 19 20 20 #include <linux/atomic.h> 21 + #include <linux/irq_work.h> 21 22 #include <linux/cpufreq.h> 22 23 #include <linux/kernel_stat.h> 23 24 #include <linux/module.h> ··· 42 41 enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; 43 42 44 43 /* 45 - * Macro for creating governors sysfs routines 46 - * 47 - * - gov_sys: One governor instance per whole system 48 - * - gov_pol: One governor instance per policy 49 - */ 50 - 51 - /* Create attributes */ 52 - #define gov_sys_attr_ro(_name) \ 53 - static struct global_attr _name##_gov_sys = \ 54 - __ATTR(_name, 0444, show_##_name##_gov_sys, NULL) 55 - 56 - #define gov_sys_attr_rw(_name) \ 57 - static struct global_attr _name##_gov_sys = \ 58 - __ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) 59 - 60 - #define gov_pol_attr_ro(_name) \ 61 - static struct freq_attr _name##_gov_pol = \ 62 - __ATTR(_name, 0444, show_##_name##_gov_pol, NULL) 63 - 64 - #define gov_pol_attr_rw(_name) \ 65 - static struct freq_attr _name##_gov_pol = \ 66 - __ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) 67 - 68 - #define gov_sys_pol_attr_rw(_name) \ 69 - gov_sys_attr_rw(_name); \ 70 - gov_pol_attr_rw(_name) 71 - 72 - #define gov_sys_pol_attr_ro(_name) \ 73 - gov_sys_attr_ro(_name); \ 74 - gov_pol_attr_ro(_name) 75 - 76 - /* Create show/store routines */ 77 - #define show_one(_gov, file_name) \ 78 - static ssize_t show_##file_name##_gov_sys \ 79 - (struct kobject *kobj, struct attribute *attr, char *buf) \ 80 - { \ 81 - struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ 82 - return sprintf(buf, "%u\n", tuners->file_name); \ 83 - } \ 84 - \ 85 - static ssize_t show_##file_name##_gov_pol \ 86 - (struct cpufreq_policy *policy, char *buf) \ 87 - { \ 88 - struct dbs_data *dbs_data = policy->governor_data; \ 89 - struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ 90 - return sprintf(buf, "%u\n", tuners->file_name); \ 91 - } 92 - 93 - #define store_one(_gov, 
file_name) \ 94 - static ssize_t store_##file_name##_gov_sys \ 95 - (struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ 96 - { \ 97 - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ 98 - return store_##file_name(dbs_data, buf, count); \ 99 - } \ 100 - \ 101 - static ssize_t store_##file_name##_gov_pol \ 102 - (struct cpufreq_policy *policy, const char *buf, size_t count) \ 103 - { \ 104 - struct dbs_data *dbs_data = policy->governor_data; \ 105 - return store_##file_name(dbs_data, buf, count); \ 106 - } 107 - 108 - #define show_store_one(_gov, file_name) \ 109 - show_one(_gov, file_name); \ 110 - store_one(_gov, file_name) 111 - 112 - /* create helper routines */ 113 - #define define_get_cpu_dbs_routines(_dbs_info) \ 114 - static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ 115 - { \ 116 - return &per_cpu(_dbs_info, cpu).cdbs; \ 117 - } \ 118 - \ 119 - static void *get_cpu_dbs_info_s(int cpu) \ 120 - { \ 121 - return &per_cpu(_dbs_info, cpu); \ 122 - } 123 - 124 - /* 125 44 * Abbreviations: 126 45 * dbs: used as a shortform for demand based switching It helps to keep variable 127 46 * names smaller, simpler ··· 50 129 * cs_*: Conservative governor 51 130 */ 52 131 132 + /* Governor demand based switching data (per-policy or global). */ 133 + struct dbs_data { 134 + int usage_count; 135 + void *tuners; 136 + unsigned int min_sampling_rate; 137 + unsigned int ignore_nice_load; 138 + unsigned int sampling_rate; 139 + unsigned int sampling_down_factor; 140 + unsigned int up_threshold; 141 + unsigned int io_is_busy; 142 + 143 + struct kobject kobj; 144 + struct list_head policy_dbs_list; 145 + /* 146 + * Protect concurrent updates to governor tunables from sysfs, 147 + * policy_dbs_list and usage_count. 
148 + */ 149 + struct mutex mutex; 150 + }; 151 + 152 + /* Governor's specific attributes */ 153 + struct dbs_data; 154 + struct governor_attr { 155 + struct attribute attr; 156 + ssize_t (*show)(struct dbs_data *dbs_data, char *buf); 157 + ssize_t (*store)(struct dbs_data *dbs_data, const char *buf, 158 + size_t count); 159 + }; 160 + 161 + #define gov_show_one(_gov, file_name) \ 162 + static ssize_t show_##file_name \ 163 + (struct dbs_data *dbs_data, char *buf) \ 164 + { \ 165 + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ 166 + return sprintf(buf, "%u\n", tuners->file_name); \ 167 + } 168 + 169 + #define gov_show_one_common(file_name) \ 170 + static ssize_t show_##file_name \ 171 + (struct dbs_data *dbs_data, char *buf) \ 172 + { \ 173 + return sprintf(buf, "%u\n", dbs_data->file_name); \ 174 + } 175 + 176 + #define gov_attr_ro(_name) \ 177 + static struct governor_attr _name = \ 178 + __ATTR(_name, 0444, show_##_name, NULL) 179 + 180 + #define gov_attr_rw(_name) \ 181 + static struct governor_attr _name = \ 182 + __ATTR(_name, 0644, show_##_name, store_##_name) 183 + 53 184 /* Common to all CPUs of a policy */ 54 - struct cpu_common_dbs_info { 185 + struct policy_dbs_info { 55 186 struct cpufreq_policy *policy; 56 187 /* 57 188 * Per policy mutex that serializes load evaluation from limit-change ··· 111 138 */ 112 139 struct mutex timer_mutex; 113 140 114 - ktime_t time_stamp; 115 - atomic_t skip_work; 141 + u64 last_sample_time; 142 + s64 sample_delay_ns; 143 + atomic_t work_count; 144 + struct irq_work irq_work; 116 145 struct work_struct work; 146 + /* dbs_data may be shared between multiple policy objects */ 147 + struct dbs_data *dbs_data; 148 + struct list_head list; 149 + /* Multiplier for increasing sample delay temporarily. 
*/ 150 + unsigned int rate_mult; 151 + /* Status indicators */ 152 + bool is_shared; /* This object is used by multiple CPUs */ 153 + bool work_in_progress; /* Work is being queued up or in progress */ 117 154 }; 155 + 156 + static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, 157 + unsigned int delay_us) 158 + { 159 + policy_dbs->sample_delay_ns = delay_us * NSEC_PER_USEC; 160 + } 118 161 119 162 /* Per cpu structures */ 120 163 struct cpu_dbs_info { ··· 144 155 * wake-up from idle. 145 156 */ 146 157 unsigned int prev_load; 147 - struct timer_list timer; 148 - struct cpu_common_dbs_info *shared; 149 - }; 150 - 151 - struct od_cpu_dbs_info_s { 152 - struct cpu_dbs_info cdbs; 153 - struct cpufreq_frequency_table *freq_table; 154 - unsigned int freq_lo; 155 - unsigned int freq_lo_jiffies; 156 - unsigned int freq_hi_jiffies; 157 - unsigned int rate_mult; 158 - unsigned int sample_type:1; 159 - }; 160 - 161 - struct cs_cpu_dbs_info_s { 162 - struct cpu_dbs_info cdbs; 163 - unsigned int down_skip; 164 - unsigned int requested_freq; 165 - }; 166 - 167 - /* Per policy Governors sysfs tunables */ 168 - struct od_dbs_tuners { 169 - unsigned int ignore_nice_load; 170 - unsigned int sampling_rate; 171 - unsigned int sampling_down_factor; 172 - unsigned int up_threshold; 173 - unsigned int powersave_bias; 174 - unsigned int io_is_busy; 175 - }; 176 - 177 - struct cs_dbs_tuners { 178 - unsigned int ignore_nice_load; 179 - unsigned int sampling_rate; 180 - unsigned int sampling_down_factor; 181 - unsigned int up_threshold; 182 - unsigned int down_threshold; 183 - unsigned int freq_step; 158 + struct update_util_data update_util; 159 + struct policy_dbs_info *policy_dbs; 184 160 }; 185 161 186 162 /* Common Governor data across policies */ 187 - struct dbs_data; 188 - struct common_dbs_data { 189 - /* Common across governors */ 190 - #define GOV_ONDEMAND 0 191 - #define GOV_CONSERVATIVE 1 192 - int governor; 193 - struct attribute_group 
*attr_group_gov_sys; /* one governor - system */ 194 - struct attribute_group *attr_group_gov_pol; /* one governor - policy */ 163 + struct dbs_governor { 164 + struct cpufreq_governor gov; 165 + struct kobj_type kobj_type; 195 166 196 167 /* 197 168 * Common data for platforms that don't set ··· 159 210 */ 160 211 struct dbs_data *gdbs_data; 161 212 162 - struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); 163 - void *(*get_cpu_dbs_info_s)(int cpu); 164 - unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, 165 - bool modify_all); 166 - void (*gov_check_cpu)(int cpu, unsigned int load); 213 + unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); 214 + struct policy_dbs_info *(*alloc)(void); 215 + void (*free)(struct policy_dbs_info *policy_dbs); 167 216 int (*init)(struct dbs_data *dbs_data, bool notify); 168 217 void (*exit)(struct dbs_data *dbs_data, bool notify); 169 - 170 - /* Governor specific ops, see below */ 171 - void *gov_ops; 172 - 173 - /* 174 - * Protects governor's data (struct dbs_data and struct common_dbs_data) 175 - */ 176 - struct mutex mutex; 218 + void (*start)(struct cpufreq_policy *policy); 177 219 }; 178 220 179 - /* Governor Per policy data */ 180 - struct dbs_data { 181 - struct common_dbs_data *cdata; 182 - unsigned int min_sampling_rate; 183 - int usage_count; 184 - void *tuners; 185 - }; 221 + static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy) 222 + { 223 + return container_of(policy->governor, struct dbs_governor, gov); 224 + } 186 225 187 - /* Governor specific ops, will be passed to dbs_data->gov_ops */ 226 + /* Governor specific operations */ 188 227 struct od_ops { 189 - void (*powersave_bias_init_cpu)(int cpu); 190 228 unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, 191 229 unsigned int freq_next, unsigned int relation); 192 - void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); 193 230 }; 194 231 195 - static inline int 
delay_for_sampling_rate(unsigned int sampling_rate) 196 - { 197 - int delay = usecs_to_jiffies(sampling_rate); 198 - 199 - /* We want all CPUs to do sampling nearly on same jiffy */ 200 - if (num_online_cpus() > 1) 201 - delay -= jiffies % delay; 202 - 203 - return delay; 204 - } 205 - 206 - #define declare_show_sampling_rate_min(_gov) \ 207 - static ssize_t show_sampling_rate_min_gov_sys \ 208 - (struct kobject *kobj, struct attribute *attr, char *buf) \ 209 - { \ 210 - struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ 211 - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ 212 - } \ 213 - \ 214 - static ssize_t show_sampling_rate_min_gov_pol \ 215 - (struct cpufreq_policy *policy, char *buf) \ 216 - { \ 217 - struct dbs_data *dbs_data = policy->governor_data; \ 218 - return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ 219 - } 220 - 221 - extern struct mutex cpufreq_governor_lock; 222 - 223 - void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay); 224 - void gov_cancel_work(struct cpu_common_dbs_info *shared); 225 - void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); 226 - int cpufreq_governor_dbs(struct cpufreq_policy *policy, 227 - struct common_dbs_data *cdata, unsigned int event); 232 + unsigned int dbs_update(struct cpufreq_policy *policy); 233 + int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); 228 234 void od_register_powersave_bias_handler(unsigned int (*f) 229 235 (struct cpufreq_policy *, unsigned int, unsigned int), 230 236 unsigned int powersave_bias); 231 237 void od_unregister_powersave_bias_handler(void); 238 + ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, 239 + size_t count); 240 + void gov_update_cpu_data(struct dbs_data *dbs_data); 232 241 #endif /* _CPUFREQ_GOVERNOR_H */

+157 -290

drivers/cpufreq/cpufreq_ondemand.c

··· 16 16 #include <linux/percpu-defs.h> 17 17 #include <linux/slab.h> 18 18 #include <linux/tick.h> 19 - #include "cpufreq_governor.h" 19 + 20 + #include "cpufreq_ondemand.h" 20 21 21 22 /* On-demand governor macros */ 22 23 #define DEF_FREQUENCY_UP_THRESHOLD (80) ··· 28 27 #define MIN_FREQUENCY_UP_THRESHOLD (11) 29 28 #define MAX_FREQUENCY_UP_THRESHOLD (100) 30 29 31 - static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); 32 - 33 30 static struct od_ops od_ops; 34 31 35 - #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 36 - static struct cpufreq_governor cpufreq_gov_ondemand; 37 - #endif 38 - 39 32 static unsigned int default_powersave_bias; 40 - 41 - static void ondemand_powersave_bias_init_cpu(int cpu) 42 - { 43 - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 44 - 45 - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); 46 - dbs_info->freq_lo = 0; 47 - } 48 33 49 34 /* 50 35 * Not all CPUs want IO time to be accounted as busy; this depends on how ··· 57 70 58 71 /* 59 72 * Find right freq to be set now with powersave_bias on. 60 - * Returns the freq_hi to be used right now and will set freq_hi_jiffies, 61 - * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. 73 + * Returns the freq_hi to be used right now and will set freq_hi_delay_us, 74 + * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs. 
62 75 */ 63 76 static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, 64 77 unsigned int freq_next, unsigned int relation) ··· 66 79 unsigned int freq_req, freq_reduc, freq_avg; 67 80 unsigned int freq_hi, freq_lo; 68 81 unsigned int index = 0; 69 - unsigned int jiffies_total, jiffies_hi, jiffies_lo; 70 - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 71 - policy->cpu); 72 - struct dbs_data *dbs_data = policy->governor_data; 82 + unsigned int delay_hi_us; 83 + struct policy_dbs_info *policy_dbs = policy->governor_data; 84 + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 85 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 73 86 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 74 87 75 88 if (!dbs_info->freq_table) { 76 89 dbs_info->freq_lo = 0; 77 - dbs_info->freq_lo_jiffies = 0; 90 + dbs_info->freq_lo_delay_us = 0; 78 91 return freq_next; 79 92 } 80 93 ··· 97 110 /* Find out how long we have to be in hi and lo freqs */ 98 111 if (freq_hi == freq_lo) { 99 112 dbs_info->freq_lo = 0; 100 - dbs_info->freq_lo_jiffies = 0; 113 + dbs_info->freq_lo_delay_us = 0; 101 114 return freq_lo; 102 115 } 103 - jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); 104 - jiffies_hi = (freq_avg - freq_lo) * jiffies_total; 105 - jiffies_hi += ((freq_hi - freq_lo) / 2); 106 - jiffies_hi /= (freq_hi - freq_lo); 107 - jiffies_lo = jiffies_total - jiffies_hi; 116 + delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate; 117 + delay_hi_us += (freq_hi - freq_lo) / 2; 118 + delay_hi_us /= freq_hi - freq_lo; 119 + dbs_info->freq_hi_delay_us = delay_hi_us; 108 120 dbs_info->freq_lo = freq_lo; 109 - dbs_info->freq_lo_jiffies = jiffies_lo; 110 - dbs_info->freq_hi_jiffies = jiffies_hi; 121 + dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us; 111 122 return freq_hi; 112 123 } 113 124 114 - static void ondemand_powersave_bias_init(void) 125 + static void ondemand_powersave_bias_init(struct cpufreq_policy 
*policy) 115 126 { 116 - int i; 117 - for_each_online_cpu(i) { 118 - ondemand_powersave_bias_init_cpu(i); 119 - } 127 + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); 128 + 129 + dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); 130 + dbs_info->freq_lo = 0; 120 131 } 121 132 122 133 static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) 123 134 { 124 - struct dbs_data *dbs_data = policy->governor_data; 135 + struct policy_dbs_info *policy_dbs = policy->governor_data; 136 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 125 137 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 126 138 127 139 if (od_tuners->powersave_bias) ··· 138 152 * (default), then we try to increase frequency. Else, we adjust the frequency 139 153 * proportional to load. 140 154 */ 141 - static void od_check_cpu(int cpu, unsigned int load) 155 + static void od_update(struct cpufreq_policy *policy) 142 156 { 143 - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 144 - struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; 145 - struct dbs_data *dbs_data = policy->governor_data; 157 + struct policy_dbs_info *policy_dbs = policy->governor_data; 158 + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 159 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 146 160 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 161 + unsigned int load = dbs_update(policy); 147 162 148 163 dbs_info->freq_lo = 0; 149 164 150 165 /* Check for frequency increase */ 151 - if (load > od_tuners->up_threshold) { 166 + if (load > dbs_data->up_threshold) { 152 167 /* If switching to max speed, apply sampling_down_factor */ 153 168 if (policy->cur < policy->max) 154 - dbs_info->rate_mult = 155 - od_tuners->sampling_down_factor; 169 + policy_dbs->rate_mult = dbs_data->sampling_down_factor; 156 170 dbs_freq_increase(policy, policy->max); 157 171 } else { 158 172 /* Calculate the next frequency proportional to load 
*/ ··· 163 177 freq_next = min_f + load * (max_f - min_f) / 100; 164 178 165 179 /* No longer fully busy, reset rate_mult */ 166 - dbs_info->rate_mult = 1; 180 + policy_dbs->rate_mult = 1; 167 181 168 - if (!od_tuners->powersave_bias) { 169 - __cpufreq_driver_target(policy, freq_next, 170 - CPUFREQ_RELATION_C); 171 - return; 172 - } 182 + if (od_tuners->powersave_bias) 183 + freq_next = od_ops.powersave_bias_target(policy, 184 + freq_next, 185 + CPUFREQ_RELATION_L); 173 186 174 - freq_next = od_ops.powersave_bias_target(policy, freq_next, 175 - CPUFREQ_RELATION_L); 176 187 __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); 177 188 } 178 189 } 179 190 180 - static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) 191 + static unsigned int od_dbs_timer(struct cpufreq_policy *policy) 181 192 { 182 - struct dbs_data *dbs_data = policy->governor_data; 183 - unsigned int cpu = policy->cpu; 184 - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 185 - cpu); 186 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 187 - int delay = 0, sample_type = dbs_info->sample_type; 188 - 189 - if (!modify_all) 190 - goto max_delay; 193 + struct policy_dbs_info *policy_dbs = policy->governor_data; 194 + struct dbs_data *dbs_data = policy_dbs->dbs_data; 195 + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); 196 + int sample_type = dbs_info->sample_type; 191 197 192 198 /* Common NORMAL_SAMPLE setup */ 193 199 dbs_info->sample_type = OD_NORMAL_SAMPLE; 194 - if (sample_type == OD_SUB_SAMPLE) { 195 - delay = dbs_info->freq_lo_jiffies; 200 + /* 201 + * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore 202 + * it then. 
203 + */ 204 + if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { 196 205 __cpufreq_driver_target(policy, dbs_info->freq_lo, 197 206 CPUFREQ_RELATION_H); 198 - } else { 199 - dbs_check_cpu(dbs_data, cpu); 200 - if (dbs_info->freq_lo) { 201 - /* Setup timer for SUB_SAMPLE */ 202 - dbs_info->sample_type = OD_SUB_SAMPLE; 203 - delay = dbs_info->freq_hi_jiffies; 204 - } 207 + return dbs_info->freq_lo_delay_us; 205 208 } 206 209 207 - max_delay: 208 - if (!delay) 209 - delay = delay_for_sampling_rate(od_tuners->sampling_rate 210 - * dbs_info->rate_mult); 210 + od_update(policy); 211 211 212 - return delay; 212 + if (dbs_info->freq_lo) { 213 + /* Setup timer for SUB_SAMPLE */ 214 + dbs_info->sample_type = OD_SUB_SAMPLE; 215 + return dbs_info->freq_hi_delay_us; 216 + } 217 + 218 + return dbs_data->sampling_rate * policy_dbs->rate_mult; 213 219 } 214 220 215 221 /************************** sysfs interface ************************/ 216 - static struct common_dbs_data od_dbs_cdata; 217 - 218 - /** 219 - * update_sampling_rate - update sampling rate effective immediately if needed. 220 - * @new_rate: new sampling rate 221 - * 222 - * If new rate is smaller than the old, simply updating 223 - * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the 224 - * original sampling_rate was 1 second and the requested new sampling rate is 10 225 - * ms because the user needs immediate reaction from ondemand governor, but not 226 - * sure if higher frequency will be required or not, then, the governor may 227 - * change the sampling rate too late; up to 1 second later. Thus, if we are 228 - * reducing the sampling rate, we need to make the new value effective 229 - * immediately. 
230 - */ 231 - static void update_sampling_rate(struct dbs_data *dbs_data, 232 - unsigned int new_rate) 233 - { 234 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 235 - struct cpumask cpumask; 236 - int cpu; 237 - 238 - od_tuners->sampling_rate = new_rate = max(new_rate, 239 - dbs_data->min_sampling_rate); 240 - 241 - /* 242 - * Lock governor so that governor start/stop can't execute in parallel. 243 - */ 244 - mutex_lock(&od_dbs_cdata.mutex); 245 - 246 - cpumask_copy(&cpumask, cpu_online_mask); 247 - 248 - for_each_cpu(cpu, &cpumask) { 249 - struct cpufreq_policy *policy; 250 - struct od_cpu_dbs_info_s *dbs_info; 251 - struct cpu_dbs_info *cdbs; 252 - struct cpu_common_dbs_info *shared; 253 - unsigned long next_sampling, appointed_at; 254 - 255 - dbs_info = &per_cpu(od_cpu_dbs_info, cpu); 256 - cdbs = &dbs_info->cdbs; 257 - shared = cdbs->shared; 258 - 259 - /* 260 - * A valid shared and shared->policy means governor hasn't 261 - * stopped or exited yet. 262 - */ 263 - if (!shared || !shared->policy) 264 - continue; 265 - 266 - policy = shared->policy; 267 - 268 - /* clear all CPUs of this policy */ 269 - cpumask_andnot(&cpumask, &cpumask, policy->cpus); 270 - 271 - /* 272 - * Update sampling rate for CPUs whose policy is governed by 273 - * dbs_data. In case of governor_per_policy, only a single 274 - * policy will be governed by dbs_data, otherwise there can be 275 - * multiple policies that are governed by the same dbs_data. 276 - */ 277 - if (dbs_data != policy->governor_data) 278 - continue; 279 - 280 - /* 281 - * Checking this for any CPU should be fine, timers for all of 282 - * them are scheduled together. 
283 - */ 284 - next_sampling = jiffies + usecs_to_jiffies(new_rate); 285 - appointed_at = dbs_info->cdbs.timer.expires; 286 - 287 - if (time_before(next_sampling, appointed_at)) { 288 - gov_cancel_work(shared); 289 - gov_add_timers(policy, usecs_to_jiffies(new_rate)); 290 - 291 - } 292 - } 293 - 294 - mutex_unlock(&od_dbs_cdata.mutex); 295 - } 296 - 297 - static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, 298 - size_t count) 299 - { 300 - unsigned int input; 301 - int ret; 302 - ret = sscanf(buf, "%u", &input); 303 - if (ret != 1) 304 - return -EINVAL; 305 - 306 - update_sampling_rate(dbs_data, input); 307 - return count; 308 - } 222 + static struct dbs_governor od_dbs_gov; 309 223 310 224 static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, 311 225 size_t count) 312 226 { 313 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 314 227 unsigned int input; 315 228 int ret; 316 - unsigned int j; 317 229 318 230 ret = sscanf(buf, "%u", &input); 319 231 if (ret != 1) 320 232 return -EINVAL; 321 - od_tuners->io_is_busy = !!input; 233 + dbs_data->io_is_busy = !!input; 322 234 323 235 /* we need to re-evaluate prev_cpu_idle */ 324 - for_each_online_cpu(j) { 325 - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 326 - j); 327 - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, 328 - &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); 329 - } 236 + gov_update_cpu_data(dbs_data); 237 + 330 238 return count; 331 239 } 332 240 333 241 static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, 334 242 size_t count) 335 243 { 336 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 337 244 unsigned int input; 338 245 int ret; 339 246 ret = sscanf(buf, "%u", &input); ··· 236 357 return -EINVAL; 237 358 } 238 359 239 - od_tuners->up_threshold = input; 360 + dbs_data->up_threshold = input; 240 361 return count; 241 362 } 242 363 243 364 static ssize_t store_sampling_down_factor(struct 
dbs_data *dbs_data, 244 365 const char *buf, size_t count) 245 366 { 246 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 247 - unsigned int input, j; 367 + struct policy_dbs_info *policy_dbs; 368 + unsigned int input; 248 369 int ret; 249 370 ret = sscanf(buf, "%u", &input); 250 371 251 372 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 252 373 return -EINVAL; 253 - od_tuners->sampling_down_factor = input; 374 + 375 + dbs_data->sampling_down_factor = input; 254 376 255 377 /* Reset down sampling multiplier in case it was active */ 256 - for_each_online_cpu(j) { 257 - struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 258 - j); 259 - dbs_info->rate_mult = 1; 378 + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) { 379 + /* 380 + * Doing this without locking might lead to using different 381 + * rate_mult values in od_update() and od_dbs_timer(). 382 + */ 383 + mutex_lock(&policy_dbs->timer_mutex); 384 + policy_dbs->rate_mult = 1; 385 + mutex_unlock(&policy_dbs->timer_mutex); 260 386 } 387 + 261 388 return count; 262 389 } 263 390 264 391 static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, 265 392 const char *buf, size_t count) 266 393 { 267 - struct od_dbs_tuners *od_tuners = dbs_data->tuners; 268 394 unsigned int input; 269 395 int ret; 270 - 271 - unsigned int j; 272 396 273 397 ret = sscanf(buf, "%u", &input); 274 398 if (ret != 1) ··· 280 398 if (input > 1) 281 399 input = 1; 282 400 283 - if (input == od_tuners->ignore_nice_load) { /* nothing to do */ 401 + if (input == dbs_data->ignore_nice_load) { /* nothing to do */ 284 402 return count; 285 403 } 286 - od_tuners->ignore_nice_load = input; 404 + dbs_data->ignore_nice_load = input; 287 405 288 406 /* we need to re-evaluate prev_cpu_idle */ 289 - for_each_online_cpu(j) { 290 - struct od_cpu_dbs_info_s *dbs_info; 291 - dbs_info = &per_cpu(od_cpu_dbs_info, j); 292 - dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, 293 - 
&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); 294 - if (od_tuners->ignore_nice_load) 295 - dbs_info->cdbs.prev_cpu_nice = 296 - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 407 + gov_update_cpu_data(dbs_data); 297 408 298 - } 299 409 return count; 300 410 } 301 411 ··· 295 421 size_t count) 296 422 { 297 423 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 424 + struct policy_dbs_info *policy_dbs; 298 425 unsigned int input; 299 426 int ret; 300 427 ret = sscanf(buf, "%u", &input); ··· 307 432 input = 1000; 308 433 309 434 od_tuners->powersave_bias = input; 310 - ondemand_powersave_bias_init(); 435 + 436 + list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) 437 + ondemand_powersave_bias_init(policy_dbs->policy); 438 + 311 439 return count; 312 440 } 313 441 314 - show_store_one(od, sampling_rate); 315 - show_store_one(od, io_is_busy); 316 - show_store_one(od, up_threshold); 317 - show_store_one(od, sampling_down_factor); 318 - show_store_one(od, ignore_nice_load); 319 - show_store_one(od, powersave_bias); 320 - declare_show_sampling_rate_min(od); 442 + gov_show_one_common(sampling_rate); 443 + gov_show_one_common(up_threshold); 444 + gov_show_one_common(sampling_down_factor); 445 + gov_show_one_common(ignore_nice_load); 446 + gov_show_one_common(min_sampling_rate); 447 + gov_show_one_common(io_is_busy); 448 + gov_show_one(od, powersave_bias); 321 449 322 - gov_sys_pol_attr_rw(sampling_rate); 323 - gov_sys_pol_attr_rw(io_is_busy); 324 - gov_sys_pol_attr_rw(up_threshold); 325 - gov_sys_pol_attr_rw(sampling_down_factor); 326 - gov_sys_pol_attr_rw(ignore_nice_load); 327 - gov_sys_pol_attr_rw(powersave_bias); 328 - gov_sys_pol_attr_ro(sampling_rate_min); 450 + gov_attr_rw(sampling_rate); 451 + gov_attr_rw(io_is_busy); 452 + gov_attr_rw(up_threshold); 453 + gov_attr_rw(sampling_down_factor); 454 + gov_attr_rw(ignore_nice_load); 455 + gov_attr_rw(powersave_bias); 456 + gov_attr_ro(min_sampling_rate); 329 457 330 - static struct attribute 
*dbs_attributes_gov_sys[] = { 331 - &sampling_rate_min_gov_sys.attr, 332 - &sampling_rate_gov_sys.attr, 333 - &up_threshold_gov_sys.attr, 334 - &sampling_down_factor_gov_sys.attr, 335 - &ignore_nice_load_gov_sys.attr, 336 - &powersave_bias_gov_sys.attr, 337 - &io_is_busy_gov_sys.attr, 458 + static struct attribute *od_attributes[] = { 459 + &min_sampling_rate.attr, 460 + &sampling_rate.attr, 461 + &up_threshold.attr, 462 + &sampling_down_factor.attr, 463 + &ignore_nice_load.attr, 464 + &powersave_bias.attr, 465 + &io_is_busy.attr, 338 466 NULL 339 - }; 340 - 341 - static struct attribute_group od_attr_group_gov_sys = { 342 - .attrs = dbs_attributes_gov_sys, 343 - .name = "ondemand", 344 - }; 345 - 346 - static struct attribute *dbs_attributes_gov_pol[] = { 347 - &sampling_rate_min_gov_pol.attr, 348 - &sampling_rate_gov_pol.attr, 349 - &up_threshold_gov_pol.attr, 350 - &sampling_down_factor_gov_pol.attr, 351 - &ignore_nice_load_gov_pol.attr, 352 - &powersave_bias_gov_pol.attr, 353 - &io_is_busy_gov_pol.attr, 354 - NULL 355 - }; 356 - 357 - static struct attribute_group od_attr_group_gov_pol = { 358 - .attrs = dbs_attributes_gov_pol, 359 - .name = "ondemand", 360 467 }; 361 468 362 469 /************************** sysfs end ************************/ 470 + 471 + static struct policy_dbs_info *od_alloc(void) 472 + { 473 + struct od_policy_dbs_info *dbs_info; 474 + 475 + dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL); 476 + return dbs_info ? &dbs_info->policy_dbs : NULL; 477 + } 478 + 479 + static void od_free(struct policy_dbs_info *policy_dbs) 480 + { 481 + kfree(to_dbs_info(policy_dbs)); 482 + } 363 483 364 484 static int od_init(struct dbs_data *dbs_data, bool notify) 365 485 { ··· 373 503 put_cpu(); 374 504 if (idle_time != -1ULL) { 375 505 /* Idle micro accounting is supported. 
Use finer thresholds */ 376 - tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 506 + dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; 377 507 /* 378 508 * In nohz/micro accounting case we set the minimum frequency 379 509 * not depending on HZ, but fixed (very low). The deferred ··· 381 511 */ 382 512 dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; 383 513 } else { 384 - tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; 514 + dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; 385 515 386 516 /* For correct statistics, we need 10 ticks for each measure */ 387 517 dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * 388 518 jiffies_to_usecs(10); 389 519 } 390 520 391 - tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; 392 - tuners->ignore_nice_load = 0; 521 + dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; 522 + dbs_data->ignore_nice_load = 0; 393 523 tuners->powersave_bias = default_powersave_bias; 394 - tuners->io_is_busy = should_io_be_busy(); 524 + dbs_data->io_is_busy = should_io_be_busy(); 395 525 396 526 dbs_data->tuners = tuners; 397 527 return 0; ··· 402 532 kfree(dbs_data->tuners); 403 533 } 404 534 405 - define_get_cpu_dbs_routines(od_cpu_dbs_info); 535 + static void od_start(struct cpufreq_policy *policy) 536 + { 537 + struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); 538 + 539 + dbs_info->sample_type = OD_NORMAL_SAMPLE; 540 + ondemand_powersave_bias_init(policy); 541 + } 406 542 407 543 static struct od_ops od_ops = { 408 - .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, 409 544 .powersave_bias_target = generic_powersave_bias_target, 410 - .freq_increase = dbs_freq_increase, 411 545 }; 412 546 413 - static struct common_dbs_data od_dbs_cdata = { 414 - .governor = GOV_ONDEMAND, 415 - .attr_group_gov_sys = &od_attr_group_gov_sys, 416 - .attr_group_gov_pol = &od_attr_group_gov_pol, 417 - .get_cpu_cdbs = get_cpu_cdbs, 418 - .get_cpu_dbs_info_s = get_cpu_dbs_info_s, 547 + 
static struct dbs_governor od_dbs_gov = { 548 + .gov = { 549 + .name = "ondemand", 550 + .governor = cpufreq_governor_dbs, 551 + .max_transition_latency = TRANSITION_LATENCY_LIMIT, 552 + .owner = THIS_MODULE, 553 + }, 554 + .kobj_type = { .default_attrs = od_attributes }, 419 555 .gov_dbs_timer = od_dbs_timer, 420 - .gov_check_cpu = od_check_cpu, 421 - .gov_ops = &od_ops, 556 + .alloc = od_alloc, 557 + .free = od_free, 422 558 .init = od_init, 423 559 .exit = od_exit, 424 - .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), 560 + .start = od_start, 425 561 }; 562 + 563 + #define CPU_FREQ_GOV_ONDEMAND (&od_dbs_gov.gov) 426 564 427 565 static void od_set_powersave_bias(unsigned int powersave_bias) 428 566 { 429 - struct cpufreq_policy *policy; 430 - struct dbs_data *dbs_data; 431 - struct od_dbs_tuners *od_tuners; 432 567 unsigned int cpu; 433 568 cpumask_t done; 434 569 ··· 442 567 443 568 get_online_cpus(); 444 569 for_each_online_cpu(cpu) { 445 - struct cpu_common_dbs_info *shared; 570 + struct cpufreq_policy *policy; 571 + struct policy_dbs_info *policy_dbs; 572 + struct dbs_data *dbs_data; 573 + struct od_dbs_tuners *od_tuners; 446 574 447 575 if (cpumask_test_cpu(cpu, &done)) 448 576 continue; 449 577 450 - shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; 451 - if (!shared) 578 + policy = cpufreq_cpu_get_raw(cpu); 579 + if (!policy || policy->governor != CPU_FREQ_GOV_ONDEMAND) 452 580 continue; 453 581 454 - policy = shared->policy; 582 + policy_dbs = policy->governor_data; 583 + if (!policy_dbs) 584 + continue; 585 + 455 586 cpumask_or(&done, &done, policy->cpus); 456 587 457 - if (policy->governor != &cpufreq_gov_ondemand) 458 - continue; 459 - 460 - dbs_data = policy->governor_data; 588 + dbs_data = policy_dbs->dbs_data; 461 589 od_tuners = dbs_data->tuners; 462 590 od_tuners->powersave_bias = default_powersave_bias; 463 591 } ··· 483 605 } 484 606 EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); 485 607 486 - static int 
od_cpufreq_governor_dbs(struct cpufreq_policy *policy, 487 - unsigned int event) 488 - { 489 - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); 490 - } 491 - 492 - #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 493 - static 494 - #endif 495 - struct cpufreq_governor cpufreq_gov_ondemand = { 496 - .name = "ondemand", 497 - .governor = od_cpufreq_governor_dbs, 498 - .max_transition_latency = TRANSITION_LATENCY_LIMIT, 499 - .owner = THIS_MODULE, 500 - }; 501 - 502 608 static int __init cpufreq_gov_dbs_init(void) 503 609 { 504 - return cpufreq_register_governor(&cpufreq_gov_ondemand); 610 + return cpufreq_register_governor(CPU_FREQ_GOV_ONDEMAND); 505 611 } 506 612 507 613 static void __exit cpufreq_gov_dbs_exit(void) 508 614 { 509 - cpufreq_unregister_governor(&cpufreq_gov_ondemand); 615 + cpufreq_unregister_governor(CPU_FREQ_GOV_ONDEMAND); 510 616 } 511 617 512 618 MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); ··· 500 638 MODULE_LICENSE("GPL"); 501 639 502 640 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND 641 + struct cpufreq_governor *cpufreq_default_governor(void) 642 + { 643 + return CPU_FREQ_GOV_ONDEMAND; 644 + } 645 + 503 646 fs_initcall(cpufreq_gov_dbs_init); 504 647 #else 505 648 module_init(cpufreq_gov_dbs_init);

+30

drivers/cpufreq/cpufreq_ondemand.h

··· 1 + /* 2 + * Header file for CPUFreq ondemand governor and related code. 3 + * 4 + * Copyright (C) 2016, Intel Corporation 5 + * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include "cpufreq_governor.h" 13 + 14 + struct od_policy_dbs_info { 15 + struct policy_dbs_info policy_dbs; 16 + struct cpufreq_frequency_table *freq_table; 17 + unsigned int freq_lo; 18 + unsigned int freq_lo_delay_us; 19 + unsigned int freq_hi_delay_us; 20 + unsigned int sample_type:1; 21 + }; 22 + 23 + static inline struct od_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) 24 + { 25 + return container_of(policy_dbs, struct od_policy_dbs_info, policy_dbs); 26 + } 27 + 28 + struct od_dbs_tuners { 29 + unsigned int powersave_bias; 30 + };

+14 -4

drivers/cpufreq/cpufreq_performance.c

··· 33 33 return 0; 34 34 } 35 35 36 - #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE 37 - static 38 - #endif 39 - struct cpufreq_governor cpufreq_gov_performance = { 36 + static struct cpufreq_governor cpufreq_gov_performance = { 40 37 .name = "performance", 41 38 .governor = cpufreq_governor_performance, 42 39 .owner = THIS_MODULE, ··· 48 51 { 49 52 cpufreq_unregister_governor(&cpufreq_gov_performance); 50 53 } 54 + 55 + #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE 56 + struct cpufreq_governor *cpufreq_default_governor(void) 57 + { 58 + return &cpufreq_gov_performance; 59 + } 60 + #endif 61 + #ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE 62 + struct cpufreq_governor *cpufreq_fallback_governor(void) 63 + { 64 + return &cpufreq_gov_performance; 65 + } 66 + #endif 51 67 52 68 MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); 53 69 MODULE_DESCRIPTION("CPUfreq policy governor 'performance'");

+6 -4

drivers/cpufreq/cpufreq_powersave.c

··· 33 33 return 0; 34 34 } 35 35 36 - #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE 37 - static 38 - #endif 39 - struct cpufreq_governor cpufreq_gov_powersave = { 36 + static struct cpufreq_governor cpufreq_gov_powersave = { 40 37 .name = "powersave", 41 38 .governor = cpufreq_governor_powersave, 42 39 .owner = THIS_MODULE, ··· 54 57 MODULE_LICENSE("GPL"); 55 58 56 59 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE 60 + struct cpufreq_governor *cpufreq_default_governor(void) 61 + { 62 + return &cpufreq_gov_powersave; 63 + } 64 + 57 65 fs_initcall(cpufreq_gov_powersave_init); 58 66 #else 59 67 module_init(cpufreq_gov_powersave_init);

+6 -4

drivers/cpufreq/cpufreq_userspace.c

··· 89 89 return rc; 90 90 } 91 91 92 - #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE 93 - static 94 - #endif 95 - struct cpufreq_governor cpufreq_gov_userspace = { 92 + static struct cpufreq_governor cpufreq_gov_userspace = { 96 93 .name = "userspace", 97 94 .governor = cpufreq_governor_userspace, 98 95 .store_setspeed = cpufreq_set, ··· 113 116 MODULE_LICENSE("GPL"); 114 117 115 118 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE 119 + struct cpufreq_governor *cpufreq_default_governor(void) 120 + { 121 + return &cpufreq_gov_userspace; 122 + } 123 + 116 124 fs_initcall(cpufreq_gov_userspace_init); 117 125 #else 118 126 module_init(cpufreq_gov_userspace_init);

+92 -100

drivers/cpufreq/intel_pstate.c

··· 71 71 u64 mperf; 72 72 u64 tsc; 73 73 int freq; 74 - ktime_t time; 74 + u64 time; 75 75 }; 76 76 77 77 struct pstate_data { ··· 103 103 struct cpudata { 104 104 int cpu; 105 105 106 - struct timer_list timer; 106 + struct update_util_data update_util; 107 107 108 108 struct pstate_data pstate; 109 109 struct vid_data vid; 110 110 struct _pid pid; 111 111 112 - ktime_t last_sample_time; 112 + u64 last_sample_time; 113 113 u64 prev_aperf; 114 114 u64 prev_mperf; 115 115 u64 prev_tsc; ··· 120 120 static struct cpudata **all_cpu_data; 121 121 struct pstate_adjust_policy { 122 122 int sample_rate_ms; 123 + s64 sample_rate_ns; 123 124 int deadband; 124 125 int setpoint; 125 126 int p_gain_pct; ··· 198 197 199 198 static inline void pid_reset(struct _pid *pid, int setpoint, int busy, 200 199 int deadband, int integral) { 201 - pid->setpoint = setpoint; 202 - pid->deadband = deadband; 200 + pid->setpoint = int_tofp(setpoint); 201 + pid->deadband = int_tofp(deadband); 203 202 pid->integral = int_tofp(integral); 204 203 pid->last_err = int_tofp(setpoint) - int_tofp(busy); 205 204 } ··· 225 224 int32_t pterm, dterm, fp_error; 226 225 int32_t integral_limit; 227 226 228 - fp_error = int_tofp(pid->setpoint) - busy; 227 + fp_error = pid->setpoint - busy; 229 228 230 - if (abs(fp_error) <= int_tofp(pid->deadband)) 229 + if (abs(fp_error) <= pid->deadband) 231 230 return 0; 232 231 233 232 pterm = mul_fp(pid->p_gain, fp_error); ··· 287 286 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); 288 287 } 289 288 290 - static void intel_pstate_hwp_set(void) 289 + static void intel_pstate_hwp_set(const struct cpumask *cpumask) 291 290 { 292 291 int min, hw_min, max, hw_max, cpu, range, adj_range; 293 292 u64 value, cap; ··· 297 296 hw_max = HWP_HIGHEST_PERF(cap); 298 297 range = hw_max - hw_min; 299 298 300 - get_online_cpus(); 301 - 302 - for_each_online_cpu(cpu) { 299 + for_each_cpu(cpu, cpumask) { 303 300 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); 304 301 adj_range = 
limits->min_perf_pct * range / 100; 305 302 min = hw_min + adj_range; ··· 316 317 value |= HWP_MAX_PERF(max); 317 318 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); 318 319 } 320 + } 319 321 322 + static void intel_pstate_hwp_set_online_cpus(void) 323 + { 324 + get_online_cpus(); 325 + intel_pstate_hwp_set(cpu_online_mask); 320 326 put_online_cpus(); 321 327 } 322 328 ··· 443 439 limits->no_turbo = clamp_t(int, input, 0, 1); 444 440 445 441 if (hwp_active) 446 - intel_pstate_hwp_set(); 442 + intel_pstate_hwp_set_online_cpus(); 447 443 448 444 return count; 449 445 } ··· 469 465 int_tofp(100)); 470 466 471 467 if (hwp_active) 472 - intel_pstate_hwp_set(); 468 + intel_pstate_hwp_set_online_cpus(); 473 469 return count; 474 470 } 475 471 ··· 494 490 int_tofp(100)); 495 491 496 492 if (hwp_active) 497 - intel_pstate_hwp_set(); 493 + intel_pstate_hwp_set_online_cpus(); 498 494 return count; 499 495 } 500 496 ··· 535 531 536 532 static void intel_pstate_hwp_enable(struct cpudata *cpudata) 537 533 { 534 + /* First disable HWP notification interrupt as we don't process them */ 535 + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); 536 + 538 537 wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); 539 538 } 540 539 ··· 719 712 if (limits->no_turbo && !limits->turbo_disabled) 720 713 val |= (u64)1 << 32; 721 714 722 - wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); 715 + wrmsrl(MSR_IA32_PERF_CTL, val); 723 716 } 724 717 725 718 static int knl_get_turbo_pstate(void) ··· 831 824 * policy, or by cpu specific default values determined through 832 825 * experimentation. 
833 826 */ 834 - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); 827 + max_perf_adj = fp_toint(max_perf * limits->max_perf); 835 828 *max = clamp_t(int, max_perf_adj, 836 829 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); 837 830 838 - min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); 831 + min_perf = fp_toint(max_perf * limits->min_perf); 839 832 *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); 840 833 } 841 834 ··· 881 874 core_pct = int_tofp(sample->aperf) * int_tofp(100); 882 875 core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); 883 876 884 - sample->freq = fp_toint( 885 - mul_fp(int_tofp( 886 - cpu->pstate.max_pstate_physical * 887 - cpu->pstate.scaling / 100), 888 - core_pct)); 889 - 890 877 sample->core_pct_busy = (int32_t)core_pct; 891 878 } 892 879 893 - static inline void intel_pstate_sample(struct cpudata *cpu) 880 + static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) 894 881 { 895 882 u64 aperf, mperf; 896 883 unsigned long flags; ··· 894 893 rdmsrl(MSR_IA32_APERF, aperf); 895 894 rdmsrl(MSR_IA32_MPERF, mperf); 896 895 tsc = rdtsc(); 897 - if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { 896 + if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) { 898 897 local_irq_restore(flags); 899 - return; 898 + return false; 900 899 } 901 900 local_irq_restore(flags); 902 901 903 902 cpu->last_sample_time = cpu->sample.time; 904 - cpu->sample.time = ktime_get(); 903 + cpu->sample.time = time; 905 904 cpu->sample.aperf = aperf; 906 905 cpu->sample.mperf = mperf; 907 906 cpu->sample.tsc = tsc; ··· 909 908 cpu->sample.mperf -= cpu->prev_mperf; 910 909 cpu->sample.tsc -= cpu->prev_tsc; 911 910 912 - intel_pstate_calc_busy(cpu); 913 - 914 911 cpu->prev_aperf = aperf; 915 912 cpu->prev_mperf = mperf; 916 913 cpu->prev_tsc = tsc; 914 + return true; 917 915 } 918 916 919 - static inline void intel_hwp_set_sample_time(struct cpudata *cpu) 917 + static inline int32_t 
get_avg_frequency(struct cpudata *cpu) 920 918 { 921 - int delay; 922 - 923 - delay = msecs_to_jiffies(50); 924 - mod_timer_pinned(&cpu->timer, jiffies + delay); 925 - } 926 - 927 - static inline void intel_pstate_set_sample_time(struct cpudata *cpu) 928 - { 929 - int delay; 930 - 931 - delay = msecs_to_jiffies(pid_params.sample_rate_ms); 932 - mod_timer_pinned(&cpu->timer, jiffies + delay); 919 + return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf * 920 + cpu->pstate.scaling, cpu->sample.mperf); 933 921 } 934 922 935 923 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) ··· 944 954 mperf = cpu->sample.mperf + delta_iowait_mperf; 945 955 cpu->prev_cummulative_iowait = cummulative_iowait; 946 956 947 - 948 957 /* 949 958 * The load can be estimated as the ratio of the mperf counter 950 959 * running at a constant frequency during active periods ··· 959 970 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) 960 971 { 961 972 int32_t core_busy, max_pstate, current_pstate, sample_ratio; 962 - s64 duration_us; 963 - u32 sample_time; 973 + u64 duration_ns; 974 + 975 + intel_pstate_calc_busy(cpu); 964 976 965 977 /* 966 978 * core_busy is the ratio of actual performance to max ··· 980 990 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); 981 991 982 992 /* 983 - * Since we have a deferred timer, it will not fire unless 984 - * we are in C0. So, determine if the actual elapsed time 985 - * is significantly greater (3x) than our sample interval. If it 986 - * is, then we were idle for a long enough period of time 987 - * to adjust our busyness. 993 + * Since our utilization update callback will not run unless we are 994 + * in C0, check if the actual elapsed time is significantly greater (3x) 995 + * than our sample interval. If it is, then we were idle for a long 996 + * enough period of time to adjust our busyness. 
988 997 */ 989 - sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; 990 - duration_us = ktime_us_delta(cpu->sample.time, 991 - cpu->last_sample_time); 992 - if (duration_us > sample_time * 3) { 993 - sample_ratio = div_fp(int_tofp(sample_time), 994 - int_tofp(duration_us)); 998 + duration_ns = cpu->sample.time - cpu->last_sample_time; 999 + if ((s64)duration_ns > pid_params.sample_rate_ns * 3 1000 + && cpu->last_sample_time > 0) { 1001 + sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), 1002 + int_tofp(duration_ns)); 995 1003 core_busy = mul_fp(core_busy, sample_ratio); 996 1004 } 997 1005 ··· 1016 1028 sample->mperf, 1017 1029 sample->aperf, 1018 1030 sample->tsc, 1019 - sample->freq); 1031 + get_avg_frequency(cpu)); 1020 1032 } 1021 1033 1022 - static void intel_hwp_timer_func(unsigned long __data) 1034 + static void intel_pstate_update_util(struct update_util_data *data, u64 time, 1035 + unsigned long util, unsigned long max) 1023 1036 { 1024 - struct cpudata *cpu = (struct cpudata *) __data; 1037 + struct cpudata *cpu = container_of(data, struct cpudata, update_util); 1038 + u64 delta_ns = time - cpu->sample.time; 1025 1039 1026 - intel_pstate_sample(cpu); 1027 - intel_hwp_set_sample_time(cpu); 1028 - } 1040 + if ((s64)delta_ns >= pid_params.sample_rate_ns) { 1041 + bool sample_taken = intel_pstate_sample(cpu, time); 1029 1042 1030 - static void intel_pstate_timer_func(unsigned long __data) 1031 - { 1032 - struct cpudata *cpu = (struct cpudata *) __data; 1033 - 1034 - intel_pstate_sample(cpu); 1035 - 1036 - intel_pstate_adjust_busy_pstate(cpu); 1037 - 1038 - intel_pstate_set_sample_time(cpu); 1043 + if (sample_taken && !hwp_active) 1044 + intel_pstate_adjust_busy_pstate(cpu); 1045 + } 1039 1046 } 1040 1047 1041 1048 #define ICPU(model, policy) \ ··· 1078 1095 1079 1096 cpu->cpu = cpunum; 1080 1097 1081 - if (hwp_active) 1098 + if (hwp_active) { 1082 1099 intel_pstate_hwp_enable(cpu); 1100 + pid_params.sample_rate_ms = 50; 1101 + 
pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC; 1102 + } 1083 1103 1084 1104 intel_pstate_get_cpu_pstates(cpu); 1085 1105 1086 - init_timer_deferrable(&cpu->timer); 1087 - cpu->timer.data = (unsigned long)cpu; 1088 - cpu->timer.expires = jiffies + HZ/100; 1089 - 1090 - if (!hwp_active) 1091 - cpu->timer.function = intel_pstate_timer_func; 1092 - else 1093 - cpu->timer.function = intel_hwp_timer_func; 1094 - 1095 1106 intel_pstate_busy_pid_reset(cpu); 1096 - intel_pstate_sample(cpu); 1107 + intel_pstate_sample(cpu, 0); 1097 1108 1098 - add_timer_on(&cpu->timer, cpunum); 1109 + cpu->update_util.func = intel_pstate_update_util; 1110 + cpufreq_set_update_util_data(cpunum, &cpu->update_util); 1099 1111 1100 1112 pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); 1101 1113 ··· 1106 1128 if (!cpu) 1107 1129 return 0; 1108 1130 sample = &cpu->sample; 1109 - return sample->freq; 1131 + return get_avg_frequency(cpu); 1110 1132 } 1111 1133 1112 1134 static int intel_pstate_set_policy(struct cpufreq_policy *policy) ··· 1119 1141 pr_debug("intel_pstate: set performance\n"); 1120 1142 limits = &performance_limits; 1121 1143 if (hwp_active) 1122 - intel_pstate_hwp_set(); 1144 + intel_pstate_hwp_set(policy->cpus); 1123 1145 return 0; 1124 1146 } 1125 1147 ··· 1151 1173 int_tofp(100)); 1152 1174 1153 1175 if (hwp_active) 1154 - intel_pstate_hwp_set(); 1176 + intel_pstate_hwp_set(policy->cpus); 1155 1177 1156 1178 return 0; 1157 1179 } ··· 1174 1196 1175 1197 pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); 1176 1198 1177 - del_timer_sync(&all_cpu_data[cpu_num]->timer); 1199 + cpufreq_set_update_util_data(cpu_num, NULL); 1200 + synchronize_sched(); 1201 + 1178 1202 if (hwp_active) 1179 1203 return; 1180 1204 ··· 1240 1260 static void copy_pid_params(struct pstate_adjust_policy *policy) 1241 1261 { 1242 1262 pid_params.sample_rate_ms = policy->sample_rate_ms; 1263 + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; 1243 1264 pid_params.p_gain_pct = 
policy->p_gain_pct; 1244 1265 pid_params.i_gain_pct = policy->i_gain_pct; 1245 1266 pid_params.d_gain_pct = policy->d_gain_pct; ··· 1378 1397 static inline bool intel_pstate_has_acpi_ppc(void) { return false; } 1379 1398 #endif /* CONFIG_ACPI */ 1380 1399 1400 + static const struct x86_cpu_id hwp_support_ids[] __initconst = { 1401 + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, 1402 + {} 1403 + }; 1404 + 1381 1405 static int __init intel_pstate_init(void) 1382 1406 { 1383 1407 int cpu, rc = 0; ··· 1392 1406 if (no_load) 1393 1407 return -ENODEV; 1394 1408 1409 + if (x86_match_cpu(hwp_support_ids) && !no_hwp) { 1410 + copy_cpu_funcs(&core_params.funcs); 1411 + hwp_active++; 1412 + goto hwp_cpu_matched; 1413 + } 1414 + 1395 1415 id = x86_match_cpu(intel_pstate_cpu_ids); 1396 1416 if (!id) 1397 - return -ENODEV; 1398 - 1399 - /* 1400 - * The Intel pstate driver will be ignored if the platform 1401 - * firmware has its own power management modes. 1402 - */ 1403 - if (intel_pstate_platform_pwr_mgmt_exists()) 1404 1417 return -ENODEV; 1405 1418 1406 1419 cpu_def = (struct cpu_defaults *)id->driver_data; ··· 1410 1425 if (intel_pstate_msrs_not_valid()) 1411 1426 return -ENODEV; 1412 1427 1428 + hwp_cpu_matched: 1429 + /* 1430 + * The Intel pstate driver will be ignored if the platform 1431 + * firmware has its own power management modes. 
1432 + */ 1433 + if (intel_pstate_platform_pwr_mgmt_exists()) 1434 + return -ENODEV; 1435 + 1413 1436 pr_info("Intel P-state driver initializing.\n"); 1414 1437 1415 1438 all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); 1416 1439 if (!all_cpu_data) 1417 1440 return -ENOMEM; 1418 - 1419 - if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) { 1420 - pr_info("intel_pstate: HWP enabled\n"); 1421 - hwp_active++; 1422 - } 1423 1441 1424 1442 if (!hwp_active && hwp_only) 1425 1443 goto out; ··· 1434 1446 intel_pstate_debug_expose_params(); 1435 1447 intel_pstate_sysfs_expose_params(); 1436 1448 1449 + if (hwp_active) 1450 + pr_info("intel_pstate: HWP enabled\n"); 1451 + 1437 1452 return rc; 1438 1453 out: 1439 1454 get_online_cpus(); 1440 1455 for_each_online_cpu(cpu) { 1441 1456 if (all_cpu_data[cpu]) { 1442 - del_timer_sync(&all_cpu_data[cpu]->timer); 1457 + cpufreq_set_update_util_data(cpu, NULL); 1458 + synchronize_sched(); 1443 1459 kfree(all_cpu_data[cpu]); 1444 1460 } 1445 1461 }

+94 -60

drivers/cpufreq/powernv-cpufreq.c

··· 28 28 #include <linux/of.h> 29 29 #include <linux/reboot.h> 30 30 #include <linux/slab.h> 31 + #include <linux/cpu.h> 32 + #include <trace/events/power.h> 31 33 32 34 #include <asm/cputhreads.h> 33 35 #include <asm/firmware.h> ··· 44 42 45 43 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; 46 44 static bool rebooting, throttled, occ_reset; 45 + static unsigned int *core_to_chip_map; 46 + 47 + static const char * const throttle_reason[] = { 48 + "No throttling", 49 + "Power Cap", 50 + "Processor Over Temperature", 51 + "Power Supply Failure", 52 + "Over Current", 53 + "OCC Reset" 54 + }; 47 55 48 56 static struct chip { 49 57 unsigned int id; 50 58 bool throttled; 59 + bool restore; 60 + u8 throttle_reason; 51 61 cpumask_t mask; 52 62 struct work_struct throttle; 53 - bool restore; 54 63 } *chips; 55 64 56 65 static int nr_chips; ··· 325 312 static void powernv_cpufreq_throttle_check(void *data) 326 313 { 327 314 unsigned int cpu = smp_processor_id(); 315 + unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)]; 328 316 unsigned long pmsr; 329 317 int pmsr_pmax, i; 330 318 331 319 pmsr = get_pmspr(SPRN_PMSR); 332 320 333 321 for (i = 0; i < nr_chips; i++) 334 - if (chips[i].id == cpu_to_chip_id(cpu)) 322 + if (chips[i].id == chip_id) 335 323 break; 336 324 337 325 /* Check for Pmax Capping */ ··· 342 328 goto next; 343 329 chips[i].throttled = true; 344 330 if (pmsr_pmax < powernv_pstate_info.nominal) 345 - pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", 346 - cpu, chips[i].id, pmsr_pmax, 347 - powernv_pstate_info.nominal); 348 - else 349 - pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", 350 - cpu, chips[i].id, pmsr_pmax, 351 - powernv_pstate_info.max); 331 + pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", 332 + cpu, chips[i].id, pmsr_pmax, 333 + powernv_pstate_info.nominal); 334 + 
trace_powernv_throttle(chips[i].id, 335 + throttle_reason[chips[i].throttle_reason], 336 + pmsr_pmax); 352 337 } else if (chips[i].throttled) { 353 338 chips[i].throttled = false; 354 - pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, 355 - chips[i].id, pmsr_pmax); 339 + trace_powernv_throttle(chips[i].id, 340 + throttle_reason[chips[i].throttle_reason], 341 + pmsr_pmax); 356 342 } 357 343 358 344 /* Check if Psafe_mode_active is set in PMSR. */ ··· 370 356 371 357 if (throttled) { 372 358 pr_info("PMSR = %16lx\n", pmsr); 373 - pr_crit("CPU Frequency could be throttled\n"); 359 + pr_warn("CPU Frequency could be throttled\n"); 374 360 } 375 361 } 376 362 ··· 437 423 { 438 424 struct chip *chip = container_of(work, struct chip, throttle); 439 425 unsigned int cpu; 440 - cpumask_var_t mask; 426 + cpumask_t mask; 441 427 442 - smp_call_function_any(&chip->mask, 428 + get_online_cpus(); 429 + cpumask_and(&mask, &chip->mask, cpu_online_mask); 430 + smp_call_function_any(&mask, 443 431 powernv_cpufreq_throttle_check, NULL, 0); 444 432 445 433 if (!chip->restore) 446 - return; 434 + goto out; 447 435 448 436 chip->restore = false; 449 - cpumask_copy(mask, &chip->mask); 450 - for_each_cpu_and(cpu, mask, cpu_online_mask) { 451 - int index, tcpu; 437 + for_each_cpu(cpu, &mask) { 438 + int index; 452 439 struct cpufreq_policy policy; 453 440 454 441 cpufreq_get_policy(&policy, cpu); ··· 457 442 policy.cur, 458 443 CPUFREQ_RELATION_C, &index); 459 444 powernv_cpufreq_target_index(&policy, index); 460 - for_each_cpu(tcpu, policy.cpus) 461 - cpumask_clear_cpu(tcpu, mask); 445 + cpumask_andnot(&mask, &mask, policy.cpus); 462 446 } 447 + out: 448 + put_online_cpus(); 463 449 } 464 - 465 - static char throttle_reason[][30] = { 466 - "No throttling", 467 - "Power Cap", 468 - "Processor Over Temperature", 469 - "Power Supply Failure", 470 - "Over Current", 471 - "OCC Reset" 472 - }; 473 450 474 451 static int powernv_cpufreq_occ_msg(struct notifier_block *nb, 475 452 
unsigned long msg_type, void *_msg) ··· 488 481 */ 489 482 if (!throttled) { 490 483 throttled = true; 491 - pr_crit("CPU frequency is throttled for duration\n"); 484 + pr_warn("CPU frequency is throttled for duration\n"); 492 485 } 493 486 494 487 break; ··· 512 505 return 0; 513 506 } 514 507 515 - if (omsg.throttle_status && 516 - omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) 517 - pr_info("OCC: Chip %u Pmax reduced due to %s\n", 518 - (unsigned int)omsg.chip, 519 - throttle_reason[omsg.throttle_status]); 520 - else if (!omsg.throttle_status) 521 - pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, 522 - throttle_reason[omsg.throttle_status]); 523 - else 524 - return 0; 525 - 526 508 for (i = 0; i < nr_chips; i++) 527 - if (chips[i].id == omsg.chip) { 528 - if (!omsg.throttle_status) 529 - chips[i].restore = true; 530 - schedule_work(&chips[i].throttle); 531 - } 509 + if (chips[i].id == omsg.chip) 510 + break; 511 + 512 + if (omsg.throttle_status >= 0 && 513 + omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) 514 + chips[i].throttle_reason = omsg.throttle_status; 515 + 516 + if (!omsg.throttle_status) 517 + chips[i].restore = true; 518 + 519 + schedule_work(&chips[i].throttle); 532 520 } 533 521 return 0; 534 522 } ··· 558 556 unsigned int chip[256]; 559 557 unsigned int cpu, i; 560 558 unsigned int prev_chip_id = UINT_MAX; 559 + cpumask_t cpu_mask; 560 + int ret = -ENOMEM; 561 561 562 - for_each_possible_cpu(cpu) { 562 + core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int), 563 + GFP_KERNEL); 564 + if (!core_to_chip_map) 565 + goto out; 566 + 567 + cpumask_copy(&cpu_mask, cpu_possible_mask); 568 + for_each_cpu(cpu, &cpu_mask) { 563 569 unsigned int id = cpu_to_chip_id(cpu); 564 570 565 571 if (prev_chip_id != id) { 566 572 prev_chip_id = id; 567 573 chip[nr_chips++] = id; 568 574 } 575 + core_to_chip_map[cpu_core_index_of_thread(cpu)] = id; 576 + cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu)); 569 577 } 570 578 571 - chips = 
kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); 579 + chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); 572 580 if (!chips) 573 - return -ENOMEM; 581 + goto free_chip_map; 574 582 575 583 for (i = 0; i < nr_chips; i++) { 576 584 chips[i].id = chip[i]; 577 - chips[i].throttled = false; 578 585 cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); 579 586 INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); 580 - chips[i].restore = false; 581 587 } 582 588 583 589 return 0; 590 + free_chip_map: 591 + kfree(core_to_chip_map); 592 + out: 593 + return ret; 594 + } 595 + 596 + static inline void clean_chip_info(void) 597 + { 598 + kfree(chips); 599 + kfree(core_to_chip_map); 600 + } 601 + 602 + static inline void unregister_all_notifiers(void) 603 + { 604 + opal_message_notifier_unregister(OPAL_MSG_OCC, 605 + &powernv_cpufreq_opal_nb); 606 + unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); 584 607 } 585 608 586 609 static int __init powernv_cpufreq_init(void) ··· 618 591 619 592 /* Discover pstates from device tree and init */ 620 593 rc = init_powernv_pstates(); 621 - if (rc) { 622 - pr_info("powernv-cpufreq disabled. System does not support PState control\n"); 623 - return rc; 624 - } 594 + if (rc) 595 + goto out; 625 596 626 597 /* Populate chip info */ 627 598 rc = init_chip_info(); 628 599 if (rc) 629 - return rc; 600 + goto out; 630 601 631 602 register_reboot_notifier(&powernv_cpufreq_reboot_nb); 632 603 opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); 633 - return cpufreq_register_driver(&powernv_cpufreq_driver); 604 + 605 + rc = cpufreq_register_driver(&powernv_cpufreq_driver); 606 + if (!rc) 607 + return 0; 608 + 609 + pr_info("Failed to register the cpufreq driver (%d)\n", rc); 610 + unregister_all_notifiers(); 611 + clean_chip_info(); 612 + out: 613 + pr_info("Platform driver disabled. 
System does not support PState control\n"); 614 + return rc; 634 615 } 635 616 module_init(powernv_cpufreq_init); 636 617 637 618 static void __exit powernv_cpufreq_exit(void) 638 619 { 639 - unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); 640 - opal_message_notifier_unregister(OPAL_MSG_OCC, 641 - &powernv_cpufreq_opal_nb); 642 620 cpufreq_unregister_driver(&powernv_cpufreq_driver); 621 + unregister_all_notifiers(); 622 + clean_chip_info(); 643 623 } 644 624 module_exit(powernv_cpufreq_exit); 645 625

+23 -24

drivers/cpuidle/governors/menu.c

······

+1

drivers/i2c/busses/i2c-designware-platdrv.c

··· 123 123 { "80860F41", 0 }, 124 124 { "808622C1", 0 }, 125 125 { "AMD0010", ACCESS_INTR_MASK }, 126 + { "AMDI0010", ACCESS_INTR_MASK }, 126 127 { "AMDI0510", 0 }, 127 128 { "APMC0D0F", 0 }, 128 129 { }

+104 -7

drivers/mailbox/pcc.c

··· 63 63 #include <linux/platform_device.h> 64 64 #include <linux/mailbox_controller.h> 65 65 #include <linux/mailbox_client.h> 66 + #include <linux/io-64-nonatomic-lo-hi.h> 66 67 67 68 #include "mailbox.h" 68 69 69 70 #define MAX_PCC_SUBSPACES 256 70 71 71 72 static struct mbox_chan *pcc_mbox_channels; 73 + 74 + /* Array of cached virtual address for doorbell registers */ 75 + static void __iomem **pcc_doorbell_vaddr; 72 76 73 77 static struct mbox_controller pcc_mbox_ctrl = {}; 74 78 /** ··· 164 160 } 165 161 EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); 166 162 163 + /* 164 + * PCC can be used with perf critical drivers such as CPPC 165 + * So it makes sense to locally cache the virtual address and 166 + * use it to read/write to PCC registers such as doorbell register 167 + * 168 + * The below read_register and write_registers are used to read and 169 + * write from perf critical registers such as PCC doorbell register 170 + */ 171 + static int read_register(void __iomem *vaddr, u64 *val, unsigned int bit_width) 172 + { 173 + int ret_val = 0; 174 + 175 + switch (bit_width) { 176 + case 8: 177 + *val = readb(vaddr); 178 + break; 179 + case 16: 180 + *val = readw(vaddr); 181 + break; 182 + case 32: 183 + *val = readl(vaddr); 184 + break; 185 + case 64: 186 + *val = readq(vaddr); 187 + break; 188 + default: 189 + pr_debug("Error: Cannot read register of %u bit width", 190 + bit_width); 191 + ret_val = -EFAULT; 192 + break; 193 + } 194 + return ret_val; 195 + } 196 + 197 + static int write_register(void __iomem *vaddr, u64 val, unsigned int bit_width) 198 + { 199 + int ret_val = 0; 200 + 201 + switch (bit_width) { 202 + case 8: 203 + writeb(val, vaddr); 204 + break; 205 + case 16: 206 + writew(val, vaddr); 207 + break; 208 + case 32: 209 + writel(val, vaddr); 210 + break; 211 + case 64: 212 + writeq(val, vaddr); 213 + break; 214 + default: 215 + pr_debug("Error: Cannot write register of %u bit width", 216 + bit_width); 217 + ret_val = -EFAULT; 218 + break; 219 + } 220 
+ return ret_val; 221 + } 222 + 167 223 /** 168 224 * pcc_send_data - Called from Mailbox Controller code. Used 169 225 * here only to ring the channel doorbell. The PCC client ··· 239 175 static int pcc_send_data(struct mbox_chan *chan, void *data) 240 176 { 241 177 struct acpi_pcct_hw_reduced *pcct_ss = chan->con_priv; 242 - struct acpi_generic_address doorbell; 178 + struct acpi_generic_address *doorbell; 243 179 u64 doorbell_preserve; 244 180 u64 doorbell_val; 245 181 u64 doorbell_write; 182 + u32 id = chan - pcc_mbox_channels; 183 + int ret = 0; 246 184 247 - doorbell = pcct_ss->doorbell_register; 185 + if (id >= pcc_mbox_ctrl.num_chans) { 186 + pr_debug("pcc_send_data: Invalid mbox_chan passed\n"); 187 + return -ENOENT; 188 + } 189 + 190 + doorbell = &pcct_ss->doorbell_register; 248 191 doorbell_preserve = pcct_ss->preserve_mask; 249 192 doorbell_write = pcct_ss->write_mask; 250 193 251 194 /* Sync notification from OS to Platform. */ 252 - acpi_read(&doorbell_val, &doorbell); 253 - acpi_write((doorbell_val & doorbell_preserve) | doorbell_write, 254 - &doorbell); 255 - 256 - return 0; 195 + if (pcc_doorbell_vaddr[id]) { 196 + ret = read_register(pcc_doorbell_vaddr[id], &doorbell_val, 197 + doorbell->bit_width); 198 + if (ret) 199 + return ret; 200 + ret = write_register(pcc_doorbell_vaddr[id], 201 + (doorbell_val & doorbell_preserve) | doorbell_write, 202 + doorbell->bit_width); 203 + } else { 204 + ret = acpi_read(&doorbell_val, doorbell); 205 + if (ret) 206 + return ret; 207 + ret = acpi_write((doorbell_val & doorbell_preserve) | doorbell_write, 208 + doorbell); 209 + } 210 + return ret; 257 211 } 258 212 259 213 static const struct mbox_chan_ops pcc_chan_ops = { ··· 347 265 return -ENOMEM; 348 266 } 349 267 268 + pcc_doorbell_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL); 269 + if (!pcc_doorbell_vaddr) { 270 + kfree(pcc_mbox_channels); 271 + return -ENOMEM; 272 + } 273 + 350 274 /* Point to the first PCC subspace entry */ 351 275 pcct_entry = (struct 
acpi_subtable_header *) ( 352 276 (unsigned long) pcct_tbl + sizeof(struct acpi_table_pcct)); 353 277 354 278 for (i = 0; i < count; i++) { 279 + struct acpi_generic_address *db_reg; 280 + struct acpi_pcct_hw_reduced *pcct_ss; 355 281 pcc_mbox_channels[i].con_priv = pcct_entry; 356 282 pcct_entry = (struct acpi_subtable_header *) 357 283 ((unsigned long) pcct_entry + pcct_entry->length); 284 + 285 + /* If doorbell is in system memory cache the virt address */ 286 + pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry; 287 + db_reg = &pcct_ss->doorbell_register; 288 + if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) 289 + pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address, 290 + db_reg->bit_width/8); 358 291 } 359 292 360 293 pcc_mbox_ctrl.num_chans = count;

+4

drivers/pnp/pnpacpi/rsparser.c

··· 252 252 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: 253 253 break; 254 254 255 + case ACPI_RESOURCE_TYPE_SERIAL_BUS: 256 + /* serial bus connections (I2C/SPI/UART) are not pnp */ 257 + break; 258 + 255 259 default: 256 260 dev_warn(&dev->dev, "unknown resource type %d in _CRS\n", 257 261 res->type);

+116 -108

drivers/powercap/intel_rapl.c

··· 133 133 unsigned long timestamp; 134 134 }; 135 135 136 + struct msrl_action { 137 + u32 msr_no; 138 + u64 clear_mask; 139 + u64 set_mask; 140 + int err; 141 + }; 136 142 137 143 #define DOMAIN_STATE_INACTIVE BIT(0) 138 144 #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) ··· 155 149 static const char pl1_name[] = "long_term"; 156 150 static const char pl2_name[] = "short_term"; 157 151 152 + struct rapl_package; 158 153 struct rapl_domain { 159 154 const char *name; 160 155 enum rapl_domain_type id; ··· 166 159 u64 attr_map; /* track capabilities */ 167 160 unsigned int state; 168 161 unsigned int domain_energy_unit; 169 - int package_id; 162 + struct rapl_package *rp; 170 163 }; 171 164 #define power_zone_to_rapl_domain(_zone) \ 172 165 container_of(_zone, struct rapl_domain, power_zone) ··· 191 184 * notify interrupt enable status. 192 185 */ 193 186 struct list_head plist; 187 + int lead_cpu; /* one active cpu per package for access */ 194 188 }; 195 189 196 190 struct rapl_defaults { ··· 239 231 static int rapl_write_data_raw(struct rapl_domain *rd, 240 232 enum rapl_primitives prim, 241 233 unsigned long long value); 242 - static u64 rapl_unit_xlate(struct rapl_domain *rd, int package, 234 + static u64 rapl_unit_xlate(struct rapl_domain *rd, 243 235 enum unit_type type, u64 value, 244 236 int to_raw); 245 - static void package_power_limit_irq_save(int package_id); 237 + static void package_power_limit_irq_save(struct rapl_package *rp); 246 238 247 239 static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ 248 240 ··· 266 258 } 267 259 268 260 return NULL; 269 - } 270 - 271 - /* caller to ensure CPU hotplug lock is held */ 272 - static int find_active_cpu_on_package(int package_id) 273 - { 274 - int i; 275 - 276 - for_each_online_cpu(i) { 277 - if (topology_physical_package_id(i) == package_id) 278 - return i; 279 - } 280 - /* all CPUs on this package are offline */ 281 - 282 - return -ENODEV; 283 261 } 284 262 285 263 /* caller must hold cpu 
hotplug lock */ ··· 306 312 { 307 313 struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); 308 314 309 - *energy = rapl_unit_xlate(rd, 0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); 315 + *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); 310 316 return 0; 311 317 } 312 318 313 319 static int release_zone(struct powercap_zone *power_zone) 314 320 { 315 321 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 316 - struct rapl_package *rp; 322 + struct rapl_package *rp = rd->rp; 317 323 318 324 /* package zone is the last zone of a package, we can free 319 325 * memory here since all children has been unregistered. 320 326 */ 321 327 if (rd->id == RAPL_DOMAIN_PACKAGE) { 322 - rp = find_package_by_id(rd->package_id); 323 - if (!rp) { 324 - dev_warn(&power_zone->dev, "no package id %s\n", 325 - rd->name); 326 - return -ENODEV; 327 - } 328 328 kfree(rd); 329 329 rp->domains = NULL; 330 330 } ··· 420 432 421 433 get_online_cpus(); 422 434 rd = power_zone_to_rapl_domain(power_zone); 423 - rp = find_package_by_id(rd->package_id); 424 - if (!rp) { 425 - ret = -ENODEV; 426 - goto set_exit; 427 - } 435 + rp = rd->rp; 428 436 429 437 if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { 430 438 dev_warn(&power_zone->dev, "%s locked by BIOS, monitoring only\n", ··· 440 456 ret = -EINVAL; 441 457 } 442 458 if (!ret) 443 - package_power_limit_irq_save(rd->package_id); 459 + package_power_limit_irq_save(rp); 444 460 set_exit: 445 461 put_online_cpus(); 446 462 return ret; ··· 639 655 break; 640 656 } 641 657 if (mask) { 642 - rd->package_id = rp->id; 658 + rd->rp = rp; 643 659 rd++; 644 660 } 645 661 } 646 662 } 647 663 648 - static u64 rapl_unit_xlate(struct rapl_domain *rd, int package, 649 - enum unit_type type, u64 value, 650 - int to_raw) 664 + static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, 665 + u64 value, int to_raw) 651 666 { 652 667 u64 units = 1; 653 - struct rapl_package *rp; 668 + struct rapl_package *rp = rd->rp; 654 669 u64 
scale = 1; 655 - 656 - rp = find_package_by_id(package); 657 - if (!rp) 658 - return value; 659 670 660 671 switch (type) { 661 672 case POWER_UNIT: ··· 748 769 msr = rd->msrs[rp->id]; 749 770 if (!msr) 750 771 return -EINVAL; 751 - /* use physical package id to look up active cpus */ 752 - cpu = find_active_cpu_on_package(rd->package_id); 753 - if (cpu < 0) 754 - return cpu; 772 + 773 + cpu = rd->rp->lead_cpu; 755 774 756 775 /* special-case package domain, which uses a different bit*/ 757 776 if (prim == FW_LOCK && rd->id == RAPL_DOMAIN_PACKAGE) { ··· 770 793 final = value & rp->mask; 771 794 final = final >> rp->shift; 772 795 if (xlate) 773 - *data = rapl_unit_xlate(rd, rd->package_id, rp->unit, final, 0); 796 + *data = rapl_unit_xlate(rd, rp->unit, final, 0); 774 797 else 775 798 *data = final; 776 799 777 800 return 0; 801 + } 802 + 803 + 804 + static int msrl_update_safe(u32 msr_no, u64 clear_mask, u64 set_mask) 805 + { 806 + int err; 807 + u64 val; 808 + 809 + err = rdmsrl_safe(msr_no, &val); 810 + if (err) 811 + goto out; 812 + 813 + val &= ~clear_mask; 814 + val |= set_mask; 815 + 816 + err = wrmsrl_safe(msr_no, val); 817 + 818 + out: 819 + return err; 820 + } 821 + 822 + static void msrl_update_func(void *info) 823 + { 824 + struct msrl_action *ma = info; 825 + 826 + ma->err = msrl_update_safe(ma->msr_no, ma->clear_mask, ma->set_mask); 778 827 } 779 828 780 829 /* Similar use of primitive info in the read counterpart */ ··· 808 805 enum rapl_primitives prim, 809 806 unsigned long long value) 810 807 { 811 - u64 msr_val; 812 - u32 msr; 813 808 struct rapl_primitive_info *rp = &rpi[prim]; 814 809 int cpu; 810 + u64 bits; 811 + struct msrl_action ma; 812 + int ret; 815 813 816 - cpu = find_active_cpu_on_package(rd->package_id); 817 - if (cpu < 0) 818 - return cpu; 819 - msr = rd->msrs[rp->id]; 820 - if (rdmsrl_safe_on_cpu(cpu, msr, &msr_val)) { 821 - dev_dbg(&rd->power_zone.dev, 822 - "failed to read msr 0x%x on cpu %d\n", msr, cpu); 823 - return -EIO; 824 
- } 825 - value = rapl_unit_xlate(rd, rd->package_id, rp->unit, value, 1); 826 - msr_val &= ~rp->mask; 827 - msr_val |= value << rp->shift; 828 - if (wrmsrl_safe_on_cpu(cpu, msr, msr_val)) { 829 - dev_dbg(&rd->power_zone.dev, 830 - "failed to write msr 0x%x on cpu %d\n", msr, cpu); 831 - return -EIO; 832 - } 814 + cpu = rd->rp->lead_cpu; 815 + bits = rapl_unit_xlate(rd, rp->unit, value, 1); 816 + bits |= bits << rp->shift; 817 + memset(&ma, 0, sizeof(ma)); 833 818 834 - return 0; 819 + ma.msr_no = rd->msrs[rp->id]; 820 + ma.clear_mask = rp->mask; 821 + ma.set_mask = bits; 822 + 823 + ret = smp_call_function_single(cpu, msrl_update_func, &ma, 1); 824 + if (ret) 825 + WARN_ON_ONCE(ret); 826 + else 827 + ret = ma.err; 828 + 829 + return ret; 835 830 } 836 831 837 832 /* ··· 894 893 return 0; 895 894 } 896 895 896 + static void power_limit_irq_save_cpu(void *info) 897 + { 898 + u32 l, h = 0; 899 + struct rapl_package *rp = (struct rapl_package *)info; 900 + 901 + /* save the state of PLN irq mask bit before disabling it */ 902 + rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 903 + if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { 904 + rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; 905 + rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; 906 + } 907 + l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 908 + wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 909 + } 910 + 897 911 898 912 /* REVISIT: 899 913 * When package power limit is set artificially low by RAPL, LVT ··· 920 904 * to do by adding an atomic notifier. 
921 905 */ 922 906 923 - static void package_power_limit_irq_save(int package_id) 907 + static void package_power_limit_irq_save(struct rapl_package *rp) 924 908 { 925 - u32 l, h = 0; 926 - int cpu; 927 - struct rapl_package *rp; 928 - 929 - rp = find_package_by_id(package_id); 930 - if (!rp) 931 - return; 932 - 933 909 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 934 910 return; 935 911 936 - cpu = find_active_cpu_on_package(package_id); 937 - if (cpu < 0) 938 - return; 939 - /* save the state of PLN irq mask bit before disabling it */ 940 - rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 941 - if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { 942 - rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; 943 - rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; 944 - } 945 - l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 946 - wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 912 + smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1); 947 913 } 948 914 949 - /* restore per package power limit interrupt enable state */ 950 - static void package_power_limit_irq_restore(int package_id) 915 + static void power_limit_irq_restore_cpu(void *info) 951 916 { 952 - u32 l, h; 953 - int cpu; 954 - struct rapl_package *rp; 917 + u32 l, h = 0; 918 + struct rapl_package *rp = (struct rapl_package *)info; 955 919 956 - rp = find_package_by_id(package_id); 957 - if (!rp) 958 - return; 959 - 960 - if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 961 - return; 962 - 963 - cpu = find_active_cpu_on_package(package_id); 964 - if (cpu < 0) 965 - return; 966 - 967 - /* irq enable state not saved, nothing to restore */ 968 - if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) 969 - return; 970 - rdmsr_safe_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 920 + rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); 971 921 972 922 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) 973 923 l |= 
PACKAGE_THERM_INT_PLN_ENABLE; 974 924 else 975 925 l &= ~PACKAGE_THERM_INT_PLN_ENABLE; 976 926 977 - wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 927 + wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 928 + } 929 + 930 + /* restore per package power limit interrupt enable state */ 931 + static void package_power_limit_irq_restore(struct rapl_package *rp) 932 + { 933 + if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 934 + return; 935 + 936 + /* irq enable state not saved, nothing to restore */ 937 + if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) 938 + return; 939 + 940 + smp_call_function_single(rp->lead_cpu, power_limit_irq_restore_cpu, rp, 1); 978 941 } 979 942 980 943 static void set_floor_freq_default(struct rapl_domain *rd, bool mode) ··· 1136 1141 * hotplug lock held 1137 1142 */ 1138 1143 list_for_each_entry(rp, &rapl_packages, plist) { 1139 - package_power_limit_irq_restore(rp->id); 1144 + package_power_limit_irq_restore(rp); 1140 1145 1141 1146 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; 1142 1147 rd++) { ··· 1387 1392 /* add the new package to the list */ 1388 1393 new_package->id = phy_package_id; 1389 1394 new_package->nr_cpus = 1; 1390 - 1395 + /* use the first active cpu of the package to access */ 1396 + new_package->lead_cpu = i; 1391 1397 /* check if the package contains valid domains */ 1392 1398 if (rapl_detect_domains(new_package, i) || 1393 1399 rapl_defaults->check_unit(new_package, i)) { ··· 1444 1448 /* add the new package to the list */ 1445 1449 rp->id = phy_package_id; 1446 1450 rp->nr_cpus = 1; 1451 + rp->lead_cpu = cpu; 1452 + 1447 1453 /* check if the package contains valid domains */ 1448 1454 if (rapl_detect_domains(rp, cpu) || 1449 1455 rapl_defaults->check_unit(rp, cpu)) { ··· 1478 1480 unsigned long cpu = (unsigned long)hcpu; 1479 1481 int phy_package_id; 1480 1482 struct rapl_package *rp; 1483 + int lead_cpu; 1481 1484 1482 1485 phy_package_id = 
topology_physical_package_id(cpu); 1483 1486 switch (action) { ··· 1499 1500 break; 1500 1501 if (--rp->nr_cpus == 0) 1501 1502 rapl_remove_package(rp); 1503 + else if (cpu == rp->lead_cpu) { 1504 + /* choose another active cpu in the package */ 1505 + lead_cpu = cpumask_any_but(topology_core_cpumask(cpu), cpu); 1506 + if (lead_cpu < nr_cpu_ids) 1507 + rp->lead_cpu = lead_cpu; 1508 + else /* should never go here */ 1509 + pr_err("no active cpu available for package %d\n", 1510 + phy_package_id); 1511 + } 1502 1512 } 1503 1513 1504 1514 return NOTIFY_OK;

+1 -1

include/acpi/acoutput.h

··· 262 262 #define ACPI_GET_FUNCTION_NAME _acpi_function_name 263 263 264 264 /* 265 - * The Name parameter should be the procedure name as a quoted string. 265 + * The Name parameter should be the procedure name as a non-quoted string. 266 266 * The function name is also used by the function exit macros below. 267 267 * Note: (const char) is used to be compatible with the debug interfaces 268 268 * and macros such as __func__.

+2 -4

include/acpi/acpixf.h

··· 897 897 acpi_warning(const char *module_name, 898 898 u32 line_number, 899 899 const char *format, ...)) 900 - ACPI_MSG_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(3) 900 + ACPI_MSG_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(1) 901 901 void ACPI_INTERNAL_VAR_XFACE 902 - acpi_info(const char *module_name, 903 - u32 line_number, 904 - const char *format, ...)) 902 + acpi_info(const char *format, ...)) 905 903 ACPI_MSG_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(3) 906 904 void ACPI_INTERNAL_VAR_XFACE 907 905 acpi_bios_error(const char *module_name,

+1 -8

include/acpi/processor.h

··· 9 9 #define ACPI_PROCESSOR_CLASS "processor" 10 10 #define ACPI_PROCESSOR_DEVICE_NAME "Processor" 11 11 #define ACPI_PROCESSOR_DEVICE_HID "ACPI0007" 12 + #define ACPI_PROCESSOR_CONTAINER_HID "ACPI0010" 12 13 13 14 #define ACPI_PROCESSOR_BUSY_METRIC 10 14 15 ··· 394 393 return -ENODEV; 395 394 } 396 395 #endif /* CONFIG_ACPI_PROCESSOR_IDLE */ 397 - 398 - #if defined(CONFIG_PM_SLEEP) & defined(CONFIG_ACPI_PROCESSOR_IDLE) 399 - void acpi_processor_syscore_init(void); 400 - void acpi_processor_syscore_exit(void); 401 - #else 402 - static inline void acpi_processor_syscore_init(void) {} 403 - static inline void acpi_processor_syscore_exit(void) {} 404 - #endif 405 396 406 397 /* in processor_thermal.c */ 407 398 int acpi_processor_get_limit_info(struct acpi_processor *pr);

+7 -40

include/linux/cpufreq.h

··· 80 80 unsigned int last_policy; /* policy before unplug */ 81 81 struct cpufreq_governor *governor; /* see below */ 82 82 void *governor_data; 83 - bool governor_enabled; /* governor start/stop flag */ 84 83 char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ 85 84 86 85 struct work_struct update; /* if update_policy() needs to be ··· 99 100 * - Any routine that will write to the policy structure and/or may take away 100 101 * the policy altogether (eg. CPU hotplug), will hold this lock in write 101 102 * mode before doing so. 102 - * 103 - * Additional rules: 104 - * - Lock should not be held across 105 - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); 106 103 */ 107 104 struct rw_semaphore rwsem; 108 105 ··· 459 464 int cpufreq_register_governor(struct cpufreq_governor *governor); 460 465 void cpufreq_unregister_governor(struct cpufreq_governor *governor); 461 466 462 - /* CPUFREQ DEFAULT GOVERNOR */ 463 - /* 464 - * Performance governor is fallback governor if any other gov failed to auto 465 - * load due latency restrictions 466 - */ 467 - #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE 468 - extern struct cpufreq_governor cpufreq_gov_performance; 469 - #endif 470 - #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE 471 - #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) 472 - #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) 473 - extern struct cpufreq_governor cpufreq_gov_powersave; 474 - #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) 475 - #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) 476 - extern struct cpufreq_governor cpufreq_gov_userspace; 477 - #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) 478 - #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) 479 - extern struct cpufreq_governor cpufreq_gov_ondemand; 480 - #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) 481 - #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) 482 - extern struct cpufreq_governor cpufreq_gov_conservative; 483 - #define 
CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) 484 - #endif 467 + struct cpufreq_governor *cpufreq_default_governor(void); 468 + struct cpufreq_governor *cpufreq_fallback_governor(void); 485 469 486 470 /********************************************************************* 487 471 * FREQUENCY TABLE HELPERS * ··· 499 525 } 500 526 #endif 501 527 502 - static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos) 503 - { 504 - while ((*pos)->frequency != CPUFREQ_TABLE_END) 505 - if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) 506 - return true; 507 - else 508 - (*pos)++; 509 - return false; 510 - } 511 - 512 528 /* 513 529 * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table 514 530 * @pos: the cpufreq_frequency_table * to use as a loop cursor. ··· 515 551 * @table: the cpufreq_frequency_table * to iterate over. 516 552 */ 517 553 518 - #define cpufreq_for_each_valid_entry(pos, table) \ 519 - for (pos = table; cpufreq_next_valid(&pos); pos++) 554 + #define cpufreq_for_each_valid_entry(pos, table) \ 555 + for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++) \ 556 + if (pos->frequency == CPUFREQ_ENTRY_INVALID) \ 557 + continue; \ 558 + else 520 559 521 560 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, 522 561 struct cpufreq_frequency_table *table);

+10

include/linux/interrupt.h

··· 125 125 126 126 extern irqreturn_t no_action(int cpl, void *dev_id); 127 127 128 + /* 129 + * If a (PCI) device interrupt is not connected we set dev->irq to 130 + * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we 131 + * can distinguish that case from other error returns. 132 + * 133 + * 0x80000000 is guaranteed to be outside the available range of interrupts 134 + * and easy to distinguish from other possible incorrect values. 135 + */ 136 + #define IRQ_NOTCONNECTED (1U << 31) 137 + 128 138 extern int __must_check 129 139 request_threaded_irq(unsigned int irq, irq_handler_t handler, 130 140 irq_handler_t thread_fn,

+11 -2

include/linux/pm_domain.h

··· 19 19 /* Defines used for the flags field in the struct generic_pm_domain */ 20 20 #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ 21 21 22 + #define GENPD_MAX_NUM_STATES 8 /* Number of possible low power states */ 23 + 22 24 enum gpd_status { 23 25 GPD_STATE_ACTIVE = 0, /* PM domain is active */ 24 26 GPD_STATE_POWER_OFF, /* PM domain is off */ ··· 37 35 int (*save_state)(struct device *dev); 38 36 int (*restore_state)(struct device *dev); 39 37 bool (*active_wakeup)(struct device *dev); 38 + }; 39 + 40 + struct genpd_power_state { 41 + s64 power_off_latency_ns; 42 + s64 power_on_latency_ns; 40 43 }; 41 44 42 45 struct generic_pm_domain { ··· 61 54 unsigned int prepared_count; /* Suspend counter of prepared devices */ 62 55 bool suspend_power_off; /* Power status before system suspend */ 63 56 int (*power_off)(struct generic_pm_domain *domain); 64 - s64 power_off_latency_ns; 65 57 int (*power_on)(struct generic_pm_domain *domain); 66 - s64 power_on_latency_ns; 67 58 struct gpd_dev_ops dev_ops; 68 59 s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ 69 60 bool max_off_time_changed; ··· 71 66 void (*detach_dev)(struct generic_pm_domain *domain, 72 67 struct device *dev); 73 68 unsigned int flags; /* Bit field of configs for genpd */ 69 + struct genpd_power_state states[GENPD_MAX_NUM_STATES]; 70 + unsigned int state_count; /* number of states */ 71 + unsigned int state_idx; /* state that genpd will go to when off */ 72 + 74 73 }; 75 74 76 75 static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd)

+27

include/linux/pm_opp.h

··· 34 34 35 35 int dev_pm_opp_get_opp_count(struct device *dev); 36 36 unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); 37 + unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); 38 + unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev); 37 39 struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); 38 40 39 41 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, ··· 62 60 void dev_pm_opp_put_supported_hw(struct device *dev); 63 61 int dev_pm_opp_set_prop_name(struct device *dev, const char *name); 64 62 void dev_pm_opp_put_prop_name(struct device *dev); 63 + int dev_pm_opp_set_regulator(struct device *dev, const char *name); 64 + void dev_pm_opp_put_regulator(struct device *dev); 65 + int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); 65 66 #else 66 67 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) 67 68 { ··· 87 82 } 88 83 89 84 static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) 85 + { 86 + return 0; 87 + } 88 + 89 + static inline unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) 90 + { 91 + return 0; 92 + } 93 + 94 + static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev) 90 95 { 91 96 return 0; 92 97 } ··· 165 150 } 166 151 167 152 static inline void dev_pm_opp_put_prop_name(struct device *dev) {} 153 + 154 + static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) 155 + { 156 + return -EINVAL; 157 + } 158 + 159 + static inline void dev_pm_opp_put_regulator(struct device *dev) {} 160 + 161 + static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) 162 + { 163 + return -EINVAL; 164 + } 168 165 169 166 #endif /* CONFIG_PM_OPP */ 170 167

+9

include/linux/sched.h

··· 3212 3212 return task_rlimit_max(current, limit); 3213 3213 } 3214 3214 3215 + #ifdef CONFIG_CPU_FREQ 3216 + struct update_util_data { 3217 + void (*func)(struct update_util_data *data, 3218 + u64 time, unsigned long util, unsigned long max); 3219 + }; 3220 + 3221 + void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); 3222 + #endif /* CONFIG_CPU_FREQ */ 3223 + 3215 3224 #endif

+22

include/trace/events/power.h

··· 38 38 TP_ARGS(state, cpu_id) 39 39 ); 40 40 41 + TRACE_EVENT(powernv_throttle, 42 + 43 + TP_PROTO(int chip_id, const char *reason, int pmax), 44 + 45 + TP_ARGS(chip_id, reason, pmax), 46 + 47 + TP_STRUCT__entry( 48 + __field(int, chip_id) 49 + __string(reason, reason) 50 + __field(int, pmax) 51 + ), 52 + 53 + TP_fast_assign( 54 + __entry->chip_id = chip_id; 55 + __assign_str(reason, reason); 56 + __entry->pmax = pmax; 57 + ), 58 + 59 + TP_printk("Chip %d Pmax %d %s", __entry->chip_id, 60 + __entry->pmax, __get_str(reason)) 61 + ); 62 + 41 63 TRACE_EVENT(pstate_sample, 42 64 43 65 TP_PROTO(u32 core_busy,

+8 -1

kernel/irq/manage.c

··· 1607 1607 struct irq_desc *desc; 1608 1608 int retval; 1609 1609 1610 + if (irq == IRQ_NOTCONNECTED) 1611 + return -ENOTCONN; 1612 + 1610 1613 /* 1611 1614 * Sanity-check: shared interrupts must pass in a real dev-ID, 1612 1615 * otherwise we'll have trouble later trying to figure out ··· 1700 1697 int request_any_context_irq(unsigned int irq, irq_handler_t handler, 1701 1698 unsigned long flags, const char *name, void *dev_id) 1702 1699 { 1703 - struct irq_desc *desc = irq_to_desc(irq); 1700 + struct irq_desc *desc; 1704 1701 int ret; 1705 1702 1703 + if (irq == IRQ_NOTCONNECTED) 1704 + return -ENOTCONN; 1705 + 1706 + desc = irq_to_desc(irq); 1706 1707 if (!desc) 1707 1708 return -EINVAL; 1708 1709

+5 -7

kernel/power/process.c

··· 30 30 unsigned long end_time; 31 31 unsigned int todo; 32 32 bool wq_busy = false; 33 - struct timeval start, end; 34 - u64 elapsed_msecs64; 33 + ktime_t start, end, elapsed; 35 34 unsigned int elapsed_msecs; 36 35 bool wakeup = false; 37 36 int sleep_usecs = USEC_PER_MSEC; 38 37 39 - do_gettimeofday(&start); 38 + start = ktime_get_boottime(); 40 39 41 40 end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs); 42 41 ··· 77 78 sleep_usecs *= 2; 78 79 } 79 80 80 - do_gettimeofday(&end); 81 - elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); 82 - do_div(elapsed_msecs64, NSEC_PER_MSEC); 83 - elapsed_msecs = elapsed_msecs64; 81 + end = ktime_get_boottime(); 82 + elapsed = ktime_sub(end, start); 83 + elapsed_msecs = ktime_to_ms(elapsed); 84 84 85 85 if (todo) { 86 86 pr_cont("\n");

+3 -3

kernel/power/suspend.c

··· 248 248 { 249 249 #ifdef CONFIG_PM_DEBUG 250 250 if (pm_test_level == level) { 251 - printk(KERN_INFO "suspend debug: Waiting for %d second(s).\n", 251 + pr_info("suspend debug: Waiting for %d second(s).\n", 252 252 pm_test_delay); 253 253 mdelay(pm_test_delay * 1000); 254 254 return 1; ··· 320 320 321 321 error = dpm_suspend_late(PMSG_SUSPEND); 322 322 if (error) { 323 - printk(KERN_ERR "PM: late suspend of devices failed\n"); 323 + pr_err("PM: late suspend of devices failed\n"); 324 324 goto Platform_finish; 325 325 } 326 326 error = platform_suspend_prepare_late(state); ··· 329 329 330 330 error = dpm_suspend_noirq(PMSG_SUSPEND); 331 331 if (error) { 332 - printk(KERN_ERR "PM: noirq suspend of devices failed\n"); 332 + pr_err("PM: noirq suspend of devices failed\n"); 333 333 goto Platform_early_resume; 334 334 } 335 335 error = platform_suspend_prepare_noirq(state);

+1

kernel/sched/Makefile

··· 19 19 obj-$(CONFIG_SCHEDSTATS) += stats.o 20 20 obj-$(CONFIG_SCHED_DEBUG) += debug.o 21 21 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o 22 + obj-$(CONFIG_CPU_FREQ) += cpufreq.o

+37

kernel/sched/cpufreq.c

··· 1 + /* 2 + * Scheduler code and data structures related to cpufreq. 3 + * 4 + * Copyright (C) 2016, Intel Corporation 5 + * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include "sched.h" 13 + 14 + DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); 15 + 16 + /** 17 + * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer. 18 + * @cpu: The CPU to set the pointer for. 19 + * @data: New pointer value. 20 + * 21 + * Set and publish the update_util_data pointer for the given CPU. That pointer 22 + * points to a struct update_util_data object containing a callback function 23 + * to call from cpufreq_update_util(). That function will be called from an RCU 24 + * read-side critical section, so it must not sleep. 25 + * 26 + * Callers must use RCU-sched callbacks to free any memory that might be 27 + * accessed via the old update_util_data pointer or invoke synchronize_sched() 28 + * right after this function to avoid use-after-free. 29 + */ 30 + void cpufreq_set_update_util_data(int cpu, struct update_util_data *data) 31 + { 32 + if (WARN_ON(data && !data->func)) 33 + return; 34 + 35 + rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data); 36 + } 37 + EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);

+4

kernel/sched/deadline.c

··· 717 717 if (!dl_task(curr) || !on_dl_rq(dl_se)) 718 718 return; 719 719 720 + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ 721 + if (cpu_of(rq) == smp_processor_id()) 722 + cpufreq_trigger_update(rq_clock(rq)); 723 + 720 724 /* 721 725 * Consumed budget is computed considering the time as 722 726 * observed by schedulable tasks (excluding time spent

+25 -1

kernel/sched/fair.c

··· 2856 2856 { 2857 2857 struct cfs_rq *cfs_rq = cfs_rq_of(se); 2858 2858 u64 now = cfs_rq_clock_task(cfs_rq); 2859 - int cpu = cpu_of(rq_of(cfs_rq)); 2859 + struct rq *rq = rq_of(cfs_rq); 2860 + int cpu = cpu_of(rq); 2860 2861 2861 2862 /* 2862 2863 * Track task load average for carrying it to new CPU after migrated, and ··· 2869 2868 2870 2869 if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) 2871 2870 update_tg_load_avg(cfs_rq, 0); 2871 + 2872 + if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) { 2873 + unsigned long max = rq->cpu_capacity_orig; 2874 + 2875 + /* 2876 + * There are a few boundary cases this might miss but it should 2877 + * get called often enough that that should (hopefully) not be 2878 + * a real problem -- added to that it only calls on the local 2879 + * CPU, so if we enqueue remotely we'll miss an update, but 2880 + * the next tick/schedule should update. 2881 + * 2882 + * It will not get called when we go idle, because the idle 2883 + * thread is a different class (!fair), nor will the utilization 2884 + * number include things like RT tasks. 2885 + * 2886 + * As is, the util number is not freq-invariant (we'd have to 2887 + * implement arch_scale_freq_capacity() for that). 2888 + * 2889 + * See cpu_util(). 2890 + */ 2891 + cpufreq_update_util(rq_clock(rq), 2892 + min(cfs_rq->avg.util_avg, max), max); 2893 + } 2872 2894 } 2873 2895 2874 2896 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)

+4

kernel/sched/rt.c

··· 953 953 if (curr->sched_class != &rt_sched_class) 954 954 return; 955 955 956 + /* Kick cpufreq (see the comment in linux/cpufreq.h). */ 957 + if (cpu_of(rq) == smp_processor_id()) 958 + cpufreq_trigger_update(rq_clock(rq)); 959 + 956 960 delta_exec = rq_clock_task(rq) - curr->se.exec_start; 957 961 if (unlikely((s64)delta_exec <= 0)) 958 962 return;

+48

kernel/sched/sched.h

··· 1793 1793 } 1794 1794 #endif /* CONFIG_64BIT */ 1795 1795 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 1796 + 1797 + #ifdef CONFIG_CPU_FREQ 1798 + DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); 1799 + 1800 + /** 1801 + * cpufreq_update_util - Take a note about CPU utilization changes. 1802 + * @time: Current time. 1803 + * @util: Current utilization. 1804 + * @max: Utilization ceiling. 1805 + * 1806 + * This function is called by the scheduler on every invocation of 1807 + * update_load_avg() on the CPU whose utilization is being updated. 1808 + * 1809 + * It can only be called from RCU-sched read-side critical sections. 1810 + */ 1811 + static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) 1812 + { 1813 + struct update_util_data *data; 1814 + 1815 + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); 1816 + if (data) 1817 + data->func(data, time, util, max); 1818 + } 1819 + 1820 + /** 1821 + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. 1822 + * @time: Current time. 1823 + * 1824 + * The way cpufreq is currently arranged requires it to evaluate the CPU 1825 + * performance state (frequency/voltage) on a regular basis to prevent it from 1826 + * being stuck in a completely inadequate performance level for too long. 1827 + * That is not guaranteed to happen if the updates are only triggered from CFS, 1828 + * though, because they may not be coming in if RT or deadline tasks are active 1829 + * all the time (or there are RT and DL tasks only). 1830 + * 1831 + * As a workaround for that issue, this function is called by the RT and DL 1832 + * sched classes to trigger extra cpufreq updates to prevent it from stalling, 1833 + * but that really is a band-aid. Going forward it should be replaced with 1834 + * solutions targeted more specifically at RT and DL tasks. 
1835 + */ 1836 + static inline void cpufreq_trigger_update(u64 time) 1837 + { 1838 + cpufreq_update_util(time, ULONG_MAX, 0); 1839 + } 1840 + #else 1841 + static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} 1842 + static inline void cpufreq_trigger_update(u64 time) {} 1843 + #endif /* CONFIG_CPU_FREQ */

+1

kernel/trace/power-traces.c

··· 15 15 16 16 EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); 17 17 EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); 18 + EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle); 18 19

+19 -13

tools/power/x86/turbostat/turbostat.8

··· 34 34 \fB--debug\fP displays additional system configuration information. Invoking this parameter 35 35 more than once may also enable internal turbostat debug information. 36 36 .PP 37 - \fB--interval seconds\fP overrides the default 5-second measurement interval. 37 + \fB--interval seconds\fP overrides the default 5.0 second measurement interval. 38 + .PP 39 + \fB--out output_file\fP turbostat output is written to the specified output_file. 40 + The file is truncated if it already exists, and it is created if it does not exist. 38 41 .PP 39 42 \fB--help\fP displays usage for the most common parameters. 40 43 .PP ··· 64 61 .nf 65 62 \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. 66 63 \fBAVG_MHz\fP number of cycles executed divided by time elapsed. 67 - \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. 64 + \fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. 68 65 \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). 69 66 \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. 70 67 .fi ··· 86 83 \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. 87 84 .fi 88 85 .PP 89 - .SH EXAMPLE 86 + .SH PERIODIC EXAMPLE 90 87 Without any parameters, turbostat displays statistics every 5 seconds. 91 - (override interval with "-i sec" option, or specify a command 92 - for turbostat to fork). 88 + Periodic output goes to stdout, by default, unless --out is used to specify an output file. 89 + The 5-second interval can be changed with the "-i sec" option. 90 + Or a command may be specified as in "FORK EXAMPLE" below. 
93 91 .nf 94 92 [root@hsw]# ./turbostat 95 - CPU Avg_MHz %Busy Bzy_MHz TSC_MHz 93 + CPU Avg_MHz Busy% Bzy_MHz TSC_MHz 96 94 - 488 12.51 3898 3498 97 95 0 0 0.01 3885 3498 98 96 4 3897 99.99 3898 3498 ··· 149 145 cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) 150 146 cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) 151 147 cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) 152 - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt 148 + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt 153 149 - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 154 150 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 155 151 0 4 3897 99.98 3898 3498 0 0.02 ··· 175 171 See the field definitions above. 176 172 .SH FORK EXAMPLE 177 173 If turbostat is invoked with a command, it will fork that command 178 - and output the statistics gathered when the command exits. 174 + and output the statistics gathered after the command exits. 175 + In this case, turbostat output goes to stderr, by default. 176 + Output can instead be saved to a file using the --out option. 179 177 eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds 180 178 until ^C while the other CPUs are mostly idle: 181 179 182 180 .nf 183 181 root@hsw: turbostat cat /dev/zero > /dev/null 184 182 ^C 185 - CPU Avg_MHz %Busy Bzy_MHz TSC_MHz 183 + CPU Avg_MHz Busy% Bzy_MHz TSC_MHz 186 184 - 482 12.51 3854 3498 187 185 0 0 0.01 1960 3498 188 186 4 0 0.00 2128 3498 ··· 198 192 199 193 .fi 200 194 Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. 201 - The first row shows the average MHz and %Busy across all the processors in the system. 195 + The first row shows the average MHz and Busy% across all the processors in the system. 
202 196 203 197 Note that the Avg_MHz column reflects the total number of cycles executed 204 - divided by the measurement interval. If the %Busy column is 100%, 198 + divided by the measurement interval. If the Busy% column is 100%, 205 199 then the processor was running at that speed the entire interval. 206 - The Avg_MHz multiplied by the %Busy results in the Bzy_MHz -- 200 + The Avg_MHz multiplied by the Busy% results in the Bzy_MHz -- 207 201 which is the average frequency while the processor was executing -- 208 202 not including any non-busy idle time. 209 203 ··· 239 233 the TSC stops in idle, TSC_MHz will drop 240 234 below the processor's base frequency. 241 235 242 - %Busy = MPERF_delta/TSC_delta 236 + Busy% = MPERF_delta/TSC_delta 243 237 244 238 Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval 245 239

+699 -178

tools/power/x86/turbostat/turbostat.c

··· 38 38 #include <string.h> 39 39 #include <ctype.h> 40 40 #include <sched.h> 41 + #include <time.h> 41 42 #include <cpuid.h> 42 43 #include <linux/capability.h> 43 44 #include <errno.h> 44 45 45 46 char *proc_stat = "/proc/stat"; 46 - unsigned int interval_sec = 5; 47 + FILE *outf; 48 + int *fd_percpu; 49 + struct timespec interval_ts = {5, 0}; 47 50 unsigned int debug; 48 51 unsigned int rapl_joules; 49 52 unsigned int summary_only; ··· 75 72 unsigned int extra_delta_offset32; 76 73 unsigned int extra_delta_offset64; 77 74 unsigned int aperf_mperf_multiplier = 1; 75 + int do_irq = 1; 78 76 int do_smi; 79 77 double bclk; 80 78 double base_hz; ··· 90 86 unsigned int do_rapl; 91 87 unsigned int do_dts; 92 88 unsigned int do_ptm; 89 + unsigned int do_gfx_rc6_ms; 90 + unsigned long long gfx_cur_rc6_ms; 91 + unsigned int do_gfx_mhz; 92 + unsigned int gfx_cur_mhz; 93 93 unsigned int tcc_activation_temp; 94 94 unsigned int tcc_activation_temp_override; 95 95 double rapl_power_units, rapl_time_units; ··· 106 98 unsigned long long tsc_hz; 107 99 int base_cpu; 108 100 double discover_bclk(unsigned int family, unsigned int model); 101 + unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 102 + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 103 + unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 104 + unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 105 + unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 106 + unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 109 107 110 108 #define RAPL_PKG (1 << 0) 111 109 /* 0x610 MSR_PKG_POWER_LIMIT */ ··· 159 145 unsigned long long extra_delta64; 160 146 unsigned long long extra_msr32; 161 147 unsigned long long extra_delta32; 148 + unsigned int irq_count; 162 149 unsigned int smi_count; 163 150 unsigned int cpu_id; 164 151 unsigned int flags; ··· 187 172 unsigned long long pkg_any_core_c0; 188 173 unsigned long long pkg_any_gfxe_c0; 189 174 unsigned long long pkg_both_core_gfxe_c0; 175 
+ unsigned long long gfx_rc6_ms; 176 + unsigned int gfx_mhz; 190 177 unsigned int package_id; 191 178 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 192 179 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ ··· 228 211 } topo; 229 212 230 213 struct timeval tv_even, tv_odd, tv_delta; 214 + 215 + int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 216 + int *irqs_per_cpu; /* indexed by cpu_num */ 231 217 232 218 void setup_all_buffers(void); 233 219 ··· 282 262 else 283 263 return 0; 284 264 } 285 - 286 - int get_msr(int cpu, off_t offset, unsigned long long *msr) 265 + int get_msr_fd(int cpu) 287 266 { 288 - ssize_t retval; 289 267 char pathname[32]; 290 268 int fd; 269 + 270 + fd = fd_percpu[cpu]; 271 + 272 + if (fd) 273 + return fd; 291 274 292 275 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 293 276 fd = open(pathname, O_RDONLY); 294 277 if (fd < 0) 295 278 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); 296 279 297 - retval = pread(fd, msr, sizeof *msr, offset); 298 - close(fd); 280 + fd_percpu[cpu] = fd; 281 + 282 + return fd; 283 + } 284 + 285 + int get_msr(int cpu, off_t offset, unsigned long long *msr) 286 + { 287 + ssize_t retval; 288 + 289 + retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 299 290 300 291 if (retval != sizeof *msr) 301 - err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); 292 + err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); 302 293 303 294 return 0; 304 295 } ··· 317 286 /* 318 287 * Example Format w/ field column widths: 319 288 * 320 - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 321 - * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 289 + * Package Core CPU Avg_MHz Bzy_MHz 
TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 290 + * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 322 291 */ 323 292 324 293 void print_header(void) ··· 332 301 if (has_aperf) 333 302 outp += sprintf(outp, " Avg_MHz"); 334 303 if (has_aperf) 335 - outp += sprintf(outp, " %%Busy"); 304 + outp += sprintf(outp, " Busy%%"); 336 305 if (has_aperf) 337 306 outp += sprintf(outp, " Bzy_MHz"); 338 307 outp += sprintf(outp, " TSC_MHz"); ··· 349 318 if (!debug) 350 319 goto done; 351 320 321 + if (do_irq) 322 + outp += sprintf(outp, " IRQ"); 352 323 if (do_smi) 353 324 outp += sprintf(outp, " SMI"); 354 325 ··· 367 334 outp += sprintf(outp, " CoreTmp"); 368 335 if (do_ptm) 369 336 outp += sprintf(outp, " PkgTmp"); 337 + 338 + if (do_gfx_rc6_ms) 339 + outp += sprintf(outp, " GFX%%rc6"); 340 + 341 + if (do_gfx_mhz) 342 + outp += sprintf(outp, " GFXMHz"); 370 343 371 344 if (do_skl_residency) { 372 345 outp += sprintf(outp, " Totl%%C0"); ··· 448 409 extra_msr_offset32, t->extra_msr32); 449 410 outp += sprintf(outp, "msr0x%x: %016llX\n", 450 411 extra_msr_offset64, t->extra_msr64); 412 + if (do_irq) 413 + outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); 451 414 if (do_smi) 452 415 outp += sprintf(outp, "SMI: %08X\n", t->smi_count); 453 416 } ··· 545 504 outp += sprintf(outp, "%8.0f", 546 505 1.0 / units * t->aperf / interval_float); 547 506 548 - /* %Busy */ 507 + /* Busy% */ 549 508 if (has_aperf) { 550 509 if (!skip_c0) 551 510 outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); ··· 583 542 if (!debug) 584 543 goto done; 585 544 545 + /* IRQ */ 546 + if (do_irq) 547 + outp += sprintf(outp, "%8d", t->irq_count); 548 + 586 549 /* SMI */ 587 550 if (do_smi) 588 551 outp += sprintf(outp, "%8d", t->smi_count); ··· 619 574 /* PkgTmp */ 620 575 if 
(do_ptm) 621 576 outp += sprintf(outp, "%8d", p->pkg_temp_c); 577 + 578 + /* GFXrc6 */ 579 + if (do_gfx_rc6_ms) 580 + outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float); 581 + 582 + /* GFXMHz */ 583 + if (do_gfx_mhz) 584 + outp += sprintf(outp, "%8d", p->gfx_mhz); 622 585 623 586 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 624 587 if (do_skl_residency) { ··· 698 645 return 0; 699 646 } 700 647 701 - void flush_stdout() 648 + void flush_output_stdout(void) 702 649 { 703 - fputs(output_buffer, stdout); 704 - fflush(stdout); 650 + FILE *filep; 651 + 652 + if (outf == stderr) 653 + filep = stdout; 654 + else 655 + filep = outf; 656 + 657 + fputs(output_buffer, filep); 658 + fflush(filep); 659 + 705 660 outp = output_buffer; 706 661 } 707 - void flush_stderr() 662 + void flush_output_stderr(void) 708 663 { 709 - fputs(output_buffer, stderr); 664 + fputs(output_buffer, outf); 665 + fflush(outf); 710 666 outp = output_buffer; 711 667 } 712 668 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) ··· 766 704 old->pc10 = new->pc10 - old->pc10; 767 705 old->pkg_temp_c = new->pkg_temp_c; 768 706 707 + old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 708 + old->gfx_mhz = new->gfx_mhz; 709 + 769 710 DELTA_WRAP32(new->energy_pkg, old->energy_pkg); 770 711 DELTA_WRAP32(new->energy_cores, old->energy_cores); 771 712 DELTA_WRAP32(new->energy_gfx, old->energy_gfx); ··· 810 745 } else { 811 746 812 747 if (!aperf_mperf_unstable) { 813 - fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); 814 - fprintf(stderr, "* Frequency results do not cover entire interval *\n"); 815 - fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); 748 + fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); 749 + fprintf(outf, "* Frequency results do not cover entire interval *\n"); 750 + fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); 816 751 817 752 aperf_mperf_unstable = 1; 818 753 } 
··· 847 782 } 848 783 849 784 if (old->mperf == 0) { 850 - if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); 785 + if (debug > 1) 786 + fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 851 787 old->mperf = 1; /* divide by 0 protection */ 852 788 } 853 789 ··· 862 796 */ 863 797 old->extra_msr32 = new->extra_msr32; 864 798 old->extra_msr64 = new->extra_msr64; 799 + 800 + if (do_irq) 801 + old->irq_count = new->irq_count - old->irq_count; 865 802 866 803 if (do_smi) 867 804 old->smi_count = new->smi_count - old->smi_count; ··· 895 826 t->mperf = 0; 896 827 t->c1 = 0; 897 828 898 - t->smi_count = 0; 899 829 t->extra_delta32 = 0; 900 830 t->extra_delta64 = 0; 831 + 832 + t->irq_count = 0; 833 + t->smi_count = 0; 901 834 902 835 /* tells format_counters to dump all fields from this set */ 903 836 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; ··· 932 861 p->rapl_pkg_perf_status = 0; 933 862 p->rapl_dram_perf_status = 0; 934 863 p->pkg_temp_c = 0; 864 + 865 + p->gfx_rc6_ms = 0; 866 + p->gfx_mhz = 0; 935 867 } 936 868 int sum_counters(struct thread_data *t, struct core_data *c, 937 869 struct pkg_data *p) ··· 946 872 947 873 average.threads.extra_delta32 += t->extra_delta32; 948 874 average.threads.extra_delta64 += t->extra_delta64; 875 + 876 + average.threads.irq_count += t->irq_count; 877 + average.threads.smi_count += t->smi_count; 949 878 950 879 /* sum per-core values only for 1st thread in core */ 951 880 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) ··· 986 909 average.packages.energy_dram += p->energy_dram; 987 910 average.packages.energy_cores += p->energy_cores; 988 911 average.packages.energy_gfx += p->energy_gfx; 912 + 913 + average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 914 + average.packages.gfx_mhz = p->gfx_mhz; 989 915 990 916 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 991 917 ··· 1050 970 return low | ((unsigned long long)high) << 32; 1051 971 } 1052 972 1053 - 1054 973 /* 1055 974 * 
get_counters(...) 1056 975 * migrate to cpu ··· 1059 980 { 1060 981 int cpu = t->cpu_id; 1061 982 unsigned long long msr; 983 + int aperf_mperf_retry_count = 0; 1062 984 1063 985 if (cpu_migrate(cpu)) { 1064 - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 986 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 1065 987 return -1; 1066 988 } 1067 989 990 + retry: 1068 991 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 1069 992 1070 993 if (has_aperf) { 994 + unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; 995 + 996 + /* 997 + * The TSC, APERF and MPERF must be read together for 998 + * APERF/MPERF and MPERF/TSC to give accurate results. 999 + * 1000 + * Unfortunately, APERF and MPERF are read by 1001 + * individual system call, so delays may occur 1002 + * between them. If the time to read them 1003 + * varies by a large amount, we re-read them. 1004 + */ 1005 + 1006 + /* 1007 + * This initial dummy APERF read has been seen to 1008 + * reduce jitter in the subsequent reads. 1009 + */ 1010 + 1071 1011 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 1072 1012 return -3; 1013 + 1014 + t->tsc = rdtsc(); /* re-read close to APERF */ 1015 + 1016 + tsc_before = t->tsc; 1017 + 1018 + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 1019 + return -3; 1020 + 1021 + tsc_between = rdtsc(); 1022 + 1073 1023 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) 1074 1024 return -4; 1025 + 1026 + tsc_after = rdtsc(); 1027 + 1028 + aperf_time = tsc_between - tsc_before; 1029 + mperf_time = tsc_after - tsc_between; 1030 + 1031 + /* 1032 + * If the system call latency to read APERF and MPERF 1033 + * differ by more than 2x, then try again. 
1034 + */ 1035 + if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { 1036 + aperf_mperf_retry_count++; 1037 + if (aperf_mperf_retry_count < 5) 1038 + goto retry; 1039 + else 1040 + warnx("cpu%d jitter %lld %lld", 1041 + cpu, aperf_time, mperf_time); 1042 + } 1043 + aperf_mperf_retry_count = 0; 1044 + 1075 1045 t->aperf = t->aperf * aperf_mperf_multiplier; 1076 1046 t->mperf = t->mperf * aperf_mperf_multiplier; 1077 1047 } 1078 1048 1049 + if (do_irq) 1050 + t->irq_count = irqs_per_cpu[cpu]; 1079 1051 if (do_smi) { 1080 1052 if (get_msr(cpu, MSR_SMI_COUNT, &msr)) 1081 1053 return -5; ··· 1254 1124 return -17; 1255 1125 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 1256 1126 } 1127 + 1128 + if (do_gfx_rc6_ms) 1129 + p->gfx_rc6_ms = gfx_cur_rc6_ms; 1130 + 1131 + if (do_gfx_mhz) 1132 + p->gfx_mhz = gfx_cur_mhz; 1133 + 1257 1134 return 0; 1258 1135 } 1259 1136 ··· 1312 1175 1313 1176 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 1314 1177 1315 - fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 1178 + fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 1316 1179 1317 1180 ratio = (msr >> 40) & 0xFF; 1318 - fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", 1181 + fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", 1319 1182 ratio, bclk, ratio * bclk); 1320 1183 1321 1184 ratio = (msr >> 8) & 0xFF; 1322 - fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", 1185 + fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", 1323 1186 ratio, bclk, ratio * bclk); 1324 1187 1325 1188 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 1326 - fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 1189 + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 1327 1190 base_cpu, msr, msr & 0x2 ? 
"EN" : "DIS"); 1328 1191 1329 1192 return; ··· 1337 1200 1338 1201 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 1339 1202 1340 - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 1203 + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 1341 1204 1342 1205 ratio = (msr >> 8) & 0xFF; 1343 1206 if (ratio) 1344 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", 1207 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", 1345 1208 ratio, bclk, ratio * bclk); 1346 1209 1347 1210 ratio = (msr >> 0) & 0xFF; 1348 1211 if (ratio) 1349 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", 1212 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", 1350 1213 ratio, bclk, ratio * bclk); 1351 1214 return; 1352 1215 } ··· 1359 1222 1360 1223 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 1361 1224 1362 - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 1225 + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 1363 1226 1364 1227 ratio = (msr >> 56) & 0xFF; 1365 1228 if (ratio) 1366 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 1229 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 1367 1230 ratio, bclk, ratio * bclk); 1368 1231 1369 1232 ratio = (msr >> 48) & 0xFF; 1370 1233 if (ratio) 1371 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 1234 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 1372 1235 ratio, bclk, ratio * bclk); 1373 1236 1374 1237 ratio = (msr >> 40) & 0xFF; 1375 1238 if (ratio) 1376 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 1239 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 1377 1240 ratio, bclk, ratio * bclk); 1378 1241 1379 1242 ratio = (msr >> 32) & 0xFF; 1380 1243 if (ratio) 1381 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 1244 + 
fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 1382 1245 ratio, bclk, ratio * bclk); 1383 1246 1384 1247 ratio = (msr >> 24) & 0xFF; 1385 1248 if (ratio) 1386 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 1249 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 1387 1250 ratio, bclk, ratio * bclk); 1388 1251 1389 1252 ratio = (msr >> 16) & 0xFF; 1390 1253 if (ratio) 1391 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 1254 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 1392 1255 ratio, bclk, ratio * bclk); 1393 1256 1394 1257 ratio = (msr >> 8) & 0xFF; 1395 1258 if (ratio) 1396 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 1259 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 1397 1260 ratio, bclk, ratio * bclk); 1398 1261 1399 1262 ratio = (msr >> 0) & 0xFF; 1400 1263 if (ratio) 1401 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 1264 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 1402 1265 ratio, bclk, ratio * bclk); 1403 1266 return; 1404 1267 } ··· 1411 1274 1412 1275 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 1413 1276 1414 - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 1277 + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 1415 1278 1416 1279 ratio = (msr >> 56) & 0xFF; 1417 1280 if (ratio) 1418 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 1281 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 1419 1282 ratio, bclk, ratio * bclk); 1420 1283 1421 1284 ratio = (msr >> 48) & 0xFF; 1422 1285 if (ratio) 1423 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 1286 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 1424 1287 ratio, bclk, ratio * bclk); 1425 1288 1426 1289 ratio = (msr >> 40) & 0xFF; 1427 1290 if (ratio) 1428 - fprintf(stderr, "%d * 
%.0f = %.0f MHz max turbo 6 active cores\n", 1291 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", 1429 1292 ratio, bclk, ratio * bclk); 1430 1293 1431 1294 ratio = (msr >> 32) & 0xFF; 1432 1295 if (ratio) 1433 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 1296 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 1434 1297 ratio, bclk, ratio * bclk); 1435 1298 1436 1299 ratio = (msr >> 24) & 0xFF; 1437 1300 if (ratio) 1438 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 1301 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 1439 1302 ratio, bclk, ratio * bclk); 1440 1303 1441 1304 ratio = (msr >> 16) & 0xFF; 1442 1305 if (ratio) 1443 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 1306 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 1444 1307 ratio, bclk, ratio * bclk); 1445 1308 1446 1309 ratio = (msr >> 8) & 0xFF; 1447 1310 if (ratio) 1448 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 1311 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 1449 1312 ratio, bclk, ratio * bclk); 1450 1313 1451 1314 ratio = (msr >> 0) & 0xFF; 1452 1315 if (ratio) 1453 - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 1316 + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 1454 1317 ratio, bclk, ratio * bclk); 1455 1318 return; 1456 1319 } ··· 1458 1321 static void 1459 1322 dump_knl_turbo_ratio_limits(void) 1460 1323 { 1461 - int cores; 1462 - unsigned int ratio; 1324 + const unsigned int buckets_no = 7; 1325 + 1463 1326 unsigned long long msr; 1464 - int delta_cores; 1465 - int delta_ratio; 1466 - int i; 1327 + int delta_cores, delta_ratio; 1328 + int i, b_nr; 1329 + unsigned int cores[buckets_no]; 1330 + unsigned int ratio[buckets_no]; 1467 1331 1468 1332 get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); 1469 1333 1470 - fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 
0x%08llx\n", 1334 + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", 1471 1335 base_cpu, msr); 1472 1336 1473 1337 /** 1474 1338 * Turbo encoding in KNL is as follows: 1475 - * [7:0] -- Base value of number of active cores of bucket 1. 1339 + * [0] -- Reserved 1340 + * [7:1] -- Base value of number of active cores of bucket 1. 1476 1341 * [15:8] -- Base value of freq ratio of bucket 1. 1477 1342 * [20:16] -- +ve delta of number of active cores of bucket 2. 1478 1343 * i.e. active cores of bucket 2 = ··· 1493 1354 * [60:56]-- +ve delta of number of active cores of bucket 7. 1494 1355 * [63:61]-- -ve delta of freq ratio of bucket 7. 1495 1356 */ 1496 - cores = msr & 0xFF; 1497 - ratio = (msr >> 8) && 0xFF; 1498 - if (ratio > 0) 1499 - fprintf(stderr, 1500 - "%d * %.0f = %.0f MHz max turbo %d active cores\n", 1501 - ratio, bclk, ratio * bclk, cores); 1502 1357 1503 - for (i = 16; i < 64; i = i + 8) { 1358 + b_nr = 0; 1359 + cores[b_nr] = (msr & 0xFF) >> 1; 1360 + ratio[b_nr] = (msr >> 8) & 0xFF; 1361 + 1362 + for (i = 16; i < 64; i += 8) { 1504 1363 delta_cores = (msr >> i) & 0x1F; 1505 - delta_ratio = (msr >> (i + 5)) && 0x7; 1506 - if (!delta_cores || !delta_ratio) 1507 - return; 1508 - cores = cores + delta_cores; 1509 - ratio = ratio - delta_ratio; 1364 + delta_ratio = (msr >> (i + 5)) & 0x7; 1510 1365 1511 - /** -ve ratios will make successive ratio calculations 1512 - * negative. Hence return instead of carrying on. 1513 - */ 1514 - if (ratio > 0) 1515 - fprintf(stderr, 1516 - "%d * %.0f = %.0f MHz max turbo %d active cores\n", 1517 - ratio, bclk, ratio * bclk, cores); 1366 + cores[b_nr + 1] = cores[b_nr] + delta_cores; 1367 + ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 1368 + b_nr++; 1518 1369 } 1370 + 1371 + for (i = buckets_no - 1; i >= 0; i--) 1372 + if (i > 0 ? 
ratio[i] != ratio[i - 1] : 1) 1373 + fprintf(outf, 1374 + "%d * %.0f = %.0f MHz max turbo %d active cores\n", 1375 + ratio[i], bclk, ratio[i] * bclk, cores[i]); 1519 1376 } 1520 1377 1521 1378 static void ··· 1524 1389 #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) 1525 1390 #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) 1526 1391 1527 - fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); 1392 + fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); 1528 1393 1529 - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", 1394 + fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", 1530 1395 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 1531 1396 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 1532 1397 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 1533 1398 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 1534 1399 (msr & (1 << 15)) ? "" : "UN", 1535 - (unsigned int)msr & 7, 1400 + (unsigned int)msr & 0xF, 1536 1401 pkg_cstate_limit_strings[pkg_cstate_limit]); 1537 1402 return; 1538 1403 } ··· 1543 1408 unsigned long long msr; 1544 1409 1545 1410 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 1546 - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 1547 - fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); 1411 + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 1412 + fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 1548 1413 1549 1414 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 1550 - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 1415 + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 1551 1416 if (msr) { 1552 - fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); 1553 - fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); 1554 - fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); 1555 - 
fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); 1417 + fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 1418 + fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 1419 + fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 1420 + fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 1556 1421 } 1557 - fprintf(stderr, ")\n"); 1422 + fprintf(outf, ")\n"); 1558 1423 1559 1424 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 1560 - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 1425 + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 1561 1426 if (msr) { 1562 - fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); 1563 - fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); 1564 - fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); 1565 - fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); 1427 + fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 1428 + fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 1429 + fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 1430 + fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 1566 1431 } 1567 - fprintf(stderr, ")\n"); 1432 + fprintf(outf, ")\n"); 1568 1433 1569 1434 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 1570 - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 1435 + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 1571 1436 if ((msr) & 0x3) 1572 - fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 1573 - fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); 1574 - fprintf(stderr, ")\n"); 1575 - 1437 + fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 1438 + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 1439 + fprintf(outf, ")\n"); 
1440 + 1576 1441 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 1577 - fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 1578 - fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); 1579 - fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); 1580 - fprintf(stderr, ")\n"); 1442 + fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 1443 + fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 1444 + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 1445 + fprintf(outf, ")\n"); 1446 + } 1447 + void free_fd_percpu(void) 1448 + { 1449 + int i; 1450 + 1451 + for (i = 0; i < topo.max_cpu_num; ++i) { 1452 + if (fd_percpu[i] != 0) 1453 + close(fd_percpu[i]); 1454 + } 1455 + 1456 + free(fd_percpu); 1581 1457 } 1582 1458 1583 1459 void free_all_buffers(void) 1584 1460 { 1585 1461 CPU_FREE(cpu_present_set); 1586 1462 cpu_present_set = NULL; 1587 - cpu_present_set = 0; 1463 + cpu_present_setsize = 0; 1588 1464 1589 1465 CPU_FREE(cpu_affinity_set); 1590 1466 cpu_affinity_set = NULL; ··· 1620 1474 free(output_buffer); 1621 1475 output_buffer = NULL; 1622 1476 outp = NULL; 1477 + 1478 + free_fd_percpu(); 1479 + 1480 + free(irq_column_2_cpu); 1481 + free(irqs_per_cpu); 1623 1482 } 1624 1483 1625 1484 /* ··· 1632 1481 */ 1633 1482 FILE *fopen_or_die(const char *path, const char *mode) 1634 1483 { 1635 - FILE *filep = fopen(path, "r"); 1484 + FILE *filep = fopen(path, mode); 1636 1485 if (!filep) 1637 1486 err(1, "%s: open failed", path); 1638 1487 return filep; ··· 1847 1696 return 0; 1848 1697 } 1849 1698 1699 + /* 1700 + * snapshot_proc_interrupts() 1701 + * 1702 + * read and record summary of /proc/interrupts 1703 + * 1704 + * return 1 if config change requires a restart, else return 0 1705 + */ 1706 + int snapshot_proc_interrupts(void) 1707 + { 1708 + static FILE *fp; 1709 + int column, retval; 1710 + 1711 + if (fp == NULL) 1712 + fp = fopen_or_die("/proc/interrupts", 
"r"); 1713 + else 1714 + rewind(fp); 1715 + 1716 + /* read 1st line of /proc/interrupts to get cpu* name for each column */ 1717 + for (column = 0; column < topo.num_cpus; ++column) { 1718 + int cpu_number; 1719 + 1720 + retval = fscanf(fp, " CPU%d", &cpu_number); 1721 + if (retval != 1) 1722 + break; 1723 + 1724 + if (cpu_number > topo.max_cpu_num) { 1725 + warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 1726 + return 1; 1727 + } 1728 + 1729 + irq_column_2_cpu[column] = cpu_number; 1730 + irqs_per_cpu[cpu_number] = 0; 1731 + } 1732 + 1733 + /* read /proc/interrupt count lines and sum up irqs per cpu */ 1734 + while (1) { 1735 + int column; 1736 + char buf[64]; 1737 + 1738 + retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ 1739 + if (retval != 1) 1740 + break; 1741 + 1742 + /* read the count per cpu */ 1743 + for (column = 0; column < topo.num_cpus; ++column) { 1744 + 1745 + int cpu_number, irq_count; 1746 + 1747 + retval = fscanf(fp, " %d", &irq_count); 1748 + if (retval != 1) 1749 + break; 1750 + 1751 + cpu_number = irq_column_2_cpu[column]; 1752 + irqs_per_cpu[cpu_number] += irq_count; 1753 + 1754 + } 1755 + 1756 + while (getc(fp) != '\n') 1757 + ; /* flush interrupt description */ 1758 + 1759 + } 1760 + return 0; 1761 + } 1762 + /* 1763 + * snapshot_gfx_rc6_ms() 1764 + * 1765 + * record snapshot of 1766 + * /sys/class/drm/card0/power/rc6_residency_ms 1767 + * 1768 + * return 1 if config change requires a restart, else return 0 1769 + */ 1770 + int snapshot_gfx_rc6_ms(void) 1771 + { 1772 + FILE *fp; 1773 + int retval; 1774 + 1775 + fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); 1776 + 1777 + retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); 1778 + if (retval != 1) 1779 + err(1, "GFX rc6"); 1780 + 1781 + fclose(fp); 1782 + 1783 + return 0; 1784 + } 1785 + /* 1786 + * snapshot_gfx_mhz() 1787 + * 1788 + * record snapshot of 1789 + * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz 1790 + * 1791 + * return 1 if 
config change requires a restart, else return 0 1792 + */ 1793 + int snapshot_gfx_mhz(void) 1794 + { 1795 + static FILE *fp; 1796 + int retval; 1797 + 1798 + if (fp == NULL) 1799 + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); 1800 + else 1801 + rewind(fp); 1802 + 1803 + retval = fscanf(fp, "%d", &gfx_cur_mhz); 1804 + if (retval != 1) 1805 + err(1, "GFX MHz"); 1806 + 1807 + return 0; 1808 + } 1809 + 1810 + /* 1811 + * snapshot /proc and /sys files 1812 + * 1813 + * return 1 if configuration restart needed, else return 0 1814 + */ 1815 + int snapshot_proc_sysfs_files(void) 1816 + { 1817 + if (snapshot_proc_interrupts()) 1818 + return 1; 1819 + 1820 + if (do_gfx_rc6_ms) 1821 + snapshot_gfx_rc6_ms(); 1822 + 1823 + if (do_gfx_mhz) 1824 + snapshot_gfx_mhz(); 1825 + 1826 + return 0; 1827 + } 1828 + 1850 1829 void turbostat_loop() 1851 1830 { 1852 1831 int retval; ··· 1985 1704 restart: 1986 1705 restarted++; 1987 1706 1707 + snapshot_proc_sysfs_files(); 1988 1708 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1989 1709 if (retval < -1) { 1990 1710 exit(retval); ··· 2004 1722 re_initialize(); 2005 1723 goto restart; 2006 1724 } 2007 - sleep(interval_sec); 1725 + nanosleep(&interval_ts, NULL); 1726 + if (snapshot_proc_sysfs_files()) 1727 + goto restart; 2008 1728 retval = for_all_cpus(get_counters, ODD_COUNTERS); 2009 1729 if (retval < -1) { 2010 1730 exit(retval); ··· 2019 1735 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 2020 1736 compute_average(EVEN_COUNTERS); 2021 1737 format_all_counters(EVEN_COUNTERS); 2022 - flush_stdout(); 2023 - sleep(interval_sec); 1738 + flush_output_stdout(); 1739 + nanosleep(&interval_ts, NULL); 1740 + if (snapshot_proc_sysfs_files()) 1741 + goto restart; 2024 1742 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 2025 1743 if (retval < -1) { 2026 1744 exit(retval); ··· 2035 1749 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 2036 1750 compute_average(ODD_COUNTERS); 2037 1751 
format_all_counters(ODD_COUNTERS); 2038 - flush_stdout(); 1752 + flush_output_stdout(); 2039 1753 } 2040 1754 } 2041 1755 ··· 2175 1889 /* Nehalem compatible, but do not include turbo-ratio limit support */ 2176 1890 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 2177 1891 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 1892 + case 0x57: /* PHI - Knights Landing (different MSR definition) */ 2178 1893 return 0; 2179 1894 default: 2180 1895 return 1; ··· 2303 2016 return 0; 2304 2017 2305 2018 if (cpu_migrate(cpu)) { 2306 - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2019 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 2307 2020 return -1; 2308 2021 } 2309 2022 ··· 2324 2037 epb_string = "custom"; 2325 2038 break; 2326 2039 } 2327 - fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); 2040 + fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); 2041 + 2042 + return 0; 2043 + } 2044 + /* 2045 + * print_hwp() 2046 + * Decode the MSR_HWP_CAPABILITIES 2047 + */ 2048 + int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2049 + { 2050 + unsigned long long msr; 2051 + int cpu; 2052 + 2053 + if (!has_hwp) 2054 + return 0; 2055 + 2056 + cpu = t->cpu_id; 2057 + 2058 + /* MSR_HWP_CAPABILITIES is per-package */ 2059 + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 2060 + return 0; 2061 + 2062 + if (cpu_migrate(cpu)) { 2063 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 2064 + return -1; 2065 + } 2066 + 2067 + if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 2068 + return 0; 2069 + 2070 + fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", 2071 + cpu, msr, (msr & (1 << 0)) ? 
"" : "No-"); 2072 + 2073 + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 2074 + if ((msr & (1 << 0)) == 0) 2075 + return 0; 2076 + 2077 + if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 2078 + return 0; 2079 + 2080 + fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 2081 + "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n", 2082 + cpu, msr, 2083 + (unsigned int)HWP_HIGHEST_PERF(msr), 2084 + (unsigned int)HWP_GUARANTEED_PERF(msr), 2085 + (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), 2086 + (unsigned int)HWP_LOWEST_PERF(msr)); 2087 + 2088 + if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 2089 + return 0; 2090 + 2091 + fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 2092 + "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n", 2093 + cpu, msr, 2094 + (unsigned int)(((msr) >> 0) & 0xff), 2095 + (unsigned int)(((msr) >> 8) & 0xff), 2096 + (unsigned int)(((msr) >> 16) & 0xff), 2097 + (unsigned int)(((msr) >> 24) & 0xff), 2098 + (unsigned int)(((msr) >> 32) & 0xff3), 2099 + (unsigned int)(((msr) >> 42) & 0x1)); 2100 + 2101 + if (has_hwp_pkg) { 2102 + if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 2103 + return 0; 2104 + 2105 + fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 2106 + "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n", 2107 + cpu, msr, 2108 + (unsigned int)(((msr) >> 0) & 0xff), 2109 + (unsigned int)(((msr) >> 8) & 0xff), 2110 + (unsigned int)(((msr) >> 16) & 0xff), 2111 + (unsigned int)(((msr) >> 24) & 0xff), 2112 + (unsigned int)(((msr) >> 32) & 0xff3)); 2113 + } 2114 + if (has_hwp_notify) { 2115 + if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 2116 + return 0; 2117 + 2118 + fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 2119 + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 2120 + cpu, msr, 2121 + ((msr) & 0x1) ? "EN" : "Dis", 2122 + ((msr) & 0x2) ? 
"EN" : "Dis"); 2123 + } 2124 + if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 2125 + return 0; 2126 + 2127 + fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 2128 + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 2129 + cpu, msr, 2130 + ((msr) & 0x1) ? "" : "No-", 2131 + ((msr) & 0x2) ? "" : "No-"); 2328 2132 2329 2133 return 0; 2330 2134 } ··· 2435 2057 return 0; 2436 2058 2437 2059 if (cpu_migrate(cpu)) { 2438 - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2060 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 2439 2061 return -1; 2440 2062 } 2441 2063 2442 2064 if (do_core_perf_limit_reasons) { 2443 2065 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 2444 - fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2445 - fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 2066 + fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2067 + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 2446 2068 (msr & 1 << 15) ? "bit15, " : "", 2447 2069 (msr & 1 << 14) ? "bit14, " : "", 2448 2070 (msr & 1 << 13) ? "Transitions, " : "", ··· 2457 2079 (msr & 1 << 2) ? "bit2, " : "", 2458 2080 (msr & 1 << 1) ? "ThermStatus, " : "", 2459 2081 (msr & 1 << 0) ? "PROCHOT, " : ""); 2460 - fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 2082 + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 2461 2083 (msr & 1 << 31) ? "bit31, " : "", 2462 2084 (msr & 1 << 30) ? "bit30, " : "", 2463 2085 (msr & 1 << 29) ? "Transitions, " : "", ··· 2476 2098 } 2477 2099 if (do_gfx_perf_limit_reasons) { 2478 2100 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 2479 - fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2480 - fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", 2101 + fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2102 + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 2481 2103 (msr & 1 << 0) ? "PROCHOT, " : "", 2482 2104 (msr & 1 << 1) ? 
"ThermStatus, " : "", 2483 2105 (msr & 1 << 4) ? "Graphics, " : "", ··· 2486 2108 (msr & 1 << 9) ? "GFXPwr, " : "", 2487 2109 (msr & 1 << 10) ? "PkgPwrL1, " : "", 2488 2110 (msr & 1 << 11) ? "PkgPwrL2, " : ""); 2489 - fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", 2111 + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 2490 2112 (msr & 1 << 16) ? "PROCHOT, " : "", 2491 2113 (msr & 1 << 17) ? "ThermStatus, " : "", 2492 2114 (msr & 1 << 20) ? "Graphics, " : "", ··· 2498 2120 } 2499 2121 if (do_ring_perf_limit_reasons) { 2500 2122 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 2501 - fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2502 - fprintf(stderr, " (Active: %s%s%s%s%s%s)", 2123 + fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2124 + fprintf(outf, " (Active: %s%s%s%s%s%s)", 2503 2125 (msr & 1 << 0) ? "PROCHOT, " : "", 2504 2126 (msr & 1 << 1) ? "ThermStatus, " : "", 2505 2127 (msr & 1 << 6) ? "VR-Therm, " : "", 2506 2128 (msr & 1 << 8) ? "Amps, " : "", 2507 2129 (msr & 1 << 10) ? "PkgPwrL1, " : "", 2508 2130 (msr & 1 << 11) ? "PkgPwrL2, " : ""); 2509 - fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", 2131 + fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 2510 2132 (msr & 1 << 16) ? "PROCHOT, " : "", 2511 2133 (msr & 1 << 17) ? "ThermStatus, " : "", 2512 2134 (msr & 1 << 22) ? "VR-Therm, " : "", ··· 2629 2251 2630 2252 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 2631 2253 if (debug) 2632 - fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 2254 + fprintf(outf, "RAPL: %.0f sec. 
Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 2633 2255 2634 2256 return; 2635 2257 } ··· 2671 2293 return 0; 2672 2294 2673 2295 if (cpu_migrate(cpu)) { 2674 - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2296 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 2675 2297 return -1; 2676 2298 } 2677 2299 ··· 2680 2302 return 0; 2681 2303 2682 2304 dts = (msr >> 16) & 0x7F; 2683 - fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", 2305 + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", 2684 2306 cpu, msr, tcc_activation_temp - dts); 2685 2307 2686 2308 #ifdef THERM_DEBUG ··· 2689 2311 2690 2312 dts = (msr >> 16) & 0x7F; 2691 2313 dts2 = (msr >> 8) & 0x7F; 2692 - fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 2314 + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 2693 2315 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 2694 2316 #endif 2695 2317 } ··· 2703 2325 2704 2326 dts = (msr >> 16) & 0x7F; 2705 2327 resolution = (msr >> 27) & 0xF; 2706 - fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 2328 + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 2707 2329 cpu, msr, tcc_activation_temp - dts, resolution); 2708 2330 2709 2331 #ifdef THERM_DEBUG ··· 2712 2334 2713 2335 dts = (msr >> 16) & 0x7F; 2714 2336 dts2 = (msr >> 8) & 0x7F; 2715 - fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 2337 + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 2716 2338 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 2717 2339 #endif 2718 2340 } 2719 2341 2720 2342 return 0; 2721 2343 } 2722 - 2344 + 2723 2345 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 2724 2346 { 2725 - fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", 2347 + fprintf(outf, 
"cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", 2726 2348 cpu, label, 2727 2349 ((msr >> 15) & 1) ? "EN" : "DIS", 2728 2350 ((msr >> 0) & 0x7FFF) * rapl_power_units, ··· 2746 2368 2747 2369 cpu = t->cpu_id; 2748 2370 if (cpu_migrate(cpu)) { 2749 - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2371 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 2750 2372 return -1; 2751 2373 } 2752 2374 ··· 2754 2376 return -1; 2755 2377 2756 2378 if (debug) { 2757 - fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " 2379 + fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " 2758 2380 "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, 2759 2381 rapl_power_units, rapl_energy_units, rapl_time_units); 2760 2382 } ··· 2764 2386 return -5; 2765 2387 2766 2388 2767 - fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2389 + fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2768 2390 cpu, msr, 2769 2391 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2770 2392 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ··· 2777 2399 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 2778 2400 return -9; 2779 2401 2780 - fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 2402 + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 2781 2403 cpu, msr, (msr >> 63) & 1 ? "": "UN"); 2782 2404 2783 2405 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 2784 - fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", 2406 + fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", 2785 2407 cpu, 2786 2408 ((msr >> 47) & 1) ? 
"EN" : "DIS", 2787 2409 ((msr >> 32) & 0x7FFF) * rapl_power_units, ··· 2793 2415 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 2794 2416 return -6; 2795 2417 2796 - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2418 + fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2797 2419 cpu, msr, 2798 2420 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2799 2421 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ··· 2803 2425 if (do_rapl & RAPL_DRAM) { 2804 2426 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 2805 2427 return -9; 2806 - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 2428 + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 2807 2429 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 2808 2430 2809 2431 print_power_limit_msr(cpu, msr, "DRAM Limit"); ··· 2813 2435 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 2814 2436 return -7; 2815 2437 2816 - fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 2438 + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 2817 2439 } 2818 2440 } 2819 2441 if (do_rapl & RAPL_CORES) { ··· 2821 2443 2822 2444 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 2823 2445 return -9; 2824 - fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 2446 + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 2825 2447 cpu, msr, (msr >> 31) & 1 ? 
"": "UN"); 2826 2448 print_power_limit_msr(cpu, msr, "Cores Limit"); 2827 2449 } ··· 2831 2453 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 2832 2454 return -8; 2833 2455 2834 - fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 2456 + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 2835 2457 2836 2458 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 2837 2459 return -9; 2838 - fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 2460 + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 2839 2461 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 2840 2462 print_power_limit_msr(cpu, msr, "GFX Limit"); 2841 2463 } ··· 2961 2583 double freq; 2962 2584 2963 2585 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 2964 - fprintf(stderr, "SLM BCLK: unknown\n"); 2586 + fprintf(outf, "SLM BCLK: unknown\n"); 2965 2587 2966 2588 i = msr & 0xf; 2967 2589 if (i >= SLM_BCLK_FREQS) { 2968 - fprintf(stderr, "SLM BCLK[%d] invalid\n", i); 2590 + fprintf(outf, "SLM BCLK[%d] invalid\n", i); 2969 2591 msr = 3; 2970 2592 } 2971 2593 freq = slm_freq_table[i]; 2972 2594 2973 - fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); 2595 + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); 2974 2596 2975 2597 return freq; 2976 2598 } 2977 2599 2978 2600 double discover_bclk(unsigned int family, unsigned int model) 2979 2601 { 2980 - if (has_snb_msrs(family, model)) 2602 + if (has_snb_msrs(family, model) || is_knl(family, model)) 2981 2603 return 100.00; 2982 2604 else if (is_slm(family, model)) 2983 2605 return slm_bclk(); ··· 3013 2635 3014 2636 cpu = t->cpu_id; 3015 2637 if (cpu_migrate(cpu)) { 3016 - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2638 + fprintf(outf, "Could not migrate to CPU %d\n", cpu); 3017 2639 return -1; 3018 2640 } 3019 2641 3020 2642 if (tcc_activation_temp_override != 0) { 3021 2643 tcc_activation_temp = tcc_activation_temp_override; 3022 - fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", 2644 + fprintf(outf, "cpu%d: Using 
cmdline TCC Target (%d C)\n", 3023 2645 cpu, tcc_activation_temp); 3024 2646 return 0; 3025 2647 } ··· 3034 2656 target_c_local = (msr >> 16) & 0xFF; 3035 2657 3036 2658 if (debug) 3037 - fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", 2659 + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", 3038 2660 cpu, msr, target_c_local); 3039 2661 3040 2662 if (!target_c_local) ··· 3046 2668 3047 2669 guess: 3048 2670 tcc_activation_temp = TJMAX_DEFAULT; 3049 - fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", 2671 + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", 3050 2672 cpu, tcc_activation_temp); 3051 2673 3052 2674 return 0; 3053 2675 } 2676 + 2677 + void decode_feature_control_msr(void) 2678 + { 2679 + unsigned long long msr; 2680 + 2681 + if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr)) 2682 + fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 2683 + base_cpu, msr, 2684 + msr & FEATURE_CONTROL_LOCKED ? "" : "UN-", 2685 + msr & (1 << 18) ? "SGX" : ""); 2686 + } 2687 + 2688 + void decode_misc_enable_msr(void) 2689 + { 2690 + unsigned long long msr; 2691 + 2692 + if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 2693 + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", 2694 + base_cpu, msr, 2695 + msr & (1 << 3) ? "TCC" : "", 2696 + msr & (1 << 16) ? "EIST" : "", 2697 + msr & (1 << 18) ? "MONITOR" : ""); 2698 + } 2699 + 2700 + /* 2701 + * Decode MSR_MISC_PWR_MGMT 2702 + * 2703 + * Decode the bits according to the Nehalem documentation 2704 + * bit[0] seems to continue to have same meaning going forward 2705 + * bit[1] less so... 
2706 + */ 2707 + void decode_misc_pwr_mgmt_msr(void) 2708 + { 2709 + unsigned long long msr; 2710 + 2711 + if (!do_nhm_platform_info) 2712 + return; 2713 + 2714 + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 2715 + fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", 2716 + base_cpu, msr, 2717 + msr & (1 << 0) ? "DIS" : "EN", 2718 + msr & (1 << 1) ? "EN" : "DIS"); 2719 + } 2720 + 3054 2721 void process_cpuid() 3055 2722 { 3056 - unsigned int eax, ebx, ecx, edx, max_level; 2723 + unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; 3057 2724 unsigned int fms, family, model, stepping; 3058 2725 3059 2726 eax = ebx = ecx = edx = 0; 3060 2727 3061 - __get_cpuid(0, &max_level, &ebx, &ecx, &edx); 2728 + __cpuid(0, max_level, ebx, ecx, edx); 3062 2729 3063 2730 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) 3064 2731 genuine_intel = 1; 3065 2732 3066 2733 if (debug) 3067 - fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", 2734 + fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", 3068 2735 (char *)&ebx, (char *)&edx, (char *)&ecx); 3069 2736 3070 - __get_cpuid(1, &fms, &ebx, &ecx, &edx); 2737 + __cpuid(1, fms, ebx, ecx, edx); 3071 2738 family = (fms >> 8) & 0xf; 3072 2739 model = (fms >> 4) & 0xf; 3073 2740 stepping = fms & 0xf; 3074 2741 if (family == 6 || family == 0xf) 3075 2742 model += ((fms >> 16) & 0xf) << 4; 3076 2743 3077 - if (debug) 3078 - fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 2744 + if (debug) { 2745 + fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 3079 2746 max_level, family, model, stepping, family, model, stepping); 2747 + fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", 2748 + ecx & (1 << 0) ? "SSE3" : "-", 2749 + ecx & (1 << 3) ? "MONITOR" : "-", 2750 + ecx & (1 << 6) ? "SMX" : "-", 2751 + ecx & (1 << 7) ? "EIST" : "-", 2752 + ecx & (1 << 8) ? "TM2" : "-", 2753 + edx & (1 << 4) ? "TSC" : "-", 2754 + edx & (1 << 5) ? 
"MSR" : "-", 2755 + edx & (1 << 22) ? "ACPI-TM" : "-", 2756 + edx & (1 << 29) ? "TM" : "-"); 2757 + } 3080 2758 3081 2759 if (!(edx & (1 << 5))) 3082 2760 errx(1, "CPUID: no MSR"); ··· 3143 2709 * This check is valid for both Intel and AMD. 3144 2710 */ 3145 2711 ebx = ecx = edx = 0; 3146 - __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); 2712 + __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 3147 2713 3148 - if (max_level >= 0x80000007) { 2714 + if (max_extended_level >= 0x80000007) { 3149 2715 3150 2716 /* 3151 2717 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 3152 2718 * this check is valid for both Intel and AMD 3153 2719 */ 3154 - __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); 2720 + __cpuid(0x80000007, eax, ebx, ecx, edx); 3155 2721 has_invariant_tsc = edx & (1 << 8); 3156 2722 } 3157 2723 ··· 3160 2726 * this check is valid for both Intel and AMD 3161 2727 */ 3162 2728 3163 - __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); 2729 + __cpuid(0x6, eax, ebx, ecx, edx); 3164 2730 has_aperf = ecx & (1 << 0); 3165 2731 do_dts = eax & (1 << 0); 3166 2732 do_ptm = eax & (1 << 6); 2733 + has_hwp = eax & (1 << 7); 2734 + has_hwp_notify = eax & (1 << 8); 2735 + has_hwp_activity_window = eax & (1 << 9); 2736 + has_hwp_epp = eax & (1 << 10); 2737 + has_hwp_pkg = eax & (1 << 11); 3167 2738 has_epb = ecx & (1 << 3); 3168 2739 3169 2740 if (debug) 3170 - fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", 3171 - has_aperf ? "" : "No ", 3172 - do_dts ? "" : "No ", 3173 - do_ptm ? "" : "No ", 3174 - has_epb ? "" : "No "); 2741 + fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " 2742 + "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 2743 + has_aperf ? "" : "No-", 2744 + do_dts ? "" : "No-", 2745 + do_ptm ? "" : "No-", 2746 + has_hwp ? "" : "No-", 2747 + has_hwp_notify ? "" : "No-", 2748 + has_hwp_activity_window ? "" : "No-", 2749 + has_hwp_epp ? "" : "No-", 2750 + has_hwp_pkg ? "" : "No-", 2751 + has_epb ? 
"" : "No-"); 3175 2752 3176 - if (max_level > 0x15) { 2753 + if (debug) 2754 + decode_misc_enable_msr(); 2755 + 2756 + if (max_level >= 0x7) { 2757 + int has_sgx; 2758 + 2759 + ecx = 0; 2760 + 2761 + __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 2762 + 2763 + has_sgx = ebx & (1 << 2); 2764 + fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-"); 2765 + 2766 + if (has_sgx) 2767 + decode_feature_control_msr(); 2768 + } 2769 + 2770 + if (max_level >= 0x15) { 3177 2771 unsigned int eax_crystal; 3178 2772 unsigned int ebx_tsc; 3179 2773 ··· 3209 2747 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 3210 2748 */ 3211 2749 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 3212 - __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); 2750 + __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 3213 2751 3214 2752 if (ebx_tsc != 0) { 3215 2753 3216 2754 if (debug && (ebx != 0)) 3217 - fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 2755 + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 3218 2756 eax_crystal, ebx_tsc, crystal_hz); 3219 2757 3220 2758 if (crystal_hz == 0) ··· 3230 2768 if (crystal_hz) { 3231 2769 tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; 3232 2770 if (debug) 3233 - fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 2771 + fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 3234 2772 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 3235 2773 } 3236 2774 } 2775 + } 2776 + if (max_level >= 0x16) { 2777 + unsigned int base_mhz, max_mhz, bus_mhz, edx; 2778 + 2779 + /* 2780 + * CPUID 16H Base MHz, Max MHz, Bus MHz 2781 + */ 2782 + base_mhz = max_mhz = bus_mhz = edx = 0; 2783 + 2784 + __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 2785 + if (debug) 2786 + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 2787 + base_mhz, max_mhz, bus_mhz); 3237 2788 } 3238 2789 3239 2790 if (has_aperf) ··· 3263 2788 do_slm_cstates = is_slm(family, 
model); 3264 2789 do_knl_cstates = is_knl(family, model); 3265 2790 2791 + if (debug) 2792 + decode_misc_pwr_mgmt_msr(); 2793 + 3266 2794 rapl_probe(family, model); 3267 2795 perf_limit_reasons_probe(family, model); 3268 2796 ··· 3275 2797 if (has_skl_msrs(family, model)) 3276 2798 calculate_tsc_tweak(); 3277 2799 2800 + do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); 2801 + 2802 + do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); 2803 + 3278 2804 return; 3279 2805 } 3280 2806 3281 2807 void help() 3282 2808 { 3283 - fprintf(stderr, 2809 + fprintf(outf, 3284 2810 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 3285 2811 "\n" 3286 2812 "Turbostat forks the specified COMMAND and prints statistics\n" ··· 3296 2814 "--help print this help message\n" 3297 2815 "--counter msr print 32-bit counter at address \"msr\"\n" 3298 2816 "--Counter msr print 64-bit Counter at address \"msr\"\n" 2817 + "--out file create or truncate \"file\" for all output\n" 3299 2818 "--msr msr print 32-bit value at address \"msr\"\n" 3300 2819 "--MSR msr print 64-bit Value at address \"msr\"\n" 3301 2820 "--version print version information\n" ··· 3341 2858 show_cpu = 1; 3342 2859 3343 2860 if (debug > 1) 3344 - fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 2861 + fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 3345 2862 3346 2863 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 3347 2864 if (cpus == NULL) ··· 3376 2893 3377 2894 if (cpu_is_not_present(i)) { 3378 2895 if (debug > 1) 3379 - fprintf(stderr, "cpu%d NOT PRESENT\n", i); 2896 + fprintf(outf, "cpu%d NOT PRESENT\n", i); 3380 2897 continue; 3381 2898 } 3382 2899 cpus[i].core_id = get_core_id(i); ··· 3391 2908 if (siblings > max_siblings) 3392 2909 max_siblings = siblings; 3393 2910 if (debug > 1) 3394 - fprintf(stderr, "cpu %d pkg %d core %d\n", 2911 + 
fprintf(outf, "cpu %d pkg %d core %d\n", 3395 2912 i, cpus[i].physical_package_id, cpus[i].core_id); 3396 2913 } 3397 2914 topo.num_cores_per_pkg = max_core_id + 1; 3398 2915 if (debug > 1) 3399 - fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 2916 + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", 3400 2917 max_core_id, topo.num_cores_per_pkg); 3401 2918 if (debug && !summary_only && topo.num_cores_per_pkg > 1) 3402 2919 show_core = 1; 3403 2920 3404 2921 topo.num_packages = max_package_id + 1; 3405 2922 if (debug > 1) 3406 - fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 2923 + fprintf(outf, "max_package_id %d, sizing for %d packages\n", 3407 2924 max_package_id, topo.num_packages); 3408 2925 if (debug && !summary_only && topo.num_packages > 1) 3409 2926 show_pkg = 1; 3410 2927 3411 2928 topo.num_threads_per_core = max_siblings; 3412 2929 if (debug > 1) 3413 - fprintf(stderr, "max_siblings %d\n", max_siblings); 2930 + fprintf(outf, "max_siblings %d\n", max_siblings); 3414 2931 3415 2932 free(cpus); 3416 2933 } ··· 3502 3019 if (outp == NULL) 3503 3020 err(-1, "calloc output buffer"); 3504 3021 } 3022 + void allocate_fd_percpu(void) 3023 + { 3024 + fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); 3025 + if (fd_percpu == NULL) 3026 + err(-1, "calloc fd_percpu"); 3027 + } 3028 + void allocate_irq_buffers(void) 3029 + { 3030 + irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 3031 + if (irq_column_2_cpu == NULL) 3032 + err(-1, "calloc %d", topo.num_cpus); 3505 3033 3034 + irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); 3035 + if (irqs_per_cpu == NULL) 3036 + err(-1, "calloc %d", topo.max_cpu_num); 3037 + } 3506 3038 void setup_all_buffers(void) 3507 3039 { 3508 3040 topology_probe(); 3041 + allocate_irq_buffers(); 3042 + allocate_fd_percpu(); 3509 3043 allocate_counters(&thread_even, &core_even, &package_even); 3510 3044 allocate_counters(&thread_odd, &core_odd, &package_odd); 3511 3045 
allocate_output_buffer(); ··· 3536 3036 err(-ENODEV, "No valid cpus found"); 3537 3037 3538 3038 if (debug > 1) 3539 - fprintf(stderr, "base_cpu = %d\n", base_cpu); 3039 + fprintf(outf, "base_cpu = %d\n", base_cpu); 3540 3040 } 3541 3041 3542 3042 void turbostat_init() ··· 3547 3047 check_permissions(); 3548 3048 process_cpuid(); 3549 3049 3050 + 3051 + if (debug) 3052 + for_all_cpus(print_hwp, ODD_COUNTERS); 3550 3053 3551 3054 if (debug) 3552 3055 for_all_cpus(print_epb, ODD_COUNTERS); ··· 3603 3100 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 3604 3101 compute_average(EVEN_COUNTERS); 3605 3102 format_all_counters(EVEN_COUNTERS); 3606 - flush_stderr(); 3607 3103 3608 - fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 3104 + fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 3105 + 3106 + flush_output_stderr(); 3609 3107 3610 3108 return status; 3611 3109 } ··· 3623 3119 if (status) 3624 3120 return status; 3625 3121 3626 - flush_stdout(); 3122 + flush_output_stdout(); 3627 3123 3628 3124 return status; 3629 3125 } 3630 3126 3631 3127 void print_version() { 3632 - fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" 3128 + fprintf(outf, "turbostat version 4.11 27 Feb 2016" 3633 3129 " - Len Brown <lenb@kernel.org>\n"); 3634 3130 } 3635 3131 ··· 3647 3143 {"Joules", no_argument, 0, 'J'}, 3648 3144 {"MSR", required_argument, 0, 'M'}, 3649 3145 {"msr", required_argument, 0, 'm'}, 3146 + {"out", required_argument, 0, 'o'}, 3650 3147 {"Package", no_argument, 0, 'p'}, 3651 3148 {"processor", no_argument, 0, 'p'}, 3652 3149 {"Summary", no_argument, 0, 'S'}, ··· 3658 3153 3659 3154 progname = argv[0]; 3660 3155 3661 - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", 3156 + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", 3662 3157 long_options, &option_index)) != -1) { 3663 3158 switch (opt) { 3664 3159 case 'C': ··· 3678 3173 help(); 3679 3174 exit(1); 3680 3175 case 
'i': 3681 - interval_sec = atoi(optarg); 3176 + { 3177 + double interval = strtod(optarg, NULL); 3178 + 3179 + if (interval < 0.001) { 3180 + fprintf(outf, "interval %f seconds is too small\n", 3181 + interval); 3182 + exit(2); 3183 + } 3184 + 3185 + interval_ts.tv_sec = interval; 3186 + interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 3187 + } 3682 3188 break; 3683 3189 case 'J': 3684 3190 rapl_joules++; ··· 3699 3183 break; 3700 3184 case 'm': 3701 3185 sscanf(optarg, "%x", &extra_msr_offset32); 3186 + break; 3187 + case 'o': 3188 + outf = fopen_or_die(optarg, "w"); 3702 3189 break; 3703 3190 case 'P': 3704 3191 show_pkg_only++; ··· 3725 3206 3726 3207 int main(int argc, char **argv) 3727 3208 { 3209 + outf = stderr; 3210 + 3728 3211 cmdline(argc, argv); 3729 3212 3730 3213 if (debug)