Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/pcpu: Xen physical cpus online/offline sys interface

This patch provides a sysfs interface for onlining/offlining Xen physical cpus.
Users can use it for their own purposes, such as power saving:
offlining some cpus under a light workload can save a significant amount of power.

The basic workflow is: the user onlines/offlines a cpu via the sysfs interface,
which issues a hypercall asking Xen to carry out the request; once done, Xen injects a virq back into dom0,
and dom0 then syncs the cpu status.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

authored by

Liu, Jinsong and committed by
Konrad Rzeszutek Wilk
f65c9bb3 05e36006

+401
+20
Documentation/ABI/testing/sysfs-devices-system-xen_cpu
··· 1 + What: /sys/devices/system/xen_cpu/ 2 + Date: May 2012 3 + Contact: Liu, Jinsong <jinsong.liu@intel.com> 4 + Description: 5 + A collection of global/individual Xen physical cpu attributes 6 + 7 + Individual physical cpu attributes are contained in 8 + subdirectories named by Xen's logical cpu number, e.g.: 9 + /sys/devices/system/xen_cpu/xen_cpu#/ 10 + 11 + 12 + What: /sys/devices/system/xen_cpu/xen_cpu#/online 13 + Date: May 2012 14 + Contact: Liu, Jinsong <jinsong.liu@intel.com> 15 + Description: 16 + Interface to online/offline Xen physical cpus 17 + 18 + When running under the Xen platform, it provides a user interface 19 + to online/offline physical cpus, except cpu0 due to several 20 + logical restrictions and assumptions.
+1
drivers/xen/Makefile
··· 17 17 obj-$(CONFIG_XEN_PVHVM) += platform-pci.o 18 18 obj-$(CONFIG_XEN_TMEM) += tmem.o 19 19 obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o 20 + obj-$(CONFIG_XEN_DOM0) += pcpu.o 20 21 obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o 21 22 obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o 22 23 obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
+371
drivers/xen/pcpu.c
··· 1 + /****************************************************************************** 2 + * pcpu.c 3 + * Management physical cpu in dom0, get pcpu info and provide sys interface 4 + * 5 + * Copyright (c) 2012 Intel Corporation 6 + * Author: Liu, Jinsong <jinsong.liu@intel.com> 7 + * Author: Jiang, Yunhong <yunhong.jiang@intel.com> 8 + * 9 + * This program is free software; you can redistribute it and/or 10 + * modify it under the terms of the GNU General Public License version 2 11 + * as published by the Free Software Foundation; or, when distributed 12 + * separately from the Linux kernel or incorporated into other 13 + * software packages, subject to the following license: 14 + * 15 + * Permission is hereby granted, free of charge, to any person obtaining a copy 16 + * of this source file (the "Software"), to deal in the Software without 17 + * restriction, including without limitation the rights to use, copy, modify, 18 + * merge, publish, distribute, sublicense, and/or sell copies of the Software, 19 + * and to permit persons to whom the Software is furnished to do so, subject to 20 + * the following conditions: 21 + * 22 + * The above copyright notice and this permission notice shall be included in 23 + * all copies or substantial portions of the Software. 24 + * 25 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 31 + * IN THE SOFTWARE. 
32 + */ 33 + 34 + #include <linux/interrupt.h> 35 + #include <linux/spinlock.h> 36 + #include <linux/cpu.h> 37 + #include <linux/stat.h> 38 + #include <linux/capability.h> 39 + 40 + #include <xen/xen.h> 41 + #include <xen/xenbus.h> 42 + #include <xen/events.h> 43 + #include <xen/interface/platform.h> 44 + #include <asm/xen/hypervisor.h> 45 + #include <asm/xen/hypercall.h> 46 + 47 + #define XEN_PCPU "xen_cpu: " 48 + 49 + /* 50 + * @cpu_id: Xen physical cpu logic number 51 + * @flags: Xen physical cpu status flag 52 + * - XEN_PCPU_FLAGS_ONLINE: cpu is online 53 + * - XEN_PCPU_FLAGS_INVALID: cpu is not present 54 + */ 55 + struct pcpu { 56 + struct list_head list; 57 + struct device dev; 58 + uint32_t cpu_id; 59 + uint32_t flags; 60 + }; 61 + 62 + static struct bus_type xen_pcpu_subsys = { 63 + .name = "xen_cpu", 64 + .dev_name = "xen_cpu", 65 + }; 66 + 67 + static DEFINE_MUTEX(xen_pcpu_lock); 68 + 69 + static LIST_HEAD(xen_pcpus); 70 + 71 + static int xen_pcpu_down(uint32_t cpu_id) 72 + { 73 + struct xen_platform_op op = { 74 + .cmd = XENPF_cpu_offline, 75 + .interface_version = XENPF_INTERFACE_VERSION, 76 + .u.cpu_ol.cpuid = cpu_id, 77 + }; 78 + 79 + return HYPERVISOR_dom0_op(&op); 80 + } 81 + 82 + static int xen_pcpu_up(uint32_t cpu_id) 83 + { 84 + struct xen_platform_op op = { 85 + .cmd = XENPF_cpu_online, 86 + .interface_version = XENPF_INTERFACE_VERSION, 87 + .u.cpu_ol.cpuid = cpu_id, 88 + }; 89 + 90 + return HYPERVISOR_dom0_op(&op); 91 + } 92 + 93 + static ssize_t show_online(struct device *dev, 94 + struct device_attribute *attr, 95 + char *buf) 96 + { 97 + struct pcpu *cpu = container_of(dev, struct pcpu, dev); 98 + 99 + return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE)); 100 + } 101 + 102 + static ssize_t __ref store_online(struct device *dev, 103 + struct device_attribute *attr, 104 + const char *buf, size_t count) 105 + { 106 + struct pcpu *pcpu = container_of(dev, struct pcpu, dev); 107 + unsigned long long val; 108 + ssize_t ret; 109 + 
110 + if (!capable(CAP_SYS_ADMIN)) 111 + return -EPERM; 112 + 113 + if (kstrtoull(buf, 0, &val) < 0) 114 + return -EINVAL; 115 + 116 + switch (val) { 117 + case 0: 118 + ret = xen_pcpu_down(pcpu->cpu_id); 119 + break; 120 + case 1: 121 + ret = xen_pcpu_up(pcpu->cpu_id); 122 + break; 123 + default: 124 + ret = -EINVAL; 125 + } 126 + 127 + if (ret >= 0) 128 + ret = count; 129 + return ret; 130 + } 131 + static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); 132 + 133 + static bool xen_pcpu_online(uint32_t flags) 134 + { 135 + return !!(flags & XEN_PCPU_FLAGS_ONLINE); 136 + } 137 + 138 + static void pcpu_online_status(struct xenpf_pcpuinfo *info, 139 + struct pcpu *pcpu) 140 + { 141 + if (xen_pcpu_online(info->flags) && 142 + !xen_pcpu_online(pcpu->flags)) { 143 + /* the pcpu is onlined */ 144 + pcpu->flags |= XEN_PCPU_FLAGS_ONLINE; 145 + kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE); 146 + } else if (!xen_pcpu_online(info->flags) && 147 + xen_pcpu_online(pcpu->flags)) { 148 + /* The pcpu is offlined */ 149 + pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE; 150 + kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE); 151 + } 152 + } 153 + 154 + static struct pcpu *get_pcpu(uint32_t cpu_id) 155 + { 156 + struct pcpu *pcpu; 157 + 158 + list_for_each_entry(pcpu, &xen_pcpus, list) { 159 + if (pcpu->cpu_id == cpu_id) 160 + return pcpu; 161 + } 162 + 163 + return NULL; 164 + } 165 + 166 + static void pcpu_release(struct device *dev) 167 + { 168 + struct pcpu *pcpu = container_of(dev, struct pcpu, dev); 169 + 170 + list_del(&pcpu->list); 171 + kfree(pcpu); 172 + } 173 + 174 + static void unregister_and_remove_pcpu(struct pcpu *pcpu) 175 + { 176 + struct device *dev; 177 + 178 + if (!pcpu) 179 + return; 180 + 181 + dev = &pcpu->dev; 182 + if (dev->id) 183 + device_remove_file(dev, &dev_attr_online); 184 + 185 + /* pcpu remove would be implicitly done */ 186 + device_unregister(dev); 187 + } 188 + 189 + static int register_pcpu(struct pcpu *pcpu) 190 + { 191 + struct device 
*dev; 192 + int err = -EINVAL; 193 + 194 + if (!pcpu) 195 + return err; 196 + 197 + dev = &pcpu->dev; 198 + dev->bus = &xen_pcpu_subsys; 199 + dev->id = pcpu->cpu_id; 200 + dev->release = pcpu_release; 201 + 202 + err = device_register(dev); 203 + if (err) { 204 + pcpu_release(dev); 205 + return err; 206 + } 207 + 208 + /* 209 + * Xen never offline cpu0 due to several restrictions 210 + * and assumptions. This basically doesn't add a sys control 211 + * to user, one cannot attempt to offline BSP. 212 + */ 213 + if (dev->id) { 214 + err = device_create_file(dev, &dev_attr_online); 215 + if (err) { 216 + device_unregister(dev); 217 + return err; 218 + } 219 + } 220 + 221 + return 0; 222 + } 223 + 224 + static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info) 225 + { 226 + struct pcpu *pcpu; 227 + int err; 228 + 229 + if (info->flags & XEN_PCPU_FLAGS_INVALID) 230 + return ERR_PTR(-ENODEV); 231 + 232 + pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL); 233 + if (!pcpu) 234 + return ERR_PTR(-ENOMEM); 235 + 236 + INIT_LIST_HEAD(&pcpu->list); 237 + pcpu->cpu_id = info->xen_cpuid; 238 + pcpu->flags = info->flags; 239 + 240 + /* Need hold on xen_pcpu_lock before pcpu list manipulations */ 241 + list_add_tail(&pcpu->list, &xen_pcpus); 242 + 243 + err = register_pcpu(pcpu); 244 + if (err) { 245 + pr_warning(XEN_PCPU "Failed to register pcpu%u\n", 246 + info->xen_cpuid); 247 + return ERR_PTR(-ENOENT); 248 + } 249 + 250 + return pcpu; 251 + } 252 + 253 + /* 254 + * Caller should hold the xen_pcpu_lock 255 + */ 256 + static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) 257 + { 258 + int ret; 259 + struct pcpu *pcpu = NULL; 260 + struct xenpf_pcpuinfo *info; 261 + struct xen_platform_op op = { 262 + .cmd = XENPF_get_cpuinfo, 263 + .interface_version = XENPF_INTERFACE_VERSION, 264 + .u.pcpu_info.xen_cpuid = cpu, 265 + }; 266 + 267 + ret = HYPERVISOR_dom0_op(&op); 268 + if (ret) 269 + return ret; 270 + 271 + info = &op.u.pcpu_info; 272 + if (max_cpu) 273 + *max_cpu 
= info->max_present; 274 + 275 + pcpu = get_pcpu(cpu); 276 + 277 + /* 278 + * Only those at cpu present map has its sys interface. 279 + */ 280 + if (info->flags & XEN_PCPU_FLAGS_INVALID) { 281 + if (pcpu) 282 + unregister_and_remove_pcpu(pcpu); 283 + return 0; 284 + } 285 + 286 + if (!pcpu) { 287 + pcpu = create_and_register_pcpu(info); 288 + if (IS_ERR_OR_NULL(pcpu)) 289 + return -ENODEV; 290 + } else 291 + pcpu_online_status(info, pcpu); 292 + 293 + return 0; 294 + } 295 + 296 + /* 297 + * Sync dom0's pcpu information with xen hypervisor's 298 + */ 299 + static int xen_sync_pcpus(void) 300 + { 301 + /* 302 + * Boot cpu always have cpu_id 0 in xen 303 + */ 304 + uint32_t cpu = 0, max_cpu = 0; 305 + int err = 0; 306 + struct pcpu *pcpu, *tmp; 307 + 308 + mutex_lock(&xen_pcpu_lock); 309 + 310 + while (!err && (cpu <= max_cpu)) { 311 + err = sync_pcpu(cpu, &max_cpu); 312 + cpu++; 313 + } 314 + 315 + if (err) 316 + list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list) 317 + unregister_and_remove_pcpu(pcpu); 318 + 319 + mutex_unlock(&xen_pcpu_lock); 320 + 321 + return err; 322 + } 323 + 324 + static void xen_pcpu_work_fn(struct work_struct *work) 325 + { 326 + xen_sync_pcpus(); 327 + } 328 + static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn); 329 + 330 + static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id) 331 + { 332 + schedule_work(&xen_pcpu_work); 333 + return IRQ_HANDLED; 334 + } 335 + 336 + static int __init xen_pcpu_init(void) 337 + { 338 + int irq, ret; 339 + 340 + if (!xen_initial_domain()) 341 + return -ENODEV; 342 + 343 + irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0, 344 + xen_pcpu_interrupt, 0, 345 + "xen-pcpu", NULL); 346 + if (irq < 0) { 347 + pr_warning(XEN_PCPU "Failed to bind pcpu virq\n"); 348 + return irq; 349 + } 350 + 351 + ret = subsys_system_register(&xen_pcpu_subsys, NULL); 352 + if (ret) { 353 + pr_warning(XEN_PCPU "Failed to register pcpu subsys\n"); 354 + goto err1; 355 + } 356 + 357 + ret = xen_sync_pcpus(); 358 + if (ret) { 
359 + pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); 360 + goto err2; 361 + } 362 + 363 + return 0; 364 + 365 + err2: 366 + bus_unregister(&xen_pcpu_subsys); 367 + err1: 368 + unbind_from_irqhandler(irq, NULL); 369 + return ret; 370 + } 371 + arch_initcall(xen_pcpu_init);
+8
include/xen/interface/platform.h
··· 314 314 }; 315 315 DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); 316 316 317 + #define XENPF_cpu_online 56 318 + #define XENPF_cpu_offline 57 319 + struct xenpf_cpu_ol { 320 + uint32_t cpuid; 321 + }; 322 + DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol); 323 + 317 324 struct xen_platform_op { 318 325 uint32_t cmd; 319 326 uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ ··· 337 330 struct xenpf_getidletime getidletime; 338 331 struct xenpf_set_processor_pminfo set_pminfo; 339 332 struct xenpf_pcpuinfo pcpu_info; 333 + struct xenpf_cpu_ol cpu_ol; 340 334 uint8_t pad[128]; 341 335 } u; 342 336 };
+1
include/xen/interface/xen.h
··· 80 80 #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ 81 81 #define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ 82 82 #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ 83 + #define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */ 83 84 84 85 /* Architecture-specific VIRQ definitions. */ 85 86 #define VIRQ_ARCH_0 16