/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_hyp.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
        vcpu->arch.timer_cpu.active_cleared_last = false;
}

static u64 kvm_phys_timer_read(void)
{
        return timecounter->cc->read(timecounter->cc);
}

static bool timer_is_armed(struct arch_timer_cpu *timer)
{
        return timer->armed;
}

/* timer_arm: as in "arm the timer", not as in ARM the company */
static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
{
        timer->armed = true;
        hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
                      HRTIMER_MODE_ABS);
}

static void timer_disarm(struct arch_timer_cpu *timer)
{
        if (timer_is_armed(timer)) {
                hrtimer_cancel(&timer->timer);
                cancel_work_sync(&timer->expired);
                timer->armed = false;
        }
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;

        /*
         * We disable the timer in the world switch and let it be
         * handled by kvm_timer_sync_hwstate(). Getting a timer
         * interrupt at this point is a sure sign of some major
         * breakage.
         */
        pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
        return IRQ_HANDLED;
}

/*
 * Work function for handling the backup timer that we schedule when a vcpu is
 * no longer running, but had a timer programmed to fire in the future.
 */
static void kvm_timer_inject_irq_work(struct work_struct *work)
{
        struct kvm_vcpu *vcpu;

        vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);

        /*
         * If the vcpu is blocked we want to wake it up so that it will see
         * the timer has expired when entering the guest.
         */
        kvm_vcpu_kick(vcpu);
}
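/*
 * For reference, a sketch of how the backup-timer pieces in this file fit
 * together on the blocking path (all of these functions are defined here):
 *
 *   kvm_timer_schedule()                 vcpu is about to block
 *     timer_arm()                        start the hrtimer
 *       kvm_timer_expire()               hrtimer callback, hard irq context
 *         schedule_work()                defer the wakeup to a workqueue
 *           kvm_timer_inject_irq_work()  work function, process context
 *             kvm_vcpu_kick()            wake the blocked vcpu
 *
 *   kvm_timer_unschedule()
 *     timer_disarm()                     cancel the hrtimer, flush the work
 */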
static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
{
        u64 cval, now;

        cval = vcpu->arch.timer_cpu.cntv_cval;
        now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;

        if (now < cval) {
                u64 ns;

                ns = cyclecounter_cyc2ns(timecounter->cc,
                                         cval - now,
                                         timecounter->mask,
                                         &timecounter->frac);
                return ns;
        }

        return 0;
}

static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
{
        struct arch_timer_cpu *timer;
        struct kvm_vcpu *vcpu;
        u64 ns;

        timer = container_of(hrt, struct arch_timer_cpu, timer);
        vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

        /*
         * Check that the timer has really expired from the guest's
         * PoV (NTP on the host may have forced it to expire
         * early). If we should have slept longer, restart it.
         */
        ns = kvm_timer_compute_delta(vcpu);
        if (unlikely(ns)) {
                hrtimer_forward_now(hrt, ns_to_ktime(ns));
                return HRTIMER_RESTART;
        }

        schedule_work(&timer->expired);
        return HRTIMER_NORESTART;
}

static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
                (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
}

bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        u64 cval, now;

        if (!kvm_timer_irq_can_fire(vcpu))
                return false;

        cval = timer->cntv_cval;
        now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;

        return cval <= now;
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
{
        int ret;
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        BUG_ON(!vgic_initialized(vcpu->kvm));

        timer->active_cleared_last = false;
        timer->irq.level = new_level;
        trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
                                   timer->irq.level);
        ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
                                         timer->irq.irq,
                                         timer->irq.level);
        WARN_ON(ret);
}

/*
 * Check if there was a change in the timer state (should we raise or lower
 * the line level to the GIC).
 */
static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        /*
         * If userspace modified the timer registers via SET_ONE_REG before
         * the vgic was initialized, we mustn't set the timer->irq.level value
         * because the guest would never see the interrupt.  Instead wait
         * until we call this function from kvm_timer_flush_hwstate.
         */
        if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
                return -ENODEV;

        if (kvm_timer_should_fire(vcpu) != timer->irq.level)
                kvm_timer_update_irq(vcpu, !timer->irq.level);

        return 0;
}
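/*
 * Illustrative example of the checks above, with made-up values: assume
 * cntv_ctl has ARCH_TIMER_CTRL_ENABLE set and ARCH_TIMER_CTRL_IT_MASK
 * clear, cntv_cval == 1000, and the physical counter minus cntvoff reads
 * 1200.  Then kvm_timer_irq_can_fire() is true and cval <= now, so
 * kvm_timer_should_fire() returns true; if irq.level was still low,
 * kvm_timer_update_state() raises it via kvm_timer_update_irq().
 */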
/*
 * Schedule the background timer before calling kvm_vcpu_block, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
void kvm_timer_schedule(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        BUG_ON(timer_is_armed(timer));

        /*
         * No need to schedule a background timer if the guest timer has
         * already expired, because kvm_vcpu_block will return before putting
         * the thread to sleep.
         */
        if (kvm_timer_should_fire(vcpu))
                return;

        /*
         * If the timer is not capable of raising interrupts (disabled or
         * masked), then there's no more work for us to do.
         */
        if (!kvm_timer_irq_can_fire(vcpu))
                return;

        /* The timer has not yet expired, schedule a background timer */
        timer_arm(timer, kvm_timer_compute_delta(vcpu));
}

void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        timer_disarm(timer);
}

/**
 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the virtual timer has expired while we were running in the host,
 * and inject an interrupt if that was the case.
 */
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        bool phys_active;
        int ret;

        if (kvm_timer_update_state(vcpu))
                return;

        /*
         * If we enter the guest with the virtual input level to the VGIC
         * asserted, then we have already told the VGIC what we need to, and
         * we don't need to exit from the guest until the guest deactivates
         * the already injected interrupt, so we should set the hardware
         * active state to prevent unnecessary exits from the guest.
         *
         * Also, if we enter the guest with the virtual timer interrupt active,
         * then it must be active on the physical distributor, because we set
         * the HW bit and the guest must be able to deactivate the virtual and
         * physical interrupt at the same time.
         *
         * Conversely, if the virtual input level is deasserted and the virtual
         * interrupt is not active, then always clear the hardware active state
         * to ensure that hardware interrupts from the timer trigger a guest
         * exit.
         */
        phys_active = timer->irq.level ||
                      kvm_vgic_map_is_active(vcpu, timer->irq.irq);

        /*
         * We want to avoid hitting the (re)distributor as much as
         * possible, as this is a potentially expensive MMIO access
         * (not to mention locks in the irq layer), and a solution for
         * this is to cache the "active" state in memory.
         *
         * Things to consider: we cannot cache an "active set" state,
         * because the HW can change this behind our back (it becomes
         * "clear" in the HW).  We must then restrict the caching to
         * the "clear" state.
         *
         * The cache is invalidated on:
         * - vcpu put, indicating that the HW cannot be trusted to be
         *   in a sane state on the next vcpu load,
         * - any change in the interrupt state
         *
         * Usage conditions:
         * - cached value is "active clear"
         * - value to be programmed is "active clear"
         */
        if (timer->active_cleared_last && !phys_active)
                return;

        ret = irq_set_irqchip_state(host_vtimer_irq,
                                    IRQCHIP_STATE_ACTIVE,
                                    phys_active);
        WARN_ON(ret);

        timer->active_cleared_last = !phys_active;
}

/**
 * kvm_timer_sync_hwstate - sync timer state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the virtual timer has expired while we were running in the guest,
 * and inject an interrupt if that was the case.
 */
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        BUG_ON(timer_is_armed(timer));

        /*
         * The guest could have modified the timer registers or the timer
         * could have expired, update the timer state.
         */
        kvm_timer_update_state(vcpu);
}
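/*
 * Summary of the active-state programming in kvm_timer_flush_hwstate(),
 * a condensed view of the logic above:
 *
 *   irq.level  map_is_active  phys_active  distributor access
 *   ---------  -------------  -----------  -----------------------------
 *   true       any            true         set active, always programmed
 *   false      true           true         set active, always programmed
 *   false      false          false        clear active, skipped when
 *                                          active_cleared_last is set
 */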
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
                         const struct kvm_irq_level *irq)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        /*
         * The vcpu timer irq number cannot be determined in
         * kvm_timer_vcpu_init() because it is called long before
         * kvm_vcpu_set_target(). To handle this, we determine the
         * vcpu timer irq number when the vcpu is reset.
         */
        timer->irq.irq = irq->irq;

        /*
         * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
         * and to 0 for ARMv7.  We provide an implementation that always
         * resets the timer to be disabled and unmasked and is compliant with
         * the ARMv7 architecture.
         */
        timer->cntv_ctl = 0;
        kvm_timer_update_state(vcpu);

        return 0;
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
        hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        timer->timer.function = kvm_timer_expire;
}

static void kvm_timer_init_interrupt(void *info)
{
        enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        switch (regid) {
        case KVM_REG_ARM_TIMER_CTL:
                timer->cntv_ctl = value;
                break;
        case KVM_REG_ARM_TIMER_CNT:
                vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
                break;
        case KVM_REG_ARM_TIMER_CVAL:
                timer->cntv_cval = value;
                break;
        default:
                return -1;
        }

        kvm_timer_update_state(vcpu);
        return 0;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        switch (regid) {
        case KVM_REG_ARM_TIMER_CTL:
                return timer->cntv_ctl;
        case KVM_REG_ARM_TIMER_CNT:
                return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
        case KVM_REG_ARM_TIMER_CVAL:
                return timer->cntv_cval;
        }
        return (u64)-1;
}
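/*
 * Worked example of the cntvoff arithmetic used by the two accessors above
 * (the numbers are made up): if userspace writes KVM_REG_ARM_TIMER_CNT = 100
 * while the physical counter reads 5000, cntvoff becomes 4900.  When the
 * physical counter later reads 5250, reading KVM_REG_ARM_TIMER_CNT back
 * yields 5250 - 4900 = 350, i.e. the guest view of the counter has advanced
 * by exactly the 250 cycles that elapsed on the host.
 */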
static int kvm_timer_starting_cpu(unsigned int cpu)
{
        kvm_timer_init_interrupt(NULL);
        return 0;
}

static int kvm_timer_dying_cpu(unsigned int cpu)
{
        disable_percpu_irq(host_vtimer_irq);
        return 0;
}

int kvm_timer_hyp_init(void)
{
        struct arch_timer_kvm_info *info;
        int err;

        info = arch_timer_get_kvm_info();
        timecounter = &info->timecounter;

        if (!timecounter->cc) {
                kvm_err("kvm_arch_timer: uninitialized timecounter\n");
                return -ENODEV;
        }

        if (info->virtual_irq <= 0) {
                kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
                        info->virtual_irq);
                return -ENODEV;
        }
        host_vtimer_irq = info->virtual_irq;

        host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
        if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
            host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
                kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
                        host_vtimer_irq);
                host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
        }

        err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
                                 "kvm guest timer", kvm_get_running_vcpus());
        if (err) {
                kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
                        host_vtimer_irq, err);
                return err;
        }

        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);

        cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
                          "kvm/arm/timer:starting", kvm_timer_starting_cpu,
                          kvm_timer_dying_cpu);
        return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

        timer_disarm(timer);
        kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct irq_desc *desc;
        struct irq_data *data;
        int phys_irq;
        int ret;

        if (timer->enabled)
                return 0;

        /*
         * Find the physical IRQ number corresponding to host_vtimer_irq.
         */
        desc = irq_to_desc(host_vtimer_irq);
        if (!desc) {
                kvm_err("%s: no interrupt descriptor\n", __func__);
                return -EINVAL;
        }

        data = irq_desc_get_irq_data(desc);
        while (data->parent_data)
                data = data->parent_data;

        phys_irq = data->hwirq;

        /*
         * Tell the VGIC that the virtual interrupt is tied to a
         * physical interrupt.  We do that once per VCPU.
         */
        ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
        if (ret)
                return ret;

        timer->enabled = 1;

        return 0;
}

void kvm_timer_init(struct kvm *kvm)
{
        kvm->arch.timer.cntvoff = kvm_phys_timer_read();
}

/*
 * On a VHE system, we need only configure trapping of physical timer and
 * counter accesses in EL0 and EL1 once, not for every world switch.
 * The host kernel runs at EL2 with HCR_EL2.TGE == 1, which makes those
 * bits have no effect on host kernel execution.
 */
void kvm_timer_init_vhe(void)
{
        /* When HCR_EL2.E2H == 1, EL1PCEN and EL1PCTEN are shifted by 10 */
        u32 cnthctl_shift = 10;
        u64 val;

        /*
         * Disallow physical timer access for the guest.
         * Physical counter access is allowed.
         */
        val = read_sysreg(cnthctl_el2);
        val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
        val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
        write_sysreg(val, cnthctl_el2);
}
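/*
 * For reference: with HCR_EL2.E2H == 0 the EL1PCTEN and EL1PCEN controls
 * sit in CNTHCTL_EL2[1:0], which is how CNTHCTL_EL1PCTEN and CNTHCTL_EL1PCEN
 * are defined; with E2H == 1 the same controls live at CNTHCTL_EL2[11:10],
 * hence the shift by 10 above.  The net effect is that guest physical
 * counter reads are permitted (EL1PCTEN set) while guest physical timer
 * accesses trap to EL2 (EL1PCEN cleared).
 */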