/* at v4.11 - 611 lines, 17 kB (web viewer page header, not part of the source) */
1/* 2 * Copyright (C) 2012 ARM Ltd. 3 * Author: Marc Zyngier <marc.zyngier@arm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License version 2 as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19#include <linux/cpu.h> 20#include <linux/kvm.h> 21#include <linux/kvm_host.h> 22#include <linux/interrupt.h> 23#include <linux/irq.h> 24 25#include <clocksource/arm_arch_timer.h> 26#include <asm/arch_timer.h> 27#include <asm/kvm_hyp.h> 28 29#include <kvm/arm_vgic.h> 30#include <kvm/arm_arch_timer.h> 31 32#include "trace.h" 33 34static struct timecounter *timecounter; 35static unsigned int host_vtimer_irq; 36static u32 host_vtimer_irq_flags; 37 38void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) 39{ 40 vcpu_vtimer(vcpu)->active_cleared_last = false; 41} 42 43u64 kvm_phys_timer_read(void) 44{ 45 return timecounter->cc->read(timecounter->cc); 46} 47 48static bool timer_is_armed(struct arch_timer_cpu *timer) 49{ 50 return timer->armed; 51} 52 53/* timer_arm: as in "arm the timer", not as in ARM the company */ 54static void timer_arm(struct arch_timer_cpu *timer, u64 ns) 55{ 56 timer->armed = true; 57 hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), 58 HRTIMER_MODE_ABS); 59} 60 61static void timer_disarm(struct arch_timer_cpu *timer) 62{ 63 if (timer_is_armed(timer)) { 64 hrtimer_cancel(&timer->timer); 65 cancel_work_sync(&timer->expired); 66 timer->armed = false; 67 } 68} 69 70static irqreturn_t 
kvm_arch_timer_handler(int irq, void *dev_id) 71{ 72 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; 73 74 /* 75 * We disable the timer in the world switch and let it be 76 * handled by kvm_timer_sync_hwstate(). Getting a timer 77 * interrupt at this point is a sure sign of some major 78 * breakage. 79 */ 80 pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); 81 return IRQ_HANDLED; 82} 83 84/* 85 * Work function for handling the backup timer that we schedule when a vcpu is 86 * no longer running, but had a timer programmed to fire in the future. 87 */ 88static void kvm_timer_inject_irq_work(struct work_struct *work) 89{ 90 struct kvm_vcpu *vcpu; 91 92 vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); 93 94 /* 95 * If the vcpu is blocked we want to wake it up so that it will see 96 * the timer has expired when entering the guest. 97 */ 98 kvm_vcpu_kick(vcpu); 99} 100 101static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) 102{ 103 u64 cval, now; 104 105 cval = timer_ctx->cnt_cval; 106 now = kvm_phys_timer_read() - timer_ctx->cntvoff; 107 108 if (now < cval) { 109 u64 ns; 110 111 ns = cyclecounter_cyc2ns(timecounter->cc, 112 cval - now, 113 timecounter->mask, 114 &timecounter->frac); 115 return ns; 116 } 117 118 return 0; 119} 120 121static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) 122{ 123 return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && 124 (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); 125} 126 127/* 128 * Returns the earliest expiration time in ns among guest timers. 129 * Note that it will return 0 if none of timers can fire. 
130 */ 131static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) 132{ 133 u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX; 134 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 135 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 136 137 if (kvm_timer_irq_can_fire(vtimer)) 138 min_virt = kvm_timer_compute_delta(vtimer); 139 140 if (kvm_timer_irq_can_fire(ptimer)) 141 min_phys = kvm_timer_compute_delta(ptimer); 142 143 /* If none of timers can fire, then return 0 */ 144 if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX)) 145 return 0; 146 147 return min(min_virt, min_phys); 148} 149 150static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) 151{ 152 struct arch_timer_cpu *timer; 153 struct kvm_vcpu *vcpu; 154 u64 ns; 155 156 timer = container_of(hrt, struct arch_timer_cpu, timer); 157 vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); 158 159 /* 160 * Check that the timer has really expired from the guest's 161 * PoV (NTP on the host may have forced it to expire 162 * early). If we should have slept longer, restart it. 
163 */ 164 ns = kvm_timer_earliest_exp(vcpu); 165 if (unlikely(ns)) { 166 hrtimer_forward_now(hrt, ns_to_ktime(ns)); 167 return HRTIMER_RESTART; 168 } 169 170 schedule_work(&timer->expired); 171 return HRTIMER_NORESTART; 172} 173 174bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) 175{ 176 u64 cval, now; 177 178 if (!kvm_timer_irq_can_fire(timer_ctx)) 179 return false; 180 181 cval = timer_ctx->cnt_cval; 182 now = kvm_phys_timer_read() - timer_ctx->cntvoff; 183 184 return cval <= now; 185} 186 187static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, 188 struct arch_timer_context *timer_ctx) 189{ 190 int ret; 191 192 BUG_ON(!vgic_initialized(vcpu->kvm)); 193 194 timer_ctx->active_cleared_last = false; 195 timer_ctx->irq.level = new_level; 196 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, 197 timer_ctx->irq.level); 198 199 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, 200 timer_ctx->irq.level); 201 WARN_ON(ret); 202} 203 204/* 205 * Check if there was a change in the timer state (should we raise or lower 206 * the line level to the GIC). 207 */ 208static int kvm_timer_update_state(struct kvm_vcpu *vcpu) 209{ 210 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 211 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 212 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 213 214 /* 215 * If userspace modified the timer registers via SET_ONE_REG before 216 * the vgic was initialized, we mustn't set the vtimer->irq.level value 217 * because the guest would never see the interrupt. Instead wait 218 * until we call this function from kvm_timer_flush_hwstate. 
219 */ 220 if (!vgic_initialized(vcpu->kvm) || !timer->enabled) 221 return -ENODEV; 222 223 if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) 224 kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); 225 226 if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) 227 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); 228 229 return 0; 230} 231 232/* Schedule the background timer for the emulated timer. */ 233static void kvm_timer_emulate(struct kvm_vcpu *vcpu, 234 struct arch_timer_context *timer_ctx) 235{ 236 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 237 238 if (kvm_timer_should_fire(timer_ctx)) 239 return; 240 241 if (!kvm_timer_irq_can_fire(timer_ctx)) 242 return; 243 244 /* The timer has not yet expired, schedule a background timer */ 245 timer_arm(timer, kvm_timer_compute_delta(timer_ctx)); 246} 247 248/* 249 * Schedule the background timer before calling kvm_vcpu_block, so that this 250 * thread is removed from its waitqueue and made runnable when there's a timer 251 * interrupt to handle. 252 */ 253void kvm_timer_schedule(struct kvm_vcpu *vcpu) 254{ 255 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 256 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 257 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 258 259 BUG_ON(timer_is_armed(timer)); 260 261 /* 262 * No need to schedule a background timer if any guest timer has 263 * already expired, because kvm_vcpu_block will return before putting 264 * the thread to sleep. 265 */ 266 if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer)) 267 return; 268 269 /* 270 * If both timers are not capable of raising interrupts (disabled or 271 * masked), then there's no more work for us to do. 272 */ 273 if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer)) 274 return; 275 276 /* 277 * The guest timers have not yet expired, schedule a background timer. 278 * Set the earliest expiration time among the guest timers. 
279 */ 280 timer_arm(timer, kvm_timer_earliest_exp(vcpu)); 281} 282 283void kvm_timer_unschedule(struct kvm_vcpu *vcpu) 284{ 285 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 286 timer_disarm(timer); 287} 288 289/** 290 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu 291 * @vcpu: The vcpu pointer 292 * 293 * Check if the virtual timer has expired while we were running in the host, 294 * and inject an interrupt if that was the case. 295 */ 296void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) 297{ 298 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 299 bool phys_active; 300 int ret; 301 302 if (kvm_timer_update_state(vcpu)) 303 return; 304 305 /* Set the background timer for the physical timer emulation. */ 306 kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); 307 308 /* 309 * If we enter the guest with the virtual input level to the VGIC 310 * asserted, then we have already told the VGIC what we need to, and 311 * we don't need to exit from the guest until the guest deactivates 312 * the already injected interrupt, so therefore we should set the 313 * hardware active state to prevent unnecessary exits from the guest. 314 * 315 * Also, if we enter the guest with the virtual timer interrupt active, 316 * then it must be active on the physical distributor, because we set 317 * the HW bit and the guest must be able to deactivate the virtual and 318 * physical interrupt at the same time. 319 * 320 * Conversely, if the virtual input level is deasserted and the virtual 321 * interrupt is not active, then always clear the hardware active state 322 * to ensure that hardware interrupts from the timer triggers a guest 323 * exit. 
324 */ 325 phys_active = vtimer->irq.level || 326 kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); 327 328 /* 329 * We want to avoid hitting the (re)distributor as much as 330 * possible, as this is a potentially expensive MMIO access 331 * (not to mention locks in the irq layer), and a solution for 332 * this is to cache the "active" state in memory. 333 * 334 * Things to consider: we cannot cache an "active set" state, 335 * because the HW can change this behind our back (it becomes 336 * "clear" in the HW). We must then restrict the caching to 337 * the "clear" state. 338 * 339 * The cache is invalidated on: 340 * - vcpu put, indicating that the HW cannot be trusted to be 341 * in a sane state on the next vcpu load, 342 * - any change in the interrupt state 343 * 344 * Usage conditions: 345 * - cached value is "active clear" 346 * - value to be programmed is "active clear" 347 */ 348 if (vtimer->active_cleared_last && !phys_active) 349 return; 350 351 ret = irq_set_irqchip_state(host_vtimer_irq, 352 IRQCHIP_STATE_ACTIVE, 353 phys_active); 354 WARN_ON(ret); 355 356 vtimer->active_cleared_last = !phys_active; 357} 358 359/** 360 * kvm_timer_sync_hwstate - sync timer state from cpu 361 * @vcpu: The vcpu pointer 362 * 363 * Check if the virtual timer has expired while we were running in the guest, 364 * and inject an interrupt if that was the case. 365 */ 366void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 367{ 368 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 369 370 /* 371 * This is to cancel the background timer for the physical timer 372 * emulation if it is set. 373 */ 374 timer_disarm(timer); 375 376 /* 377 * The guest could have modified the timer registers or the timer 378 * could have expired, update the timer state. 
379 */ 380 kvm_timer_update_state(vcpu); 381} 382 383int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 384 const struct kvm_irq_level *virt_irq, 385 const struct kvm_irq_level *phys_irq) 386{ 387 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 388 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 389 390 /* 391 * The vcpu timer irq number cannot be determined in 392 * kvm_timer_vcpu_init() because it is called much before 393 * kvm_vcpu_set_target(). To handle this, we determine 394 * vcpu timer irq number when the vcpu is reset. 395 */ 396 vtimer->irq.irq = virt_irq->irq; 397 ptimer->irq.irq = phys_irq->irq; 398 399 /* 400 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 401 * and to 0 for ARMv7. We provide an implementation that always 402 * resets the timer to be disabled and unmasked and is compliant with 403 * the ARMv7 architecture. 404 */ 405 vtimer->cnt_ctl = 0; 406 ptimer->cnt_ctl = 0; 407 kvm_timer_update_state(vcpu); 408 409 return 0; 410} 411 412/* Make the updates of cntvoff for all vtimer contexts atomic */ 413static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) 414{ 415 int i; 416 struct kvm *kvm = vcpu->kvm; 417 struct kvm_vcpu *tmp; 418 419 mutex_lock(&kvm->lock); 420 kvm_for_each_vcpu(i, tmp, kvm) 421 vcpu_vtimer(tmp)->cntvoff = cntvoff; 422 423 /* 424 * When called from the vcpu create path, the CPU being created is not 425 * included in the loop above, so we just set it here as well. 426 */ 427 vcpu_vtimer(vcpu)->cntvoff = cntvoff; 428 mutex_unlock(&kvm->lock); 429} 430 431void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) 432{ 433 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 434 435 /* Synchronize cntvoff across all vtimers of a VM. 
*/ 436 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); 437 vcpu_ptimer(vcpu)->cntvoff = 0; 438 439 INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); 440 hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 441 timer->timer.function = kvm_timer_expire; 442} 443 444static void kvm_timer_init_interrupt(void *info) 445{ 446 enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 447} 448 449int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) 450{ 451 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 452 453 switch (regid) { 454 case KVM_REG_ARM_TIMER_CTL: 455 vtimer->cnt_ctl = value; 456 break; 457 case KVM_REG_ARM_TIMER_CNT: 458 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); 459 break; 460 case KVM_REG_ARM_TIMER_CVAL: 461 vtimer->cnt_cval = value; 462 break; 463 default: 464 return -1; 465 } 466 467 kvm_timer_update_state(vcpu); 468 return 0; 469} 470 471u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) 472{ 473 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 474 475 switch (regid) { 476 case KVM_REG_ARM_TIMER_CTL: 477 return vtimer->cnt_ctl; 478 case KVM_REG_ARM_TIMER_CNT: 479 return kvm_phys_timer_read() - vtimer->cntvoff; 480 case KVM_REG_ARM_TIMER_CVAL: 481 return vtimer->cnt_cval; 482 } 483 return (u64)-1; 484} 485 486static int kvm_timer_starting_cpu(unsigned int cpu) 487{ 488 kvm_timer_init_interrupt(NULL); 489 return 0; 490} 491 492static int kvm_timer_dying_cpu(unsigned int cpu) 493{ 494 disable_percpu_irq(host_vtimer_irq); 495 return 0; 496} 497 498int kvm_timer_hyp_init(void) 499{ 500 struct arch_timer_kvm_info *info; 501 int err; 502 503 info = arch_timer_get_kvm_info(); 504 timecounter = &info->timecounter; 505 506 if (!timecounter->cc) { 507 kvm_err("kvm_arch_timer: uninitialized timecounter\n"); 508 return -ENODEV; 509 } 510 511 if (info->virtual_irq <= 0) { 512 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", 513 info->virtual_irq); 514 return -ENODEV; 515 } 516 
host_vtimer_irq = info->virtual_irq; 517 518 host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); 519 if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && 520 host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { 521 kvm_err("Invalid trigger for IRQ%d, assuming level low\n", 522 host_vtimer_irq); 523 host_vtimer_irq_flags = IRQF_TRIGGER_LOW; 524 } 525 526 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 527 "kvm guest timer", kvm_get_running_vcpus()); 528 if (err) { 529 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", 530 host_vtimer_irq, err); 531 return err; 532 } 533 534 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); 535 536 cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, 537 "kvm/arm/timer:starting", kvm_timer_starting_cpu, 538 kvm_timer_dying_cpu); 539 return err; 540} 541 542void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) 543{ 544 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 545 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 546 547 timer_disarm(timer); 548 kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); 549} 550 551int kvm_timer_enable(struct kvm_vcpu *vcpu) 552{ 553 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 554 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 555 struct irq_desc *desc; 556 struct irq_data *data; 557 int phys_irq; 558 int ret; 559 560 if (timer->enabled) 561 return 0; 562 563 /* 564 * Find the physical IRQ number corresponding to the host_vtimer_irq 565 */ 566 desc = irq_to_desc(host_vtimer_irq); 567 if (!desc) { 568 kvm_err("%s: no interrupt descriptor\n", __func__); 569 return -EINVAL; 570 } 571 572 data = irq_desc_get_irq_data(desc); 573 while (data->parent_data) 574 data = data->parent_data; 575 576 phys_irq = data->hwirq; 577 578 /* 579 * Tell the VGIC that the virtual interrupt is tied to a 580 * physical interrupt. We do that once per VCPU. 
581 */ 582 ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); 583 if (ret) 584 return ret; 585 586 timer->enabled = 1; 587 588 return 0; 589} 590 591/* 592 * On VHE system, we only need to configure trap on physical timer and counter 593 * accesses in EL0 and EL1 once, not for every world switch. 594 * The host kernel runs at EL2 with HCR_EL2.TGE == 1, 595 * and this makes those bits have no effect for the host kernel execution. 596 */ 597void kvm_timer_init_vhe(void) 598{ 599 /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ 600 u32 cnthctl_shift = 10; 601 u64 val; 602 603 /* 604 * Disallow physical timer access for the guest. 605 * Physical counter access is allowed. 606 */ 607 val = read_sysreg(cnthctl_el2); 608 val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); 609 val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); 610 write_sysreg(val, cnthctl_el2); 611}