// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/sections.h>

#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif

DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);

/* Per-CPU variable containing the currently running vcpu. */
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);

/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);

static bool vgic_present;

static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);

static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
	__this_cpu_write(kvm_arm_running_vcpu, vcpu);
}

DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

/**
 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
 * Must be called from non-preemptible context.
 */
struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
{
	return __this_cpu_read(kvm_arm_running_vcpu);
}

/**
 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
 */
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
	return &kvm_arm_running_vcpu;
}

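/*
 * Only send an IPI to the target vCPU if it is still executing guest
 * code; a vCPU that has already started exiting will notice pending
 * requests before its next guest entry.
 */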
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

int kvm_arch_hardware_setup(void)
{
	return 0;
}

void kvm_arch_check_processor_compat(void *rtn)
{
	*(int *)rtn = 0;
}


/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm: pointer to the KVM struct
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int ret, cpu;

	ret = kvm_arm_setup_stage2(kvm, type);
	if (ret)
		return ret;

	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
	if (!kvm->arch.last_vcpu_ran)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;

	ret = kvm_alloc_stage2_pgd(kvm);
	if (ret)
		goto out_fail_alloc;

	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
	if (ret)
		goto out_free_stage2_pgd;

	kvm_vgic_early_init(kvm);

	/* Mark the initial VMID generation invalid */
	kvm->arch.vmid.vmid_gen = 0;

	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->arch.max_vcpus = vgic_present ?
				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;

	return ret;
out_free_stage2_pgd:
	kvm_free_stage2_pgd(kvm);
out_fail_alloc:
	free_percpu(kvm->arch.last_vcpu_ran);
	kvm->arch.last_vcpu_ran = NULL;
	return ret;
}

bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}


/**
 * kvm_arch_destroy_vm - destroy the VM data structure
 * @kvm: pointer to the KVM struct
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	int i;

	kvm_vgic_destroy(kvm);

	free_percpu(kvm->arch.last_vcpu_ran);
	kvm->arch.last_vcpu_ran = NULL;

	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm_arch_vcpu_free(kvm->vcpus[i]);
			kvm->vcpus[i] = NULL;
		}
	}
	atomic_set(&kvm->online_vcpus, 0);
}

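/*
 * Report which arm-specific KVM capabilities this host supports;
 * anything not handled here is forwarded to
 * kvm_arch_vm_ioctl_check_extension() in the default case.
 */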
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;
	switch (ext) {
	case KVM_CAP_IRQCHIP:
		r = vgic_present;
		break;
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ARM_PSCI:
	case KVM_CAP_ARM_PSCI_0_2:
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_VCPU_EVENTS:
		r = 1;
		break;
	case KVM_CAP_ARM_SET_DEVICE_ADDR:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
		r = num_online_cpus();
		break;
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_MAX_VCPU_ID;
		break;
	case KVM_CAP_MSI_DEVID:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm->arch.vgic.msis_require_devid;
		break;
	case KVM_CAP_ARM_USER_IRQ:
		/*
		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
		 * (bump this number if adding more devices)
		 */
		r = 1;
		break;
	default:
		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
		break;
	}
	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}

struct kvm *kvm_arch_alloc_vm(void)
{
	if (!has_vhe())
		return kzalloc(sizeof(struct kvm), GFP_KERNEL);

	return vzalloc(sizeof(struct kvm));
}

void kvm_arch_free_vm(struct kvm *kvm)
{
	if (!has_vhe())
		kfree(kvm);
	else
		vfree(kvm);
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
	int err;
	struct kvm_vcpu *vcpu;

	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
		err = -EBUSY;
		goto out;
	}

	if (id >= kvm->arch.max_vcpus) {
		err = -EINVAL;
		goto out;
	}

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu) {
		err = -ENOMEM;
		goto out;
	}

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
	if (err)
		goto vcpu_uninit;

	return vcpu;
vcpu_uninit:
	kvm_vcpu_uninit(vcpu);
free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
		static_branch_dec(&userspace_irqchip_in_use);

	kvm_mmu_free_memory_caches(vcpu);
	kvm_timer_vcpu_terminate(vcpu);
	kvm_pmu_vcpu_destroy(vcpu);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_free(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_timer_is_pending(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_vgic_v4_enable_doorbell(vcpu);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_vgic_v4_disable_doorbell(vcpu);
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	/* Force users to call KVM_ARM_VCPU_INIT */
	vcpu->arch.target = -1;
	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);

	/* Set up the timer */
	kvm_timer_vcpu_init(vcpu);

	kvm_arm_reset_debug_ptr(vcpu);

	return kvm_vgic_vcpu_init(vcpu);
}

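/*
 * Called when this vCPU is scheduled in on a physical CPU: flush stale
 * TLB entries if a different vCPU of this VM ran here last, then load
 * the vgic, timer, sysreg, FP and PMU state onto the hardware.
 */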
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	int *last_ran;
	kvm_host_data_t *cpu_data;

	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
	cpu_data = this_cpu_ptr(&kvm_host_data);

	/*
	 * We might get preempted before the vCPU actually runs, but
	 * over-invalidation doesn't affect correctness.
	 */
	if (*last_ran != vcpu->vcpu_id) {
		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
		*last_ran = vcpu->vcpu_id;
	}

	vcpu->cpu = cpu;
	vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;

	kvm_arm_set_running_vcpu(vcpu);
	kvm_vgic_load(vcpu);
	kvm_timer_vcpu_load(vcpu);
	kvm_vcpu_load_sysregs(vcpu);
	kvm_arch_vcpu_load_fp(vcpu);
	kvm_vcpu_pmu_restore_guest(vcpu);

	if (single_task_running())
		vcpu_clear_wfe_traps(vcpu);
	else
		vcpu_set_wfe_traps(vcpu);

	vcpu_ptrauth_setup_lazy(vcpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_put_fp(vcpu);
	kvm_vcpu_put_sysregs(vcpu);
	kvm_timer_vcpu_put(vcpu);
	kvm_vgic_put(vcpu);
	kvm_vcpu_pmu_restore_host(vcpu);

	vcpu->cpu = -1;

	kvm_arm_set_running_vcpu(NULL);
}

static void vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	if (vcpu->arch.power_off)
		mp_state->mp_state = KVM_MP_STATE_STOPPED;
	else
		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.power_off = false;
		break;
	case KVM_MP_STATE_STOPPED:
		vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/**
 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 * @v: The VCPU pointer
 *
 * If the guest CPU is not waiting for interrupts or an interrupt line is
 * asserted, the CPU is by definition runnable.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !v->arch.power_off && !v->arch.pause);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu_mode_priv(vcpu);
}

/* Just ensure a guest exit from a particular CPU */
static void exit_vm_noop(void *info)
{
}

void force_vm_exit(const cpumask_t *mask)
{
	preempt_disable();
	smp_call_function_many(mask, exit_vm_noop, NULL, true);
	preempt_enable();
}

/**
 * need_new_vmid_gen - check that the VMID is still valid
 * @vmid: The VMID to check
 *
 * Returns true if there is a new generation of VMIDs being used.
 *
 * The hardware supports a limited set of values with the value zero reserved
 * for the host, so we check if an assigned value belongs to a previous
 * generation, which requires us to assign a new value. If we're the first to
 * use a VMID for the new generation, we must flush necessary caches and TLBs
 * on all CPUs.
 */
static bool need_new_vmid_gen(struct kvm_vmid *vmid)
{
	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
}

/**
 * update_vmid - Update the vmid with a valid VMID for the current generation
 * @vmid: The stage-2 VMID information struct
 */
static void update_vmid(struct kvm_vmid *vmid)
{
	if (!need_new_vmid_gen(vmid))
		return;

	spin_lock(&kvm_vmid_lock);

	/*
	 * We need to re-check the vmid_gen here under the lock: if another
	 * vcpu already allocated a valid vmid for this vm, this vcpu must
	 * use the same vmid.
	 */
	if (!need_new_vmid_gen(vmid)) {
		spin_unlock(&kvm_vmid_lock);
		return;
	}

	/* First user of a new VMID generation? */
	if (unlikely(kvm_next_vmid == 0)) {
		atomic64_inc(&kvm_vmid_gen);
		kvm_next_vmid = 1;

		/*
		 * On SMP we know no other CPUs can use this CPU's or each
		 * other's VMID after force_vm_exit returns since the
		 * kvm_vmid_lock blocks them from re-entry to the guest.
		 */
		force_vm_exit(cpu_all_mask);
		/*
		 * Now broadcast TLB + ICACHE invalidation over the inner
		 * shareable domain to make sure all data structures are
		 * clean.
		 */
		kvm_call_hyp(__kvm_flush_vm_context);
	}

	vmid->vmid = kvm_next_vmid;
	kvm_next_vmid++;
	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;

	smp_wmb();
	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));

	spin_unlock(&kvm_vmid_lock);
}

static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	int ret = 0;

	if (likely(vcpu->arch.has_run_once))
		return 0;

	if (!kvm_arm_vcpu_is_finalized(vcpu))
		return -EPERM;

	vcpu->arch.has_run_once = true;

	if (likely(irqchip_in_kernel(kvm))) {
		/*
		 * Map the VGIC hardware resources before running a vcpu the
		 * first time on this VM.
		 */
		if (unlikely(!vgic_ready(kvm))) {
			ret = kvm_vgic_map_resources(kvm);
			if (ret)
				return ret;
		}
	} else {
		/*
		 * Tell the rest of the code that there are userspace irqchip
		 * VMs in the wild.
		 */
		static_branch_inc(&userspace_irqchip_in_use);
	}

	ret = kvm_timer_enable(vcpu);
	if (ret)
		return ret;

	ret = kvm_arm_pmu_v3_enable(vcpu);

	return ret;
}

bool kvm_arch_intc_initialized(struct kvm *kvm)
{
	return vgic_initialized(kvm);
}

void kvm_arm_halt_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.pause = true;
	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}

void kvm_arm_resume_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.pause = false;
		swake_up_one(kvm_arch_vcpu_wq(vcpu));
	}
}

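/*
 * Put the vCPU to sleep on its wait queue until it is neither powered
 * off nor paused; an interrupted sleep re-arms the KVM_REQ_SLEEP
 * request so that we come back here after the signal is handled.
 */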
static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);

	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
				       (!vcpu->arch.pause)));

	if (vcpu->arch.power_off || vcpu->arch.pause) {
		/* We were woken to handle a signal; request sleep again later. */
		kvm_make_request(KVM_REQ_SLEEP, vcpu);
	}

	/*
	 * Make sure we will observe a potential reset request if we've
	 * observed a change to the power state. Pairs with the smp_wmb() in
	 * kvm_psci_vcpu_on().
	 */
	smp_rmb();
}


static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.target >= 0;
}

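/* Handle any requests posted to this vCPU before it enters the guest. */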
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
			vcpu_req_sleep(vcpu);

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		/*
		 * Clear IRQ_PENDING requests that were made to guarantee
		 * that a VCPU sees new virtual interrupts.
		 */
		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
	}
}

/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu: The VCPU pointer
 * @run: The kvm_run structure pointer used for userspace state exchange
 *
 * This function is called through the KVM_RUN ioctl from user space. It
 * executes VM code in a loop until the time slice for the process is used up
 * or some emulation is needed from user space, in which case the function
 * returns 0 with the kvm_run structure filled in with the data required for
 * the requested emulation.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	int ret;

	if (unlikely(!kvm_vcpu_initialized(vcpu)))
		return -ENOEXEC;

	ret = kvm_vcpu_first_run_init(vcpu);
	if (ret)
		return ret;

	if (run->exit_reason == KVM_EXIT_MMIO) {
		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
		if (ret)
			return ret;
	}

	if (run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/*
		 * Check conditions before entering the guest
		 */
		cond_resched();

		update_vmid(&vcpu->kvm->arch.vmid);

		check_vcpu_requests(vcpu);

		/*
		 * Preparing the interrupts to be injected also
		 * involves poking the GIC, which must be done in a
		 * non-preemptible context.
		 */
		preempt_disable();

		kvm_pmu_flush_hwstate(vcpu);

		local_irq_disable();

		kvm_vgic_flush_hwstate(vcpu);

		/*
		 * Exit if we have a signal pending so that we can deliver the
		 * signal to user space.
		 */
		if (signal_pending(current)) {
			ret = -EINTR;
			run->exit_reason = KVM_EXIT_INTR;
		}

		/*
		 * If we're using a userspace irqchip, then check if we need
		 * to tell a userspace irqchip about timer or PMU level
		 * changes and if so, exit to userspace (the actual level
		 * state gets updated in kvm_timer_update_run and
		 * kvm_pmu_update_run below).
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use)) {
			if (kvm_timer_should_notify_user(vcpu) ||
			    kvm_pmu_should_notify_user(vcpu)) {
				ret = -EINTR;
				run->exit_reason = KVM_EXIT_INTR;
			}
		}

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virtual/kvm/vcpu-requests.rst
		 */
		smp_store_mb(vcpu->mode, IN_GUEST_MODE);

		if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu)) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			isb(); /* Ensure work in x_flush_hwstate is committed */
			kvm_pmu_sync_hwstate(vcpu);
			if (static_branch_unlikely(&userspace_irqchip_in_use))
				kvm_timer_sync_hwstate(vcpu);
			kvm_vgic_sync_hwstate(vcpu);
			local_irq_enable();
			preempt_enable();
			continue;
		}

		kvm_arm_setup_debug(vcpu);

		/**************************************************************
		 * Enter the guest
		 */
		trace_kvm_entry(*vcpu_pc(vcpu));
		guest_enter_irqoff();

		if (has_vhe()) {
			kvm_arm_vhe_guest_enter();
			ret = kvm_vcpu_run_vhe(vcpu);
			kvm_arm_vhe_guest_exit();
		} else {
			ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
		}

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;
		/*
		 * Back from guest
		 *************************************************************/

		kvm_arm_clear_debug(vcpu);

		/*
		 * We must sync the PMU state before the vgic state so
		 * that the vgic can properly sample the updated state of the
		 * interrupt line.
		 */
		kvm_pmu_sync_hwstate(vcpu);

		/*
		 * Sync the vgic state before syncing the timer state because
		 * the timer code needs to know if the virtual timer
		 * interrupts are active.
		 */
		kvm_vgic_sync_hwstate(vcpu);

		/*
		 * Sync the timer hardware state before enabling interrupts as
		 * we don't want vtimer interrupts to race with syncing the
		 * timer virtual interrupt state.
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use))
			kvm_timer_sync_hwstate(vcpu);

		kvm_arch_vcpu_ctxsync_fp(vcpu);

		/*
		 * We may have taken a host interrupt in HYP mode (i.e.
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 *
		 * We're now back in SVC mode, with interrupts
		 * disabled. Enabling the interrupts now will have
		 * the effect of taking the interrupt again, in SVC
		 * mode this time.
		 */
		local_irq_enable();

		/*
		 * We do local_irq_enable() before calling guest_exit() so
		 * that if a timer interrupt hits while running the guest we
		 * account that tick as being spent in the guest. We enable
		 * preemption after calling guest_exit() so that if we get
		 * preempted we make sure ticks after that are not counted as
		 * guest time.
		 */
		guest_exit();
		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/* Exit types that need handling before we can be preempted */
		handle_exit_early(vcpu, run, ret);

		preempt_enable();

		ret = handle_exit(vcpu, run, ret);
	}

	/* Tell userspace about in-kernel device output levels */
	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
		kvm_timer_update_run(vcpu);
		kvm_pmu_update_run(vcpu);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);
	return ret;
}

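/*
 * Assert or retire a userspace-driven IRQ or FIQ line by toggling the
 * corresponding HCR_VI/HCR_VF bit in the vCPU's HCR image, kicking the
 * vCPU if the line state actually changed.
 */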
static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
{
	int bit_index;
	bool set;
	unsigned long *hcr;

	if (number == KVM_ARM_IRQ_CPU_IRQ)
		bit_index = __ffs(HCR_VI);
	else /* KVM_ARM_IRQ_CPU_FIQ */
		bit_index = __ffs(HCR_VF);

	hcr = vcpu_hcr(vcpu);
	if (level)
		set = test_and_set_bit(bit_index, hcr);
	else
		set = test_and_clear_bit(bit_index, hcr);

	/*
	 * If we didn't change anything, no need to wake up or kick other CPUs
	 */
	if (set == level)
		return 0;

	/*
	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
	 * trigger a world-switch round on the running physical CPU to set the
	 * virtual IRQ/FIQ fields in the HCR appropriately.
	 */
	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return 0;
}

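/*
 * Handle KVM_IRQ_LINE: the irq field packs the interrupt type, target
 * vcpu index and interrupt number, decoded below with the
 * KVM_ARM_IRQ_* shift/mask pairs and routed either to the core IRQ/FIQ
 * lines or to the in-kernel vgic.
 */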
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			  bool line_status)
{
	u32 irq = irq_level->irq;
	unsigned int irq_type, vcpu_idx, irq_num;
	int nrcpus = atomic_read(&kvm->online_vcpus);
	struct kvm_vcpu *vcpu = NULL;
	bool level = irq_level->level;

	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;

	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);

	switch (irq_type) {
	case KVM_ARM_IRQ_TYPE_CPU:
		if (irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
			return -EINVAL;

		return vcpu_interrupt_line(vcpu, irq_num, level);
	case KVM_ARM_IRQ_TYPE_PPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
	case KVM_ARM_IRQ_TYPE_SPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (irq_num < VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
	}

	return -EINVAL;
}

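/*
 * Validate the requested target CPU and feature set against the host
 * and against any earlier KVM_ARM_VCPU_INIT call, record them, and
 * reset the vCPU into its initial state.
 */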
static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
			       const struct kvm_vcpu_init *init)
{
	unsigned int i;
	int ret;
	int phys_target = kvm_target_cpu();

	if (init->target != phys_target)
		return -EINVAL;

	/*
	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
	 * use the same target.
	 */
	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
		return -EINVAL;

	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
	for (i = 0; i < sizeof(init->features) * 8; i++) {
		bool set = (init->features[i / 32] & (1 << (i % 32)));

		if (set && i >= KVM_VCPU_MAX_FEATURES)
			return -ENOENT;

		/*
		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
		 * use the same feature set.
		 */
		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
		    test_bit(i, vcpu->arch.features) != set)
			return -EINVAL;

		if (set)
			set_bit(i, vcpu->arch.features);
	}

	vcpu->arch.target = phys_target;

	/* Now we know what it is, we can reset it. */
	ret = kvm_reset_vcpu(vcpu);
	if (ret) {
		vcpu->arch.target = -1;
		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
	}

	return ret;
}

static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
					 struct kvm_vcpu_init *init)
{
	int ret;

	ret = kvm_vcpu_set_target(vcpu, init);
	if (ret)
		return ret;

	/*
	 * Ensure a rebooted VM will fault in RAM pages and detect if the
	 * guest MMU is turned off and flush the caches as needed.
	 */
	if (vcpu->arch.has_run_once)
		stage2_unmap_vm(vcpu->kvm);

	vcpu_reset_hcr(vcpu);

	/*
	 * Handle the "start in power-off" case.
	 */
	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
		vcpu_power_off(vcpu);
	else
		vcpu->arch.power_off = false;

	return 0;
}

static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	memset(events, 0, sizeof(*events));

	return __kvm_arm_vcpu_get_events(vcpu, events);
}

static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	int i;

	/* check whether the reserved field is zero */
	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
		if (events->reserved[i])
			return -EINVAL;

	/* check whether the pad field is zero */
	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
		if (events->exception.pad[i])
			return -EINVAL;

	return __kvm_arm_vcpu_set_events(vcpu, events);
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	long r;

	switch (ioctl) {
	case KVM_ARM_VCPU_INIT: {
		struct kvm_vcpu_init init;

		r = -EFAULT;
		if (copy_from_user(&init, argp, sizeof(init)))
			break;

		r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
		break;
	}
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arm_set_reg(vcpu, &reg);
		else
			r = kvm_arm_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned n;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EPERM;
		if (!kvm_arm_vcpu_is_finalized(vcpu))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_arm_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_set_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_get_attr(vcpu, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (kvm_arm_vcpu_get_events(vcpu, &events))
			return -EINVAL;

		if (copy_to_user(argp, &events, sizeof(events)))
			return -EFAULT;

		return 0;
	}
	case KVM_SET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (copy_from_user(&events, argp, sizeof(events)))
			return -EFAULT;

		return kvm_arm_vcpu_set_events(vcpu, &events);
	}
	case KVM_ARM_VCPU_FINALIZE: {
		int what;

		if (!kvm_vcpu_initialized(vcpu))
			return -ENOEXEC;

		if (get_user(what, (const int __user *)argp))
			return -EFAULT;

		return kvm_arm_vcpu_finalize(vcpu, what);
	}
	default:
		r = -EINVAL;
	}

	return r;
}

/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * Steps 1-4 below provide a general overview of dirty page logging. See the
 * kvm_get_dirty_log_protect() function description for additional details.
 *
 * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
 * always flush the TLB (step 4), even if the previous step failed and the
 * dirty bitmap may be corrupt. Regardless of the previous outcome, the KVM
 * logging API does not preclude subsequent dirty log reads by user space.
 * Flushing the TLB ensures writes will be marked dirty for the next log read.
 *
 * 1. Take a snapshot of the bit and clear it if needed.
 * 2. Write protect the corresponding page.
 * 3. Copy the snapshot to the userspace.
 * 4. Flush TLB's if needed.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
	bool flush = false;
	int r;

	mutex_lock(&kvm->slots_lock);

	r = kvm_get_dirty_log_protect(kvm, log, &flush);

	if (flush)
		kvm_flush_remote_tlbs(kvm);

	mutex_unlock(&kvm->slots_lock);
	return r;
}

int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
{
	bool flush = false;
	int r;

	mutex_lock(&kvm->slots_lock);

	r = kvm_clear_dirty_log_protect(kvm, log, &flush);

	if (flush)
		kvm_flush_remote_tlbs(kvm);

	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
					struct kvm_arm_device_addr *dev_addr)
{
	unsigned long dev_id, type;

	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
		KVM_ARM_DEVICE_ID_SHIFT;
	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
		KVM_ARM_DEVICE_TYPE_SHIFT;

	switch (dev_id) {
	case KVM_ARM_DEVICE_VGIC_V2:
		if (!vgic_present)
			return -ENXIO;
		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
	default:
		return -ENODEV;
	}
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_CREATE_IRQCHIP: {
		int ret;
		if (!vgic_present)
			return -ENXIO;
		mutex_lock(&kvm->lock);
		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
		mutex_unlock(&kvm->lock);
		return ret;
	}
	case KVM_ARM_SET_DEVICE_ADDR: {
		struct kvm_arm_device_addr dev_addr;

		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
			return -EFAULT;
		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
	}
	case KVM_ARM_PREFERRED_TARGET: {
		int err;
		struct kvm_vcpu_init init;

		err = kvm_vcpu_preferred_target(&init);
		if (err)
			return err;

		if (copy_to_user(argp, &init, sizeof(init)))
			return -EFAULT;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

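/*
 * Install the EL2 (Hyp) runtime environment on this CPU: its page
 * tables, stack and exception vectors, plus the stage-2 translation
 * setup.
 */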
static void cpu_init_hyp_mode(void *dummy)
{
	phys_addr_t pgd_ptr;
	unsigned long hyp_stack_ptr;
	unsigned long stack_page;
	unsigned long vector_ptr;

	/* Switch from the HYP stub to our own HYP init vector */
	__hyp_set_vectors(kvm_get_idmap_vector());

	pgd_ptr = kvm_mmu_get_httbr();
	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
	hyp_stack_ptr = stack_page + PAGE_SIZE;
	vector_ptr = (unsigned long)kvm_get_hyp_vector();

	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
	__cpu_init_stage2();
}

static void cpu_hyp_reset(void)
{
	if (!is_kernel_in_hyp_mode())
		__hyp_reset_vectors();
}

static void cpu_hyp_reinit(void)
{
	cpu_hyp_reset();

	if (is_kernel_in_hyp_mode())
		kvm_timer_init_vhe();
	else
		cpu_init_hyp_mode(NULL);

	kvm_arm_init_debug();

	if (vgic_present)
		kvm_vgic_init_cpu_hardware();
}

static void _kvm_arch_hardware_enable(void *discard)
{
	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reinit();
		__this_cpu_write(kvm_arm_hardware_enabled, 1);
	}
}

int kvm_arch_hardware_enable(void)
{
	_kvm_arch_hardware_enable(NULL);
	return 0;
}

static void _kvm_arch_hardware_disable(void *discard)
{
	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reset();
		__this_cpu_write(kvm_arm_hardware_enabled, 0);
	}
}

void kvm_arch_hardware_disable(void)
{
	_kvm_arch_hardware_disable(NULL);
}

#ifdef CONFIG_CPU_PM
static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
				    unsigned long cmd,
				    void *v)
{
	/*
	 * kvm_arm_hardware_enabled is left with its old value over
	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
	 * re-enable hyp.
	 */
	switch (cmd) {
	case CPU_PM_ENTER:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/*
			 * don't update kvm_arm_hardware_enabled here
			 * so that the hardware will be re-enabled
			 * when we resume. See below.
			 */
			cpu_hyp_reset();

		return NOTIFY_OK;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/* The hardware was enabled before suspend. */
			cpu_hyp_reinit();

		return NOTIFY_OK;

	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block hyp_init_cpu_pm_nb = {
	.notifier_call = hyp_init_cpu_pm_notifier,
};

static void __init hyp_cpu_pm_init(void)
{
	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
}
static void __init hyp_cpu_pm_exit(void)
{
	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else
static inline void hyp_cpu_pm_init(void)
{
}
static inline void hyp_cpu_pm_exit(void)
{
}
#endif

static int init_common_resources(void)
{
	kvm_set_ipa_limit();

	return 0;
}

static int init_subsystems(void)
{
	int err = 0;

	/*
	 * Enable hardware so that subsystem initialisation can access EL2.
	 */
	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);

	/*
	 * Register CPU low-power notifier
	 */
	hyp_cpu_pm_init();

	/*
	 * Init HYP view of VGIC
	 */
	err = kvm_vgic_hyp_init();
	switch (err) {
	case 0:
		vgic_present = true;
		break;
	case -ENODEV:
	case -ENXIO:
		vgic_present = false;
		err = 0;
		break;
	default:
		goto out;
	}

	/*
	 * Init HYP architected timer support
	 */
	err = kvm_timer_hyp_init(vgic_present);
	if (err)
		goto out;

	kvm_perf_init();
	kvm_coproc_table_init();

out:
	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);

	return err;
}

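/*
 * Undo init_hyp_mode(): free the Hyp page tables and the per-CPU Hyp
 * stack pages, and unregister the CPU PM notifier.
 */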
static void teardown_hyp_mode(void)
{
	int cpu;

	free_hyp_pgds();
	for_each_possible_cpu(cpu)
		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
	hyp_cpu_pm_exit();
}

/**
 * init_hyp_mode - allocate and map the Hyp-mode page tables, stacks and
 * host context for all possible CPUs
 */
static int init_hyp_mode(void)
{
	int cpu;
	int err = 0;

	/*
	 * Allocate Hyp PGD and setup Hyp identity mapping
	 */
	err = kvm_mmu_init();
	if (err)
		goto out_err;

	/*
	 * Allocate stack pages for Hypervisor-mode
	 */
	for_each_possible_cpu(cpu) {
		unsigned long stack_page;

		stack_page = __get_free_page(GFP_KERNEL);
		if (!stack_page) {
			err = -ENOMEM;
			goto out_err;
		}

		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
	}

	/*
	 * Map the Hyp-code called directly from the host
	 */
	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
	if (err) {
		kvm_err("Cannot map world-switch code\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map rodata section\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map bss section\n");
		goto out_err;
	}

	err = kvm_map_vectors();
	if (err) {
		kvm_err("Cannot map vectors\n");
		goto out_err;
	}

	/*
	 * Map the Hyp stack pages
	 */
	for_each_possible_cpu(cpu) {
		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
					  PAGE_HYP);

		if (err) {
			kvm_err("Cannot map hyp stack\n");
			goto out_err;
		}
	}

	for_each_possible_cpu(cpu) {
		kvm_host_data_t *cpu_data;

		cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
		kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu);
		err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);

		if (err) {
			kvm_err("Cannot map host CPU state: %d\n", err);
			goto out_err;
		}
	}

	err = hyp_map_aux_data();
	if (err)
		kvm_err("Cannot map host auxiliary data: %d\n", err);

	return 0;

out_err:
	teardown_hyp_mode();
	kvm_err("error initializing Hyp mode: %d\n", err);
	return err;
}

static void check_kvm_target_cpu(void *ret)
{
	*(int *)ret = kvm_target_cpu();
}

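/*
 * Resolve an MPIDR affinity value (e.g. the target of a PSCI CPU_ON
 * request) to the vCPU it designates, or NULL if no vCPU of this VM
 * matches.
 */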
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
	struct kvm_vcpu *vcpu;
	int i;

	mpidr &= MPIDR_HWID_BITMASK;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
			return vcpu;
	}
	return NULL;
}

bool kvm_arch_has_irq_bypass(void)
{
	return true;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
					  &irqfd->irq_entry);
}

void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
				     &irqfd->irq_entry);
}

void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_halt_guest(irqfd->kvm);
}

void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_resume_guest(irqfd->kvm);
}

/**
 * kvm_arch_init - initialize Hyp mode and memory mappings on all CPUs
 */
int kvm_arch_init(void *opaque)
{
	int err;
	int ret, cpu;
	bool in_hyp_mode;

	if (!is_hyp_mode_available()) {
		kvm_info("HYP mode not available\n");
		return -ENODEV;
	}

	in_hyp_mode = is_kernel_in_hyp_mode();

	if (!in_hyp_mode && kvm_arch_requires_vhe()) {
		kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
		return -ENODEV;
	}

	for_each_online_cpu(cpu) {
		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
		if (ret < 0) {
			kvm_err("Error, CPU %d not supported!\n", cpu);
			return -ENODEV;
		}
	}

	err = init_common_resources();
	if (err)
		return err;

	err = kvm_arm_init_sve();
	if (err)
		return err;

	if (!in_hyp_mode) {
		err = init_hyp_mode();
		if (err)
			goto out_err;
	}

	err = init_subsystems();
	if (err)
		goto out_hyp;

	if (in_hyp_mode)
		kvm_info("VHE mode initialized successfully\n");
	else
		kvm_info("Hyp mode initialized successfully\n");

	return 0;

out_hyp:
	if (!in_hyp_mode)
		teardown_hyp_mode();
out_err:
	return err;
}

/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
	kvm_perf_teardown();
}

static int arm_init(void)
{
	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	return rc;
}

module_init(arm_init);