
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull second batch of KVM updates from Radim Krčmář:
"General:

- move kvm_stat tool from QEMU repo into tools/kvm/kvm_stat (kvm_stat
had nothing to do with QEMU in the first place -- the tool only
interprets debugfs)

- expose per-vm statistics in debugfs and support them in kvm_stat
(KVM always collected per-vm statistics, but they were summarised
into global statistics)

x86:

- fix dynamic APICv (VMX was improperly configured and a guest could
access host's APIC MSRs, CVE-2016-4440)

- minor fixes

ARM changes from Christoffer Dall:

- new vgic reimplementation of our horribly broken legacy vgic
implementation. The two implementations will live side-by-side
(with the new being the configured default) for one kernel release
and then we'll remove the legacy one.

- fix for a non-critical issue with virtual abort injection to guests"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (70 commits)
tools: kvm_stat: Add comments
tools: kvm_stat: Introduce pid monitoring
KVM: Create debugfs dir and stat files for each VM
MAINTAINERS: Add kvm tools
tools: kvm_stat: Powerpc related fixes
tools: Add kvm_stat man page
tools: Add kvm_stat vm monitor script
kvm:vmx: more complete state update on APICv on/off
KVM: SVM: Add more SVM_EXIT_REASONS
KVM: Unify traced vector format
svm: bitwise vs logical op typo
KVM: arm/arm64: vgic-new: Synchronize changes to active state
KVM: arm/arm64: vgic-new: enable build
KVM: arm/arm64: vgic-new: implement mapped IRQ handling
KVM: arm/arm64: vgic-new: Wire up irqfd injection
KVM: arm/arm64: vgic-new: Add vgic_v2/v3_enable
KVM: arm/arm64: vgic-new: vgic_init: implement map_resources
KVM: arm/arm64: vgic-new: vgic_init: implement vgic_init
KVM: arm/arm64: vgic-new: vgic_init: implement vgic_create
KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init
...

+5900 -192
+1
MAINTAINERS
···
  F:	include/linux/kvm*
  F:	include/uapi/linux/kvm*
  F:	virt/kvm/
+ F:	tools/kvm/

  KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
  M:	Joerg Roedel <joro@8bytes.org>
+6
arch/arm/include/asm/kvm_host.h
···

  #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS

+ #define KVM_REQ_VCPU_EXIT	8
+
  u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
  int __attribute_const__ kvm_target_cpu(void);
  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
···

  struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
  struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+ void kvm_arm_halt_guest(struct kvm *kvm);
+ void kvm_arm_resume_guest(struct kvm *kvm);
+ void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
+ void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);

  int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
  unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
+3
arch/arm/include/asm/kvm_mmio.h
···
  	bool sign_extend;
  };

+ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
+ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
+
  int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
  int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
  		 phys_addr_t fault_ipa);
+7
arch/arm/kvm/Kconfig
···
  	---help---
  	  Provides host support for ARM processors.

+ config KVM_NEW_VGIC
+ 	bool "New VGIC implementation"
+ 	depends on KVM
+ 	default y
+ 	---help---
+ 	  uses the new VGIC implementation
+
  source drivers/vhost/Kconfig

  endif # VIRTUALIZATION
+11
arch/arm/kvm/Makefile
···
  obj-y += kvm-arm.o init.o interrupts.o
  obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
  obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
+
+ ifeq ($(CONFIG_KVM_NEW_VGIC),y)
+ obj-y += $(KVM)/arm/vgic/vgic.o
+ obj-y += $(KVM)/arm/vgic/vgic-init.o
+ obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
+ obj-y += $(KVM)/arm/vgic/vgic-v2.o
+ obj-y += $(KVM)/arm/vgic/vgic-mmio.o
+ obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
+ obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
+ else
  obj-y += $(KVM)/arm/vgic.o
  obj-y += $(KVM)/arm/vgic-v2.o
  obj-y += $(KVM)/arm/vgic-v2-emul.o
+ endif
  obj-y += $(KVM)/arm/arch_timer.o
+22 -15
arch/arm/kvm/arm.c
···
  static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
  {
  	struct kvm *kvm = vcpu->kvm;
- 	int ret;
+ 	int ret = 0;

  	if (likely(vcpu->arch.has_run_once))
  		return 0;
···
  	 * interrupts from the virtual timer with a userspace gic.
  	 */
  	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
- 		kvm_timer_enable(kvm);
+ 		ret = kvm_timer_enable(vcpu);

- 	return 0;
+ 	return ret;
  }

  bool kvm_arch_intc_initialized(struct kvm *kvm)
···
  	return vgic_initialized(kvm);
  }

- static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
- static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
-
- static void kvm_arm_halt_guest(struct kvm *kvm)
+ void kvm_arm_halt_guest(struct kvm *kvm)
  {
  	int i;
  	struct kvm_vcpu *vcpu;

  	kvm_for_each_vcpu(i, vcpu, kvm)
  		vcpu->arch.pause = true;
- 	force_vm_exit(cpu_all_mask);
+ 	kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
  }

- static void kvm_arm_resume_guest(struct kvm *kvm)
+ void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
+ {
+ 	vcpu->arch.pause = true;
+ 	kvm_vcpu_kick(vcpu);
+ }
+
+ void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
+ {
+ 	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+
+ 	vcpu->arch.pause = false;
+ 	swake_up(wq);
+ }
+
+ void kvm_arm_resume_guest(struct kvm *kvm)
  {
  	int i;
  	struct kvm_vcpu *vcpu;

- 	kvm_for_each_vcpu(i, vcpu, kvm) {
- 		struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
-
- 		vcpu->arch.pause = false;
- 		swake_up(wq);
- 	}
+ 	kvm_for_each_vcpu(i, vcpu, kvm)
+ 		kvm_arm_resume_vcpu(vcpu);
  }

  static void vcpu_sleep(struct kvm_vcpu *vcpu)
+12 -12
arch/arm/kvm/mmio.c
···

  #include "trace.h"

- static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
+ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data)
  {
  	void *datap = NULL;
  	union {
···
  	memcpy(buf, datap, len);
  }

- static unsigned long mmio_read_buf(char *buf, unsigned int len)
+ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
  {
  	unsigned long data = 0;
  	union {
···

  	switch (len) {
  	case 1:
- 		data = buf[0];
+ 		data = *(u8 *)buf;
  		break;
  	case 2:
  		memcpy(&tmp.hword, buf, len);
···

  /**
   * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+  *			      or in-kernel IO emulation
+  *
   * @vcpu: The VCPU pointer
   * @run:  The VCPU run struct containing the mmio data
-  *
-  * This should only be called after returning from userspace for MMIO load
-  * emulation.
   */
  int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
  {
···
  	if (len > sizeof(unsigned long))
  		return -EINVAL;

- 	data = mmio_read_buf(run->mmio.data, len);
+ 	data = kvm_mmio_read_buf(run->mmio.data, len);

  	if (vcpu->arch.mmio_decode.sign_extend &&
  	    len < sizeof(unsigned long)) {
···
  			       len);

  	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
- 	mmio_write_buf(data_buf, len, data);
+ 	kvm_mmio_write_buf(data_buf, len, data);

  	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
  			       data_buf);
···
  	run->mmio.is_write	= is_write;
  	run->mmio.phys_addr	= fault_ipa;
  	run->mmio.len		= len;
- 	if (is_write)
- 		memcpy(run->mmio.data, data_buf, len);

  	if (!ret) {
  		/* We handled the access successfully in the kernel. */
+ 		if (!is_write)
+ 			memcpy(run->mmio.data, data_buf, len);
  		vcpu->stat.mmio_exit_kernel++;
  		kvm_handle_mmio_return(vcpu, run);
  		return 1;
- 	} else {
- 		vcpu->stat.mmio_exit_user++;
  	}

+ 	if (is_write)
+ 		memcpy(run->mmio.data, data_buf, len);
+ 	vcpu->stat.mmio_exit_user++;
  	run->exit_reason	= KVM_EXIT_MMIO;
  	return 0;
  }
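The renamed helpers just pack and unpack an integer of the access width into a byte buffer so it can travel through run->mmio.data or the in-kernel IO bus. The standalone C sketch below models that round trip with plain memcpy on a little-endian host; the names and the simplified bodies are illustrative only (the kernel versions additionally handle big-endian hosts through a union), not the kernel code itself.

/* Minimal userspace model of the kvm_mmio_write_buf()/kvm_mmio_read_buf()
 * round trip.  Assumes a little-endian host. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Encode an integer into a byte buffer of the access width. */
static void mmio_write_buf(void *buf, unsigned int len, unsigned long data)
{
	memcpy(buf, &data, len);
}

/* Decode a byte buffer of the access width back into an integer. */
static unsigned long mmio_read_buf(const void *buf, unsigned int len)
{
	unsigned long data = 0;

	memcpy(&data, buf, len);
	return data;
}

int main(void)
{
	uint8_t buf[8] = { 0 };

	mmio_write_buf(buf, 2, 0xbeef);			/* a 16-bit MMIO store */
	printf("decoded: 0x%lx\n", mmio_read_buf(buf, 2));	/* prints 0xbeef */
	return 0;
}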
+6
arch/arm64/include/asm/kvm_host.h
···

  #define KVM_VCPU_MAX_FEATURES 4

+ #define KVM_REQ_VCPU_EXIT	8
+
  int __attribute_const__ kvm_target_cpu(void);
  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
  int kvm_arch_dev_ioctl_check_extension(long ext);
···

  struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
  struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
+ void kvm_arm_halt_guest(struct kvm *kvm);
+ void kvm_arm_resume_guest(struct kvm *kvm);
+ void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
+ void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);

  u64 __kvm_call_hyp(void *hypfn, ...);
  #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+3
arch/arm64/include/asm/kvm_mmio.h
···
  	bool sign_extend;
  };

+ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
+ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
+
  int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
  int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
  		 phys_addr_t fault_ipa);
+7
arch/arm64/kvm/Kconfig
···
  	  Adds support for a virtual Performance Monitoring Unit (PMU) in
  	  virtual machines.

+ config KVM_NEW_VGIC
+ 	bool "New VGIC implementation"
+ 	depends on KVM
+ 	default y
+ 	---help---
+ 	  uses the new VGIC implementation
+
  source drivers/vhost/Kconfig

  endif # VIRTUALIZATION
+12
arch/arm64/kvm/Makefile
···
  kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
  kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o

+ ifeq ($(CONFIG_KVM_NEW_VGIC),y)
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
+ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
+ else
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+ endif
  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
  kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
+1 -1
arch/arm64/kvm/inject_fault.c
···
  	esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);

  	if (!is_iabt)
- 		esr |= ESR_ELx_EC_DABT_LOW;
+ 		esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;

  	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
  }
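This one-liner fixes the non-critical virtual abort injection issue mentioned in the pull message: the exception-class value was OR'd into the syndrome without being shifted into the EC field, so it landed in unrelated low bits. A minimal standalone illustration of the pattern, using plain constants that mirror the arm64 values (ESR_ELx_EC_SHIFT is 26, ESR_ELx_EC_DABT_LOW is 0x24):

#include <stdio.h>

#define EC_SHIFT	26	/* EC field occupies bits [31:26] of ESR_ELx */
#define EC_DABT_LOW	0x24UL	/* exception class: data abort from lower EL */

int main(void)
{
	unsigned long wrong = 0, right = 0;

	wrong |= EC_DABT_LOW;			/* lands in the low bits: bogus */
	right |= EC_DABT_LOW << EC_SHIFT;	/* lands in the EC field */

	printf("wrong=0x%08lx right=0x%08lx\n", wrong, right);
	return 0;
}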
+41 -3
arch/x86/include/uapi/asm/svm.h
···
  #define _UAPI__SVM_H

  #define SVM_EXIT_READ_CR0	0x000
+ #define SVM_EXIT_READ_CR2	0x002
  #define SVM_EXIT_READ_CR3	0x003
  #define SVM_EXIT_READ_CR4	0x004
  #define SVM_EXIT_READ_CR8	0x008
  #define SVM_EXIT_WRITE_CR0	0x010
+ #define SVM_EXIT_WRITE_CR2	0x012
  #define SVM_EXIT_WRITE_CR3	0x013
  #define SVM_EXIT_WRITE_CR4	0x014
  #define SVM_EXIT_WRITE_CR8	0x018
···

  #define SVM_EXIT_REASONS \
  	{ SVM_EXIT_READ_CR0,	"read_cr0" }, \
+ 	{ SVM_EXIT_READ_CR2,	"read_cr2" }, \
  	{ SVM_EXIT_READ_CR3,	"read_cr3" }, \
  	{ SVM_EXIT_READ_CR4,	"read_cr4" }, \
  	{ SVM_EXIT_READ_CR8,	"read_cr8" }, \
  	{ SVM_EXIT_WRITE_CR0,	"write_cr0" }, \
+ 	{ SVM_EXIT_WRITE_CR2,	"write_cr2" }, \
  	{ SVM_EXIT_WRITE_CR3,	"write_cr3" }, \
  	{ SVM_EXIT_WRITE_CR4,	"write_cr4" }, \
  	{ SVM_EXIT_WRITE_CR8,	"write_cr8" }, \
···
  	{ SVM_EXIT_READ_DR1,	"read_dr1" }, \
  	{ SVM_EXIT_READ_DR2,	"read_dr2" }, \
  	{ SVM_EXIT_READ_DR3,	"read_dr3" }, \
+ 	{ SVM_EXIT_READ_DR4,	"read_dr4" }, \
+ 	{ SVM_EXIT_READ_DR5,	"read_dr5" }, \
+ 	{ SVM_EXIT_READ_DR6,	"read_dr6" }, \
+ 	{ SVM_EXIT_READ_DR7,	"read_dr7" }, \
  	{ SVM_EXIT_WRITE_DR0,	"write_dr0" }, \
  	{ SVM_EXIT_WRITE_DR1,	"write_dr1" }, \
  	{ SVM_EXIT_WRITE_DR2,	"write_dr2" }, \
  	{ SVM_EXIT_WRITE_DR3,	"write_dr3" }, \
+ 	{ SVM_EXIT_WRITE_DR4,	"write_dr4" }, \
  	{ SVM_EXIT_WRITE_DR5,	"write_dr5" }, \
+ 	{ SVM_EXIT_WRITE_DR6,	"write_dr6" }, \
  	{ SVM_EXIT_WRITE_DR7,	"write_dr7" }, \
+ 	{ SVM_EXIT_EXCP_BASE + DE_VECTOR,	"DE excp" }, \
  	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" }, \
  	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + OF_VECTOR,	"OF excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + BR_VECTOR,	"BR excp" }, \
  	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" }, \
- 	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
  	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + DF_VECTOR,	"DF excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + TS_VECTOR,	"TS excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + NP_VECTOR,	"NP excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + SS_VECTOR,	"SS excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + GP_VECTOR,	"GP excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + MF_VECTOR,	"MF excp" }, \
  	{ SVM_EXIT_EXCP_BASE + AC_VECTOR,	"AC excp" }, \
  	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" }, \
+ 	{ SVM_EXIT_EXCP_BASE + XM_VECTOR,	"XF excp" }, \
  	{ SVM_EXIT_INTR,	"interrupt" }, \
  	{ SVM_EXIT_NMI,		"nmi" }, \
  	{ SVM_EXIT_SMI,		"smi" }, \
  	{ SVM_EXIT_INIT,	"init" }, \
  	{ SVM_EXIT_VINTR,	"vintr" }, \
  	{ SVM_EXIT_CR0_SEL_WRITE,	"cr0_sel_write" }, \
+ 	{ SVM_EXIT_IDTR_READ,	"read_idtr" }, \
+ 	{ SVM_EXIT_GDTR_READ,	"read_gdtr" }, \
+ 	{ SVM_EXIT_LDTR_READ,	"read_ldtr" }, \
+ 	{ SVM_EXIT_TR_READ,	"read_rt" }, \
+ 	{ SVM_EXIT_IDTR_WRITE,	"write_idtr" }, \
+ 	{ SVM_EXIT_GDTR_WRITE,	"write_gdtr" }, \
+ 	{ SVM_EXIT_LDTR_WRITE,	"write_ldtr" }, \
+ 	{ SVM_EXIT_TR_WRITE,	"write_rt" }, \
+ 	{ SVM_EXIT_RDTSC,	"rdtsc" }, \
+ 	{ SVM_EXIT_RDPMC,	"rdpmc" }, \
+ 	{ SVM_EXIT_PUSHF,	"pushf" }, \
+ 	{ SVM_EXIT_POPF,	"popf" }, \
  	{ SVM_EXIT_CPUID,	"cpuid" }, \
+ 	{ SVM_EXIT_RSM,		"rsm" }, \
+ 	{ SVM_EXIT_IRET,	"iret" }, \
+ 	{ SVM_EXIT_SWINT,	"swint" }, \
  	{ SVM_EXIT_INVD,	"invd" }, \
  	{ SVM_EXIT_PAUSE,	"pause" }, \
  	{ SVM_EXIT_HLT,		"hlt" }, \
···
  	{ SVM_EXIT_IOIO,	"io" }, \
  	{ SVM_EXIT_MSR,		"msr" }, \
  	{ SVM_EXIT_TASK_SWITCH,	"task_switch" }, \
+ 	{ SVM_EXIT_FERR_FREEZE,	"ferr_freeze" }, \
  	{ SVM_EXIT_SHUTDOWN,	"shutdown" }, \
  	{ SVM_EXIT_VMRUN,	"vmrun" }, \
  	{ SVM_EXIT_VMMCALL,	"hypercall" }, \
···
  	{ SVM_EXIT_STGI,	"stgi" }, \
  	{ SVM_EXIT_CLGI,	"clgi" }, \
  	{ SVM_EXIT_SKINIT,	"skinit" }, \
+ 	{ SVM_EXIT_RDTSCP,	"rdtscp" }, \
+ 	{ SVM_EXIT_ICEBP,	"icebp" }, \
  	{ SVM_EXIT_WBINVD,	"wbinvd" }, \
  	{ SVM_EXIT_MONITOR,	"monitor" }, \
  	{ SVM_EXIT_MWAIT,	"mwait" }, \
  	{ SVM_EXIT_XSETBV,	"xsetbv" }, \
  	{ SVM_EXIT_NPF,		"npf" }, \
- 	{ SVM_EXIT_RSM,		"rsm" }, \
  	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
- 	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,	"avic_unaccelerated_access" }
+ 	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,	"avic_unaccelerated_access" }, \
+ 	{ SVM_EXIT_ERR,		"invalid_guest_state" }


  #endif /* _UAPI__SVM_H */
+2 -2
arch/x86/kvm/svm.c
···
  #define TSC_RATIO_MIN		0x0000000000000001ULL
  #define TSC_RATIO_MAX		0x000000ffffffffffULL

- #define AVIC_HPA_MASK	~((0xFFFULL << 52) || 0xFFF)
+ #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

  /*
   * 0xff is broadcast, so the max index allowed for physical APIC ID
···
  	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
  	u32 icrl = svm->vmcb->control.exit_info_1;
  	u32 id = svm->vmcb->control.exit_info_2 >> 32;
- 	u32 index = svm->vmcb->control.exit_info_2 && 0xFF;
+ 	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
  	struct kvm_lapic *apic = svm->vcpu.arch.apic;

  	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
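Both hunks fix the same bug class, flagged in the shortlog as "svm: bitwise vs logical op typo": C's logical operators (||, &&) collapse their operands to 0 or 1, so the old AVIC_HPA_MASK evaluated to ~1 and the old index computation could only ever yield 0 or 1. A standalone snippet showing the difference:

#include <stdio.h>

int main(void)
{
	/* Logical OR yields 1, so the "mask" silently becomes ~1. */
	unsigned long wrong_mask = ~((0xFFFULL << 52) || 0xFFF);
	unsigned long right_mask = ~((0xFFFULL << 52) | 0xFFF);

	/* Logical AND yields 0 or 1, never the low byte. */
	unsigned long val = 0x1234;
	unsigned long wrong_index = val && 0xFF;	/* == 1 */
	unsigned long right_index = val & 0xFF;		/* == 0x34 */

	printf("mask:  %016lx vs %016lx\n", wrong_mask, right_mask);
	printf("index: %lu vs 0x%lx\n", wrong_index, right_index);
	return 0;
}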
+29 -17
arch/x86/kvm/vmx.c
···

  	if (is_guest_mode(vcpu))
  		msr_bitmap = vmx_msr_bitmap_nested;
- 	else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
+ 	else if (cpu_has_secondary_exec_ctrls() &&
+ 		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ 		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
  		if (is_long_mode(vcpu))
  			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
  		else
···
  	struct vcpu_vmx *vmx = to_vmx(vcpu);

  	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+ 	if (cpu_has_secondary_exec_ctrls()) {
+ 		if (kvm_vcpu_apicv_active(vcpu))
+ 			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ 				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ 				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ 		else
+ 			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ 					SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ 					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+ 	}
+
+ 	if (cpu_has_vmx_msr_bitmap())
+ 		vmx_set_msr_bitmap(vcpu);
  }

  static u32 vmx_exec_control(struct vcpu_vmx *vmx)
···

  	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */

- 	if (enable_apicv) {
- 		for (msr = 0x800; msr <= 0x8ff; msr++)
- 			vmx_disable_intercept_msr_read_x2apic(msr);
+ 	for (msr = 0x800; msr <= 0x8ff; msr++)
+ 		vmx_disable_intercept_msr_read_x2apic(msr);

- 		/* According SDM, in x2apic mode, the whole id reg is used.
- 		 * But in KVM, it only use the highest eight bits. Need to
- 		 * intercept it */
- 		vmx_enable_intercept_msr_read_x2apic(0x802);
- 		/* TMCCT */
- 		vmx_enable_intercept_msr_read_x2apic(0x839);
- 		/* TPR */
- 		vmx_disable_intercept_msr_write_x2apic(0x808);
- 		/* EOI */
- 		vmx_disable_intercept_msr_write_x2apic(0x80b);
- 		/* SELF-IPI */
- 		vmx_disable_intercept_msr_write_x2apic(0x83f);
- 	}
+ 	/* According SDM, in x2apic mode, the whole id reg is used.  But in
+ 	 * KVM, it only use the highest eight bits. Need to intercept it */
+ 	vmx_enable_intercept_msr_read_x2apic(0x802);
+ 	/* TMCCT */
+ 	vmx_enable_intercept_msr_read_x2apic(0x839);
+ 	/* TPR */
+ 	vmx_disable_intercept_msr_write_x2apic(0x808);
+ 	/* EOI */
+ 	vmx_disable_intercept_msr_write_x2apic(0x80b);
+ 	/* SELF-IPI */
+ 	vmx_disable_intercept_msr_write_x2apic(0x83f);

  	if (enable_ept) {
  		kvm_mmu_set_mask_ptes(0ull,
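This is the dynamic APICv fix (CVE-2016-4440) called out in the pull message: both the MSR-bitmap selection and the APIC-register-virtualization controls are now keyed off the live SECONDARY_VM_EXEC_CONTROL state rather than the guest-visible APIC base, so toggling APICv can no longer leave a guest with access to the host's APIC MSRs. The vmcs_set_bits()/vmcs_clear_bits() helpers are read-modify-write operations on a VMCS field; the plain-C model below illustrates that pattern only, with the global variable and helper bodies standing in for the kernel's VMCS accessors (though bits 8 and 9 do match the real control definitions):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the SECONDARY_VM_EXEC_CONTROL VMCS field. */
static uint32_t secondary_exec_control;

static void vmcs_set_bits(uint32_t mask)   { secondary_exec_control |= mask; }
static void vmcs_clear_bits(uint32_t mask) { secondary_exec_control &= ~mask; }

#define APIC_REGISTER_VIRT	(1u << 8)
#define VIRTUAL_INTR_DELIVERY	(1u << 9)

int main(void)
{
	int apicv_active = 0;	/* e.g. APICv dynamically switched off */

	if (apicv_active)
		vmcs_set_bits(APIC_REGISTER_VIRT | VIRTUAL_INTR_DELIVERY);
	else
		vmcs_clear_bits(APIC_REGISTER_VIRT | VIRTUAL_INTR_DELIVERY);

	printf("secondary exec controls: 0x%x\n", secondary_exec_control);
	return 0;
}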
+4 -7
include/kvm/arm_arch_timer.h
···
  #include <linux/workqueue.h>

  struct arch_timer_kvm {
- 	/* Is the timer enabled */
- 	bool			enabled;
-
  	/* Virtual offset */
  	cycle_t			cntvoff;
  };
···
  	/* Timer IRQ */
  	struct kvm_irq_level		irq;

- 	/* VGIC mapping */
- 	struct irq_phys_map		*map;
-
  	/* Active IRQ state caching */
  	bool				active_cleared_last;
+
+ 	/* Is the timer enabled */
+ 	bool				enabled;
  };

  int kvm_timer_hyp_init(void);
- void kvm_timer_enable(struct kvm *kvm);
+ int kvm_timer_enable(struct kvm_vcpu *vcpu);
  void kvm_timer_init(struct kvm *kvm);
  int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
  			 const struct kvm_irq_level *irq);
+11 -9
include/kvm/arm_vgic.h
···
  #ifndef __ASM_ARM_KVM_VGIC_H
  #define __ASM_ARM_KVM_VGIC_H

+ #ifdef CONFIG_KVM_NEW_VGIC
+ #include <kvm/vgic/vgic.h>
+ #else
+
  #include <linux/kernel.h>
  #include <linux/kvm.h>
  #include <linux/irqreturn.h>
···
  struct irq_phys_map {
  	u32			virt_irq;
  	u32			phys_irq;
- 	u32			irq;
  };

  struct irq_phys_map_entry {
···
  	unsigned long	*active_shared;
  	unsigned long	*pend_act_shared;

- 	/* Number of list registers on this CPU */
- 	int		nr_lr;
-
  	/* CPU vif control registers for world switch */
  	union {
  		struct vgic_v2_cpu_if	vgic_v2;
···
  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
  			bool level);
  int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
- 			       struct irq_phys_map *map, bool level);
+ 			       unsigned int virt_irq, bool level);
  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
- struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
- 					   int virt_irq, int irq);
- int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
- bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq);
+ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
+ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);

  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
  #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
  #define vgic_ready(k)		((k)->arch.vgic.ready)
+ #define vgic_valid_spi(k, i)	(((i) >= VGIC_NR_PRIVATE_IRQS) && \
+ 				 ((i) < (k)->arch.vgic.nr_irqs))

  int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
  		  const struct vgic_ops **ops,
···
  }
  #endif

+ #endif	/* old VGIC include */
  #endif
+246
include/kvm/vgic/vgic.h
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __ASM_ARM_KVM_VGIC_VGIC_H
#define __ASM_ARM_KVM_VGIC_VGIC_H

#include <linux/kernel.h>
#include <linux/kvm.h>
#include <linux/irqreturn.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <kvm/iodev.h>

#define VGIC_V3_MAX_CPUS	255
#define VGIC_V2_MAX_CPUS	8
#define VGIC_NR_IRQS_LEGACY	256
#define VGIC_NR_SGIS		16
#define VGIC_NR_PPIS		16
#define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
#define VGIC_MAX_PRIVATE	(VGIC_NR_PRIVATE_IRQS - 1)
#define VGIC_MAX_SPI		1019
#define VGIC_MAX_RESERVED	1023
#define VGIC_MIN_LPI		8192

enum vgic_type {
	VGIC_V2,		/* Good ol' GICv2 */
	VGIC_V3,		/* New fancy GICv3 */
};

/* same for all guests, as depending only on the _host's_ GIC model */
struct vgic_global {
	/* type of the host GIC */
	enum vgic_type		type;

	/* Physical address of vgic virtual cpu interface */
	phys_addr_t		vcpu_base;

	/* virtual control interface mapping */
	void __iomem		*vctrl_base;

	/* Number of implemented list registers */
	int			nr_lr;

	/* Maintenance IRQ number */
	unsigned int		maint_irq;

	/* maximum number of VCPUs allowed (GICv2 limits us to 8) */
	int			max_gic_vcpus;

	/* Only needed for the legacy KVM_CREATE_IRQCHIP */
	bool			can_emulate_gicv2;
};

extern struct vgic_global kvm_vgic_global_state;

#define VGIC_V2_MAX_LRS		(1 << 6)
#define VGIC_V3_MAX_LRS		16
#define VGIC_V3_LR_INDEX(lr)	(VGIC_V3_MAX_LRS - 1 - lr)

enum vgic_irq_config {
	VGIC_CONFIG_EDGE = 0,
	VGIC_CONFIG_LEVEL
};

struct vgic_irq {
	spinlock_t irq_lock;		/* Protects the content of the struct */
	struct list_head ap_list;

	struct kvm_vcpu *vcpu;		/* SGIs and PPIs: The VCPU
					 * SPIs and LPIs: The VCPU whose ap_list
					 * this is queued on.
					 */

	struct kvm_vcpu *target_vcpu;	/* The VCPU that this interrupt should
					 * be sent to, as a result of the
					 * targets reg (v2) or the
					 * affinity reg (v3).
					 */

	u32 intid;			/* Guest visible INTID */
	bool pending;
	bool line_level;		/* Level only */
	bool soft_pending;		/* Level only */
	bool active;			/* not used for LPIs */
	bool enabled;
	bool hw;			/* Tied to HW IRQ */
	u32 hwintid;			/* HW INTID number */
	union {
		u8 targets;		/* GICv2 target VCPUs mask */
		u32 mpidr;		/* GICv3 target VCPU */
	};
	u8 source;			/* GICv2 SGIs only */
	u8 priority;
	enum vgic_irq_config config;	/* Level or edge */
};

struct vgic_register_region;

struct vgic_io_device {
	gpa_t base_addr;
	struct kvm_vcpu *redist_vcpu;
	const struct vgic_register_region *regions;
	int nr_regions;
	struct kvm_io_device dev;
};

struct vgic_dist {
	bool			in_kernel;
	bool			ready;
	bool			initialized;

	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
	u32			vgic_model;

	int			nr_spis;

	/* TODO: Consider moving to global state */
	/* Virtual control interface mapping */
	void __iomem		*vctrl_base;

	/* base addresses in guest physical address space: */
	gpa_t			vgic_dist_base;		/* distributor */
	union {
		/* either a GICv2 CPU interface */
		gpa_t		vgic_cpu_base;
		/* or a number of GICv3 redistributor regions */
		gpa_t		vgic_redist_base;
	};

	/* distributor enabled */
	bool			enabled;

	struct vgic_irq		*spis;

	struct vgic_io_device	dist_iodev;
	struct vgic_io_device	*redist_iodevs;
};

struct vgic_v2_cpu_if {
	u32		vgic_hcr;
	u32		vgic_vmcr;
	u32		vgic_misr;	/* Saved only */
	u64		vgic_eisr;	/* Saved only */
	u64		vgic_elrsr;	/* Saved only */
	u32		vgic_apr;
	u32		vgic_lr[VGIC_V2_MAX_LRS];
};

struct vgic_v3_cpu_if {
#ifdef CONFIG_KVM_ARM_VGIC_V3
	u32		vgic_hcr;
	u32		vgic_vmcr;
	u32		vgic_sre;	/* Restored only, change ignored */
	u32		vgic_misr;	/* Saved only */
	u32		vgic_eisr;	/* Saved only */
	u32		vgic_elrsr;	/* Saved only */
	u32		vgic_ap0r[4];
	u32		vgic_ap1r[4];
	u64		vgic_lr[VGIC_V3_MAX_LRS];
#endif
};

struct vgic_cpu {
	/* CPU vif control registers for world switch */
	union {
		struct vgic_v2_cpu_if	vgic_v2;
		struct vgic_v3_cpu_if	vgic_v3;
	};

	unsigned int used_lrs;
	struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];

	spinlock_t ap_list_lock;	/* Protects the ap_list */

	/*
	 * List of IRQs that this VCPU should consider because they are either
	 * Active or Pending (hence the name; AP list), or because they recently
	 * were one of the two and need to be migrated off this list to another
	 * VCPU.
	 */
	struct list_head ap_list_head;

	u64 live_lrs;
};

int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_hyp_init(void);

int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
			bool level);
int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
			       bool level);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);

#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k)	((k)->arch.vgic.initialized)
#define vgic_ready(k)		((k)->arch.vgic.ready)
#define vgic_valid_spi(k, i)	(((i) >= VGIC_NR_PRIVATE_IRQS) && \
				 ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))

bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);

#ifdef CONFIG_KVM_ARM_VGIC_V3
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
#else
static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
{
}
#endif

/**
 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
 *
 * The host's GIC naturally limits the maximum amount of VCPUs a guest
 * can use.
 */
static inline int kvm_vgic_get_max_vcpus(void)
{
	return kvm_vgic_global_state.max_gic_vcpus;
}

#endif /* __ASM_ARM_KVM_VGIC_VGIC_H */
+6
include/linux/irqchip/arm-gic-v3.h
···
  #define ICH_LR_ACTIVE_BIT		(1ULL << 63)
  #define ICH_LR_PHYS_ID_SHIFT		32
  #define ICH_LR_PHYS_ID_MASK		(0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
+ #define ICH_LR_PRIORITY_SHIFT		48
+
+ /* These are for GICv2 emulation only */
+ #define GICH_LR_VIRTUALID		(0x3ffUL << 0)
+ #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
+ #define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)

  #define ICH_MISR_EOI			(1 << 0)
  #define ICH_MISR_U			(1 << 1)
+2
include/linux/irqchip/arm-gic.h
···

  #define GIC_DIST_CTRL			0x000
  #define GIC_DIST_CTR			0x004
+ #define GIC_DIST_IIDR			0x008
  #define GIC_DIST_IGROUP			0x080
  #define GIC_DIST_ENABLE_SET		0x100
  #define GIC_DIST_ENABLE_CLEAR		0x180
···
  #define GICH_LR_VIRTUALID		(0x3ff << 0)
  #define GICH_LR_PHYSID_CPUID_SHIFT	(10)
  #define GICH_LR_PHYSID_CPUID		(0x3ff << GICH_LR_PHYSID_CPUID_SHIFT)
+ #define GICH_LR_PRIORITY_SHIFT		23
  #define GICH_LR_STATE			(3 << 28)
  #define GICH_LR_PENDING_BIT		(1 << 28)
  #define GICH_LR_ACTIVE_BIT		(1 << 29)
+7
include/linux/kvm_host.h
···
  #endif
  	long tlbs_dirty;
  	struct list_head devices;
+ 	struct dentry *debugfs_dentry;
+ 	struct kvm_stat_data **debugfs_stat_data;
  };

  #define kvm_err(fmt, ...) \
···
  enum kvm_stat_kind {
  	KVM_STAT_VM,
  	KVM_STAT_VCPU,
+ };
+
+ struct kvm_stat_data {
+ 	int offset;
+ 	struct kvm *kvm;
  };

  struct kvm_stats_debugfs_item {
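With kvm->debugfs_dentry and the per-VM kvm_stat_data in place, each VM exposes its own directory of stat files under the kvm debugfs root, which is what kvm_stat's new pid monitoring consumes. A minimal userspace reader, assuming debugfs is mounted at the default /sys/kernel/debug and taking the per-VM directory name (kvm_stat discovers these by matching the QEMU pid against the directory names) and a stat file such as "exits" on the command line:

#include <stdio.h>

int main(int argc, char **argv)
{
	char path[256];
	long value;
	FILE *f;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <vm-dir> <stat>\n", argv[0]);
		return 1;
	}

	/* e.g. /sys/kernel/debug/kvm/<vm-dir>/exits */
	snprintf(path, sizeof(path), "/sys/kernel/debug/kvm/%s/%s",
		 argv[1], argv[2]);

	f = fopen(path, "r");		/* needs root, like debugfs itself */
	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%ld", &value) == 1)
		printf("%s = %ld\n", argv[2], value);
	fclose(f);
	return 0;
}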
+3 -3
include/trace/events/kvm.h
···
  		__entry->coalesced	= coalesced;
  	),

- 	TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
+ 	TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s",
  		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
  		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
  		  (__entry->e & (1<<11)) ? "logical" : "physical",
···
  		__entry->e	= e;
  	),

- 	TP_printk("dst %x vec=%u (%s|%s|%s%s)",
+ 	TP_printk("dst %x vec %u (%s|%s|%s%s)",
  		  (u8)(__entry->e >> 56), (u8)__entry->e,
  		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
  		  (__entry->e & (1<<11)) ? "logical" : "physical",
···
  		__entry->data	= data;
  	),

- 	TP_printk("dst %u vec %x (%s|%s|%s%s)",
+ 	TP_printk("dst %u vec %u (%s|%s|%s%s)",
  		  (u8)(__entry->address >> 12), (u8)__entry->data,
  		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
  		  (__entry->address & (1<<2)) ? "logical" : "physical",
+5 -1
tools/Makefile
···
  	@echo '  gpio       - GPIO tools'
  	@echo '  hv         - tools used when in Hyper-V clients'
  	@echo '  iio        - IIO tools'
+ 	@echo '  kvm_stat   - top-like utility for displaying kvm statistics'
  	@echo '  lguest     - a minimal 32-bit x86 hypervisor'
  	@echo '  net        - misc networking tools'
  	@echo '  perf       - Linux performance measurement and analysis tool'
···
  freefall_install:
  	$(call descend,laptop/$(@:_install=),install)

+ kvm_stat_install:
+ 	$(call descend,kvm/$(@:_install=),install)
+
  install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
  		perf_install selftests_install turbostat_install usb_install \
  		virtio_install vm_install net_install x86_energy_perf_policy_install \
- 		tmon_install freefall_install objtool_install
+ 		tmon_install freefall_install objtool_install kvm_stat_install

  acpi_clean:
  	$(call descend,power/acpi,clean)
+41
tools/kvm/kvm_stat/Makefile
include ../../scripts/Makefile.include
include ../../scripts/utilities.mak
BINDIR=usr/bin
MANDIR=usr/share/man
MAN1DIR=$(MANDIR)/man1

MAN1=kvm_stat.1

A2X=a2x
a2x_path := $(call get-executable,$(A2X))

all: man

ifneq ($(findstring $(MAKEFLAGS),s),s)
ifneq ($(V),1)
	QUIET_A2X = @echo '  A2X     '$@;
endif
endif

%.1: %.txt
ifeq ($(a2x_path),)
	$(error "You need to install asciidoc for man pages")
else
	$(QUIET_A2X)$(A2X) --doctype manpage --format manpage $<
endif

clean:
	rm -f $(MAN1)

man: $(MAN1)

install-man: man
	install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR)
	install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR)

install-tools:
	install -d -m 755 $(INSTALL_ROOT)/$(BINDIR)
	install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"

install: install-tools install-man
.PHONY: all clean man install-tools install-man install
+1127
tools/kvm/kvm_stat/kvm_stat
··· 1 + #!/usr/bin/python 2 + # 3 + # top-like utility for displaying kvm statistics 4 + # 5 + # Copyright 2006-2008 Qumranet Technologies 6 + # Copyright 2008-2011 Red Hat, Inc. 7 + # 8 + # Authors: 9 + # Avi Kivity <avi@redhat.com> 10 + # 11 + # This work is licensed under the terms of the GNU GPL, version 2. See 12 + # the COPYING file in the top-level directory. 13 + """The kvm_stat module outputs statistics about running KVM VMs 14 + 15 + Three different ways of output formatting are available: 16 + - as a top-like text ui 17 + - in a key -> value format 18 + - in an all keys, all values format 19 + 20 + The data is sampled from the KVM's debugfs entries and its perf events. 21 + """ 22 + 23 + import curses 24 + import sys 25 + import os 26 + import time 27 + import optparse 28 + import ctypes 29 + import fcntl 30 + import resource 31 + import struct 32 + import re 33 + from collections import defaultdict 34 + from time import sleep 35 + 36 + VMX_EXIT_REASONS = { 37 + 'EXCEPTION_NMI': 0, 38 + 'EXTERNAL_INTERRUPT': 1, 39 + 'TRIPLE_FAULT': 2, 40 + 'PENDING_INTERRUPT': 7, 41 + 'NMI_WINDOW': 8, 42 + 'TASK_SWITCH': 9, 43 + 'CPUID': 10, 44 + 'HLT': 12, 45 + 'INVLPG': 14, 46 + 'RDPMC': 15, 47 + 'RDTSC': 16, 48 + 'VMCALL': 18, 49 + 'VMCLEAR': 19, 50 + 'VMLAUNCH': 20, 51 + 'VMPTRLD': 21, 52 + 'VMPTRST': 22, 53 + 'VMREAD': 23, 54 + 'VMRESUME': 24, 55 + 'VMWRITE': 25, 56 + 'VMOFF': 26, 57 + 'VMON': 27, 58 + 'CR_ACCESS': 28, 59 + 'DR_ACCESS': 29, 60 + 'IO_INSTRUCTION': 30, 61 + 'MSR_READ': 31, 62 + 'MSR_WRITE': 32, 63 + 'INVALID_STATE': 33, 64 + 'MWAIT_INSTRUCTION': 36, 65 + 'MONITOR_INSTRUCTION': 39, 66 + 'PAUSE_INSTRUCTION': 40, 67 + 'MCE_DURING_VMENTRY': 41, 68 + 'TPR_BELOW_THRESHOLD': 43, 69 + 'APIC_ACCESS': 44, 70 + 'EPT_VIOLATION': 48, 71 + 'EPT_MISCONFIG': 49, 72 + 'WBINVD': 54, 73 + 'XSETBV': 55, 74 + 'APIC_WRITE': 56, 75 + 'INVPCID': 58, 76 + } 77 + 78 + SVM_EXIT_REASONS = { 79 + 'READ_CR0': 0x000, 80 + 'READ_CR3': 0x003, 81 + 'READ_CR4': 0x004, 82 + 'READ_CR8': 0x008, 83 + 'WRITE_CR0': 0x010, 84 + 'WRITE_CR3': 0x013, 85 + 'WRITE_CR4': 0x014, 86 + 'WRITE_CR8': 0x018, 87 + 'READ_DR0': 0x020, 88 + 'READ_DR1': 0x021, 89 + 'READ_DR2': 0x022, 90 + 'READ_DR3': 0x023, 91 + 'READ_DR4': 0x024, 92 + 'READ_DR5': 0x025, 93 + 'READ_DR6': 0x026, 94 + 'READ_DR7': 0x027, 95 + 'WRITE_DR0': 0x030, 96 + 'WRITE_DR1': 0x031, 97 + 'WRITE_DR2': 0x032, 98 + 'WRITE_DR3': 0x033, 99 + 'WRITE_DR4': 0x034, 100 + 'WRITE_DR5': 0x035, 101 + 'WRITE_DR6': 0x036, 102 + 'WRITE_DR7': 0x037, 103 + 'EXCP_BASE': 0x040, 104 + 'INTR': 0x060, 105 + 'NMI': 0x061, 106 + 'SMI': 0x062, 107 + 'INIT': 0x063, 108 + 'VINTR': 0x064, 109 + 'CR0_SEL_WRITE': 0x065, 110 + 'IDTR_READ': 0x066, 111 + 'GDTR_READ': 0x067, 112 + 'LDTR_READ': 0x068, 113 + 'TR_READ': 0x069, 114 + 'IDTR_WRITE': 0x06a, 115 + 'GDTR_WRITE': 0x06b, 116 + 'LDTR_WRITE': 0x06c, 117 + 'TR_WRITE': 0x06d, 118 + 'RDTSC': 0x06e, 119 + 'RDPMC': 0x06f, 120 + 'PUSHF': 0x070, 121 + 'POPF': 0x071, 122 + 'CPUID': 0x072, 123 + 'RSM': 0x073, 124 + 'IRET': 0x074, 125 + 'SWINT': 0x075, 126 + 'INVD': 0x076, 127 + 'PAUSE': 0x077, 128 + 'HLT': 0x078, 129 + 'INVLPG': 0x079, 130 + 'INVLPGA': 0x07a, 131 + 'IOIO': 0x07b, 132 + 'MSR': 0x07c, 133 + 'TASK_SWITCH': 0x07d, 134 + 'FERR_FREEZE': 0x07e, 135 + 'SHUTDOWN': 0x07f, 136 + 'VMRUN': 0x080, 137 + 'VMMCALL': 0x081, 138 + 'VMLOAD': 0x082, 139 + 'VMSAVE': 0x083, 140 + 'STGI': 0x084, 141 + 'CLGI': 0x085, 142 + 'SKINIT': 0x086, 143 + 'RDTSCP': 0x087, 144 + 'ICEBP': 0x088, 145 + 'WBINVD': 0x089, 146 + 'MONITOR': 0x08a, 147 + 'MWAIT': 0x08b, 148 + 
'MWAIT_COND': 0x08c, 149 + 'XSETBV': 0x08d, 150 + 'NPF': 0x400, 151 + } 152 + 153 + # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) 154 + AARCH64_EXIT_REASONS = { 155 + 'UNKNOWN': 0x00, 156 + 'WFI': 0x01, 157 + 'CP15_32': 0x03, 158 + 'CP15_64': 0x04, 159 + 'CP14_MR': 0x05, 160 + 'CP14_LS': 0x06, 161 + 'FP_ASIMD': 0x07, 162 + 'CP10_ID': 0x08, 163 + 'CP14_64': 0x0C, 164 + 'ILL_ISS': 0x0E, 165 + 'SVC32': 0x11, 166 + 'HVC32': 0x12, 167 + 'SMC32': 0x13, 168 + 'SVC64': 0x15, 169 + 'HVC64': 0x16, 170 + 'SMC64': 0x17, 171 + 'SYS64': 0x18, 172 + 'IABT': 0x20, 173 + 'IABT_HYP': 0x21, 174 + 'PC_ALIGN': 0x22, 175 + 'DABT': 0x24, 176 + 'DABT_HYP': 0x25, 177 + 'SP_ALIGN': 0x26, 178 + 'FP_EXC32': 0x28, 179 + 'FP_EXC64': 0x2C, 180 + 'SERROR': 0x2F, 181 + 'BREAKPT': 0x30, 182 + 'BREAKPT_HYP': 0x31, 183 + 'SOFTSTP': 0x32, 184 + 'SOFTSTP_HYP': 0x33, 185 + 'WATCHPT': 0x34, 186 + 'WATCHPT_HYP': 0x35, 187 + 'BKPT32': 0x38, 188 + 'VECTOR32': 0x3A, 189 + 'BRK64': 0x3C, 190 + } 191 + 192 + # From include/uapi/linux/kvm.h, KVM_EXIT_xxx 193 + USERSPACE_EXIT_REASONS = { 194 + 'UNKNOWN': 0, 195 + 'EXCEPTION': 1, 196 + 'IO': 2, 197 + 'HYPERCALL': 3, 198 + 'DEBUG': 4, 199 + 'HLT': 5, 200 + 'MMIO': 6, 201 + 'IRQ_WINDOW_OPEN': 7, 202 + 'SHUTDOWN': 8, 203 + 'FAIL_ENTRY': 9, 204 + 'INTR': 10, 205 + 'SET_TPR': 11, 206 + 'TPR_ACCESS': 12, 207 + 'S390_SIEIC': 13, 208 + 'S390_RESET': 14, 209 + 'DCR': 15, 210 + 'NMI': 16, 211 + 'INTERNAL_ERROR': 17, 212 + 'OSI': 18, 213 + 'PAPR_HCALL': 19, 214 + 'S390_UCONTROL': 20, 215 + 'WATCHDOG': 21, 216 + 'S390_TSCH': 22, 217 + 'EPR': 23, 218 + 'SYSTEM_EVENT': 24, 219 + } 220 + 221 + IOCTL_NUMBERS = { 222 + 'SET_FILTER': 0x40082406, 223 + 'ENABLE': 0x00002400, 224 + 'DISABLE': 0x00002401, 225 + 'RESET': 0x00002403, 226 + } 227 + 228 + class Arch(object): 229 + """Encapsulates global architecture specific data. 230 + 231 + Contains the performance event open syscall and ioctl numbers, as 232 + well as the VM exit reasons for the architecture it runs on. 233 + 234 + """ 235 + @staticmethod 236 + def get_arch(): 237 + machine = os.uname()[4] 238 + 239 + if machine.startswith('ppc'): 240 + return ArchPPC() 241 + elif machine.startswith('aarch64'): 242 + return ArchA64() 243 + elif machine.startswith('s390'): 244 + return ArchS390() 245 + else: 246 + # X86_64 247 + for line in open('/proc/cpuinfo'): 248 + if not line.startswith('flags'): 249 + continue 250 + 251 + flags = line.split() 252 + if 'vmx' in flags: 253 + return ArchX86(VMX_EXIT_REASONS) 254 + if 'svm' in flags: 255 + return ArchX86(SVM_EXIT_REASONS) 256 + return 257 + 258 + class ArchX86(Arch): 259 + def __init__(self, exit_reasons): 260 + self.sc_perf_evt_open = 298 261 + self.ioctl_numbers = IOCTL_NUMBERS 262 + self.exit_reasons = exit_reasons 263 + 264 + class ArchPPC(Arch): 265 + def __init__(self): 266 + self.sc_perf_evt_open = 319 267 + self.ioctl_numbers = IOCTL_NUMBERS 268 + self.ioctl_numbers['ENABLE'] = 0x20002400 269 + self.ioctl_numbers['DISABLE'] = 0x20002401 270 + self.ioctl_numbers['RESET'] = 0x20002403 271 + 272 + # PPC comes in 32 and 64 bit and some generated ioctl 273 + # numbers depend on the wordsize. 
274 + char_ptr_size = ctypes.sizeof(ctypes.c_char_p) 275 + self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 276 + self.exit_reasons = {} 277 + 278 + class ArchA64(Arch): 279 + def __init__(self): 280 + self.sc_perf_evt_open = 241 281 + self.ioctl_numbers = IOCTL_NUMBERS 282 + self.exit_reasons = AARCH64_EXIT_REASONS 283 + 284 + class ArchS390(Arch): 285 + def __init__(self): 286 + self.sc_perf_evt_open = 331 287 + self.ioctl_numbers = IOCTL_NUMBERS 288 + self.exit_reasons = None 289 + 290 + ARCH = Arch.get_arch() 291 + 292 + 293 + def walkdir(path): 294 + """Returns os.walk() data for specified directory. 295 + 296 + As it is only a wrapper it returns the same 3-tuple of (dirpath, 297 + dirnames, filenames). 298 + """ 299 + return next(os.walk(path)) 300 + 301 + 302 + def parse_int_list(list_string): 303 + """Returns an int list from a string of comma separated integers and 304 + integer ranges.""" 305 + integers = [] 306 + members = list_string.split(',') 307 + 308 + for member in members: 309 + if '-' not in member: 310 + integers.append(int(member)) 311 + else: 312 + int_range = member.split('-') 313 + integers.extend(range(int(int_range[0]), 314 + int(int_range[1]) + 1)) 315 + 316 + return integers 317 + 318 + 319 + def get_online_cpus(): 320 + """Returns a list of cpu id integers.""" 321 + with open('/sys/devices/system/cpu/online') as cpu_list: 322 + cpu_string = cpu_list.readline() 323 + return parse_int_list(cpu_string) 324 + 325 + 326 + def get_filters(): 327 + """Returns a dict of trace events, their filter ids and 328 + the values that can be filtered. 329 + 330 + Trace events can be filtered for special values by setting a 331 + filter string via an ioctl. The string normally has the format 332 + identifier==value. For each filter a new event will be created, to 333 + be able to distinguish the events. 334 + 335 + """ 336 + filters = {} 337 + filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) 338 + if ARCH.exit_reasons: 339 + filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) 340 + return filters 341 + 342 + libc = ctypes.CDLL('libc.so.6', use_errno=True) 343 + syscall = libc.syscall 344 + 345 + class perf_event_attr(ctypes.Structure): 346 + """Struct that holds the necessary data to set up a trace event. 347 + 348 + For an extensive explanation see perf_event_open(2) and 349 + include/uapi/linux/perf_event.h, struct perf_event_attr 350 + 351 + All fields that are not initialized in the constructor are 0. 352 + 353 + """ 354 + _fields_ = [('type', ctypes.c_uint32), 355 + ('size', ctypes.c_uint32), 356 + ('config', ctypes.c_uint64), 357 + ('sample_freq', ctypes.c_uint64), 358 + ('sample_type', ctypes.c_uint64), 359 + ('read_format', ctypes.c_uint64), 360 + ('flags', ctypes.c_uint64), 361 + ('wakeup_events', ctypes.c_uint32), 362 + ('bp_type', ctypes.c_uint32), 363 + ('bp_addr', ctypes.c_uint64), 364 + ('bp_len', ctypes.c_uint64), 365 + ] 366 + 367 + def __init__(self): 368 + super(self.__class__, self).__init__() 369 + self.type = PERF_TYPE_TRACEPOINT 370 + self.size = ctypes.sizeof(self) 371 + self.read_format = PERF_FORMAT_GROUP 372 + 373 + def perf_event_open(attr, pid, cpu, group_fd, flags): 374 + """Wrapper for the sys_perf_evt_open() syscall. 375 + 376 + Used to set up performance events, returns a file descriptor or -1 377 + on error. 
378 + 379 + Attributes are: 380 + - syscall number 381 + - struct perf_event_attr * 382 + - pid or -1 to monitor all pids 383 + - cpu number or -1 to monitor all cpus 384 + - The file descriptor of the group leader or -1 to create a group. 385 + - flags 386 + 387 + """ 388 + return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), 389 + ctypes.c_int(pid), ctypes.c_int(cpu), 390 + ctypes.c_int(group_fd), ctypes.c_long(flags)) 391 + 392 + PERF_TYPE_TRACEPOINT = 2 393 + PERF_FORMAT_GROUP = 1 << 3 394 + 395 + PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' 396 + PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' 397 + 398 + class Group(object): 399 + """Represents a perf event group.""" 400 + 401 + def __init__(self): 402 + self.events = [] 403 + 404 + def add_event(self, event): 405 + self.events.append(event) 406 + 407 + def read(self): 408 + """Returns a dict with 'event name: value' for all events in the 409 + group. 410 + 411 + Values are read by reading from the file descriptor of the 412 + event that is the group leader. See perf_event_open(2) for 413 + details. 414 + 415 + Read format for the used event configuration is: 416 + struct read_format { 417 + u64 nr; /* The number of events */ 418 + struct { 419 + u64 value; /* The value of the event */ 420 + } values[nr]; 421 + }; 422 + 423 + """ 424 + length = 8 * (1 + len(self.events)) 425 + read_format = 'xxxxxxxx' + 'Q' * len(self.events) 426 + return dict(zip([event.name for event in self.events], 427 + struct.unpack(read_format, 428 + os.read(self.events[0].fd, length)))) 429 + 430 + class Event(object): 431 + """Represents a performance event and manages its life cycle.""" 432 + def __init__(self, name, group, trace_cpu, trace_pid, trace_point, 433 + trace_filter, trace_set='kvm'): 434 + self.name = name 435 + self.fd = None 436 + self.setup_event(group, trace_cpu, trace_pid, trace_point, 437 + trace_filter, trace_set) 438 + 439 + def __del__(self): 440 + """Closes the event's file descriptor. 441 + 442 + As no python file object was created for the file descriptor, 443 + python will not reference count the descriptor and will not 444 + close it itself automatically, so we do it. 445 + 446 + """ 447 + if self.fd: 448 + os.close(self.fd) 449 + 450 + def setup_event_attribute(self, trace_set, trace_point): 451 + """Returns an initialized ctype perf_event_attr struct.""" 452 + 453 + id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, 454 + trace_point, 'id') 455 + 456 + event_attr = perf_event_attr() 457 + event_attr.config = int(open(id_path).read()) 458 + return event_attr 459 + 460 + def setup_event(self, group, trace_cpu, trace_pid, trace_point, 461 + trace_filter, trace_set): 462 + """Sets up the perf event in Linux. 463 + 464 + Issues the syscall to register the event in the kernel and 465 + then sets the optional filter. 466 + 467 + """ 468 + 469 + event_attr = self.setup_event_attribute(trace_set, trace_point) 470 + 471 + # First event will be group leader. 472 + group_leader = -1 473 + 474 + # All others have to pass the leader's descriptor instead. 
475 + if group.events: 476 + group_leader = group.events[0].fd 477 + 478 + fd = perf_event_open(event_attr, trace_pid, 479 + trace_cpu, group_leader, 0) 480 + if fd == -1: 481 + err = ctypes.get_errno() 482 + raise OSError(err, os.strerror(err), 483 + 'while calling sys_perf_event_open().') 484 + 485 + if trace_filter: 486 + fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'], 487 + trace_filter) 488 + 489 + self.fd = fd 490 + 491 + def enable(self): 492 + """Enables the trace event in the kernel. 493 + 494 + Enabling the group leader makes reading counters from it and the 495 + events under it possible. 496 + 497 + """ 498 + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) 499 + 500 + def disable(self): 501 + """Disables the trace event in the kernel. 502 + 503 + Disabling the group leader makes reading all counters under it 504 + impossible. 505 + 506 + """ 507 + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) 508 + 509 + def reset(self): 510 + """Resets the count of the trace event in the kernel.""" 511 + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) 512 + 513 + class TracepointProvider(object): 514 + """Data provider for the stats class. 515 + 516 + Manages the events/groups from which it acquires its data. 517 + 518 + """ 519 + def __init__(self): 520 + self.group_leaders = [] 521 + self.filters = get_filters() 522 + self._fields = self.get_available_fields() 523 + self._pid = 0 524 + 525 + def get_available_fields(self): 526 + """Returns a list of available event's of format 'event name(filter 527 + name)'. 528 + 529 + All available events have directories under 530 + /sys/kernel/debug/tracing/events/ which export information 531 + about the specific event. Therefore, listing the dirs gives us 532 + a list of all available events. 533 + 534 + Some events like the vm exit reasons can be filtered for 535 + specific values. To take account for that, the routine below 536 + creates special fields with the following format: 537 + event name(filter name) 538 + 539 + """ 540 + path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') 541 + fields = walkdir(path)[1] 542 + extra = [] 543 + for field in fields: 544 + if field in self.filters: 545 + filter_name_, filter_dicts = self.filters[field] 546 + for name in filter_dicts: 547 + extra.append(field + '(' + name + ')') 548 + fields += extra 549 + return fields 550 + 551 + def setup_traces(self): 552 + """Creates all event and group objects needed to be able to retrieve 553 + data.""" 554 + if self._pid > 0: 555 + # Fetch list of all threads of the monitored pid, as qemu 556 + # starts a thread for each vcpu. 557 + path = os.path.join('/proc', str(self._pid), 'task') 558 + groupids = walkdir(path)[1] 559 + else: 560 + groupids = get_online_cpus() 561 + 562 + # The constant is needed as a buffer for python libs, std 563 + # streams and other files that the script opens. 564 + newlim = len(groupids) * len(self._fields) + 50 565 + try: 566 + softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) 567 + 568 + if hardlim < newlim: 569 + # Now we need CAP_SYS_RESOURCE, to increase the hard limit. 570 + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim)) 571 + else: 572 + # Raising the soft limit is sufficient. 
573 + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim)) 574 + 575 + except ValueError: 576 + sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) 577 + 578 + for groupid in groupids: 579 + group = Group() 580 + for name in self._fields: 581 + tracepoint = name 582 + tracefilter = None 583 + match = re.match(r'(.*)\((.*)\)', name) 584 + if match: 585 + tracepoint, sub = match.groups() 586 + tracefilter = ('%s==%d\0' % 587 + (self.filters[tracepoint][0], 588 + self.filters[tracepoint][1][sub])) 589 + 590 + # From perf_event_open(2): 591 + # pid > 0 and cpu == -1 592 + # This measures the specified process/thread on any CPU. 593 + # 594 + # pid == -1 and cpu >= 0 595 + # This measures all processes/threads on the specified CPU. 596 + trace_cpu = groupid if self._pid == 0 else -1 597 + trace_pid = int(groupid) if self._pid != 0 else -1 598 + 599 + group.add_event(Event(name=name, 600 + group=group, 601 + trace_cpu=trace_cpu, 602 + trace_pid=trace_pid, 603 + trace_point=tracepoint, 604 + trace_filter=tracefilter)) 605 + 606 + self.group_leaders.append(group) 607 + 608 + def available_fields(self): 609 + return self.get_available_fields() 610 + 611 + @property 612 + def fields(self): 613 + return self._fields 614 + 615 + @fields.setter 616 + def fields(self, fields): 617 + """Enables/disables the (un)wanted events""" 618 + self._fields = fields 619 + for group in self.group_leaders: 620 + for index, event in enumerate(group.events): 621 + if event.name in fields: 622 + event.reset() 623 + event.enable() 624 + else: 625 + # Do not disable the group leader. 626 + # It would disable all of its events. 627 + if index != 0: 628 + event.disable() 629 + 630 + @property 631 + def pid(self): 632 + return self._pid 633 + 634 + @pid.setter 635 + def pid(self, pid): 636 + """Changes the monitored pid by setting new traces.""" 637 + self._pid = pid 638 + # The garbage collector will get rid of all Event/Group 639 + # objects and open files after removing the references. 640 + self.group_leaders = [] 641 + self.setup_traces() 642 + self.fields = self._fields 643 + 644 + def read(self): 645 + """Returns 'event name: current value' for all enabled events.""" 646 + ret = defaultdict(int) 647 + for group in self.group_leaders: 648 + for name, val in group.read().iteritems(): 649 + if name in self._fields: 650 + ret[name] += val 651 + return ret 652 + 653 + class DebugfsProvider(object): 654 + """Provides data from the files that KVM creates in the kvm debugfs 655 + folder.""" 656 + def __init__(self): 657 + self._fields = self.get_available_fields() 658 + self._pid = 0 659 + self.do_read = True 660 + 661 + def get_available_fields(self): 662 + """"Returns a list of available fields. 
663 + 664 + The fields are all available KVM debugfs files 665 + 666 + """ 667 + return walkdir(PATH_DEBUGFS_KVM)[2] 668 + 669 + @property 670 + def fields(self): 671 + return self._fields 672 + 673 + @fields.setter 674 + def fields(self, fields): 675 + self._fields = fields 676 + 677 + @property 678 + def pid(self): 679 + return self._pid 680 + 681 + @pid.setter 682 + def pid(self, pid): 683 + if pid != 0: 684 + self._pid = pid 685 + 686 + vms = walkdir(PATH_DEBUGFS_KVM)[1] 687 + if len(vms) == 0: 688 + self.do_read = False 689 + 690 + self.paths = filter(lambda x: "{}-".format(pid) in x, vms) 691 + 692 + else: 693 + self.paths = [''] 694 + self.do_read = True 695 + 696 + def read(self): 697 + """Returns a dict with format:'file name / field -> current value'.""" 698 + results = {} 699 + 700 + # If no debugfs filtering support is available, then don't read. 701 + if not self.do_read: 702 + return results 703 + 704 + for path in self.paths: 705 + for field in self._fields: 706 + results[field] = results.get(field, 0) \ 707 + + self.read_field(field, path) 708 + 709 + return results 710 + 711 + def read_field(self, field, path): 712 + """Returns the value of a single field from a specific VM.""" 713 + try: 714 + return int(open(os.path.join(PATH_DEBUGFS_KVM, 715 + path, 716 + field)) 717 + .read()) 718 + except IOError: 719 + return 0 720 + 721 + class Stats(object): 722 + """Manages the data providers and the data they provide. 723 + 724 + It is used to set filters on the provider's data and collect all 725 + provider data. 726 + 727 + """ 728 + def __init__(self, providers, pid, fields=None): 729 + self.providers = providers 730 + self._pid_filter = pid 731 + self._fields_filter = fields 732 + self.values = {} 733 + self.update_provider_pid() 734 + self.update_provider_filters() 735 + 736 + def update_provider_filters(self): 737 + """Propagates fields filters to providers.""" 738 + def wanted(key): 739 + if not self._fields_filter: 740 + return True 741 + return re.match(self._fields_filter, key) is not None 742 + 743 + # As we reset the counters when updating the fields we can 744 + # also clear the cache of old values. 
745 + self.values = {} 746 + for provider in self.providers: 747 + provider_fields = [key for key in provider.get_available_fields() 748 + if wanted(key)] 749 + provider.fields = provider_fields 750 + 751 + def update_provider_pid(self): 752 + """Propagates pid filters to providers.""" 753 + for provider in self.providers: 754 + provider.pid = self._pid_filter 755 + 756 + @property 757 + def fields_filter(self): 758 + return self._fields_filter 759 + 760 + @fields_filter.setter 761 + def fields_filter(self, fields_filter): 762 + self._fields_filter = fields_filter 763 + self.update_provider_filters() 764 + 765 + @property 766 + def pid_filter(self): 767 + return self._pid_filter 768 + 769 + @pid_filter.setter 770 + def pid_filter(self, pid): 771 + self._pid_filter = pid 772 + self.values = {} 773 + self.update_provider_pid() 774 + 775 + def get(self): 776 + """Returns a dict with field -> (value, delta to last value) of all 777 + provider data.""" 778 + for provider in self.providers: 779 + new = provider.read() 780 + for key in provider.fields: 781 + oldval = self.values.get(key, (0, 0)) 782 + newval = new.get(key, 0) 783 + newdelta = None 784 + if oldval is not None: 785 + newdelta = newval - oldval[0] 786 + self.values[key] = (newval, newdelta) 787 + return self.values 788 + 789 + LABEL_WIDTH = 40 790 + NUMBER_WIDTH = 10 791 + 792 + class Tui(object): 793 + """Instruments curses to draw a nice text ui.""" 794 + def __init__(self, stats): 795 + self.stats = stats 796 + self.screen = None 797 + self.drilldown = False 798 + self.update_drilldown() 799 + 800 + def __enter__(self): 801 + """Initialises curses for later use. Based on curses.wrapper 802 + implementation from the Python standard library.""" 803 + self.screen = curses.initscr() 804 + curses.noecho() 805 + curses.cbreak() 806 + 807 + # The try/catch works around a minor bit of 808 + # over-conscientiousness in the curses module, the error 809 + # return from C start_color() is ignorable. 810 + try: 811 + curses.start_color() 812 + except: 813 + pass 814 + 815 + curses.use_default_colors() 816 + return self 817 + 818 + def __exit__(self, *exception): 819 + """Resets the terminal to its normal state. 
Based on curses.wrapper
820 + implementation from the Python standard library."""
821 + if self.screen:
822 + self.screen.keypad(0)
823 + curses.echo()
824 + curses.nocbreak()
825 + curses.endwin()
826 +
827 + def update_drilldown(self):
828 + """Sets or removes a filter that only allows fields without braces."""
829 + if not self.stats.fields_filter:
830 + self.stats.fields_filter = r'^[^\(]*$'
831 +
832 + elif self.stats.fields_filter == r'^[^\(]*$':
833 + self.stats.fields_filter = None
834 +
835 + def update_pid(self, pid):
836 + """Propagates pid selection to stats object."""
837 + self.stats.pid_filter = pid
838 +
839 + def refresh(self, sleeptime):
840 + """Refreshes on-screen data."""
841 + self.screen.erase()
842 + if self.stats.pid_filter > 0:
843 + self.screen.addstr(0, 0, 'kvm statistics - pid {0}'
844 + .format(self.stats.pid_filter),
845 + curses.A_BOLD)
846 + else:
847 + self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
848 + self.screen.addstr(2, 1, 'Event')
849 + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
850 + len('Total'), 'Total')
851 + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
852 + len('Current'), 'Current')
853 + row = 3
854 + stats = self.stats.get()
855 + def sortkey(x):
856 + if stats[x][1]:
857 + return (-stats[x][1], -stats[x][0])
858 + else:
859 + return (0, -stats[x][0])
860 + for key in sorted(stats.keys(), key=sortkey):
861 +
862 + if row >= self.screen.getmaxyx()[0]:
863 + break
864 + values = stats[key]
865 + if not values[0] and not values[1]:
866 + break
867 + col = 1
868 + self.screen.addstr(row, col, key)
869 + col += LABEL_WIDTH
870 + self.screen.addstr(row, col, '%10d' % (values[0],))
871 + col += NUMBER_WIDTH
872 + if values[1] is not None:
873 + self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
874 + row += 1
875 + self.screen.refresh()
876 +
877 + def show_filter_selection(self):
878 + """Draws filter selection mask.
879 +
880 + Asks for a valid regex and sets the fields filter accordingly.
881 +
882 + """
883 + while True:
884 + self.screen.erase()
885 + self.screen.addstr(0, 0,
886 + "Show statistics for events matching a regex.",
887 + curses.A_BOLD)
888 + self.screen.addstr(2, 0,
889 + "Current regex: {0}"
890 + .format(self.stats.fields_filter))
891 + self.screen.addstr(3, 0, "New regex: ")
892 + curses.echo()
893 + regex = self.screen.getstr()
894 + curses.noecho()
895 + if len(regex) == 0:
896 + return
897 + try:
898 + re.compile(regex)
899 + self.stats.fields_filter = regex
900 + return
901 + except re.error:
902 + continue
903 +
904 + def show_vm_selection(self):
905 + """Draws PID selection mask.
906 +
907 + Asks for a pid until a valid pid or 0 has been entered.
908 + 909 + """ 910 + while True: 911 + self.screen.erase() 912 + self.screen.addstr(0, 0, 913 + 'Show statistics for specific pid.', 914 + curses.A_BOLD) 915 + self.screen.addstr(1, 0, 916 + 'This might limit the shown data to the trace ' 917 + 'statistics.') 918 + 919 + curses.echo() 920 + self.screen.addstr(3, 0, "Pid [0 or pid]: ") 921 + pid = self.screen.getstr() 922 + curses.noecho() 923 + 924 + try: 925 + pid = int(pid) 926 + 927 + if pid == 0: 928 + self.update_pid(pid) 929 + break 930 + else: 931 + if not os.path.isdir(os.path.join('/proc/', str(pid))): 932 + continue 933 + else: 934 + self.update_pid(pid) 935 + break 936 + 937 + except ValueError: 938 + continue 939 + 940 + def show_stats(self): 941 + """Refreshes the screen and processes user input.""" 942 + sleeptime = 0.25 943 + while True: 944 + self.refresh(sleeptime) 945 + curses.halfdelay(int(sleeptime * 10)) 946 + sleeptime = 3 947 + try: 948 + char = self.screen.getkey() 949 + if char == 'x': 950 + self.drilldown = not self.drilldown 951 + self.update_drilldown() 952 + if char == 'q': 953 + break 954 + if char == 'f': 955 + self.show_filter_selection() 956 + if char == 'p': 957 + self.show_vm_selection() 958 + except KeyboardInterrupt: 959 + break 960 + except curses.error: 961 + continue 962 + 963 + def batch(stats): 964 + """Prints statistics in a key, value format.""" 965 + s = stats.get() 966 + time.sleep(1) 967 + s = stats.get() 968 + for key in sorted(s.keys()): 969 + values = s[key] 970 + print '%-42s%10d%10d' % (key, values[0], values[1]) 971 + 972 + def log(stats): 973 + """Prints statistics as reiterating key block, multiple value blocks.""" 974 + keys = sorted(stats.get().iterkeys()) 975 + def banner(): 976 + for k in keys: 977 + print '%s' % k, 978 + print 979 + def statline(): 980 + s = stats.get() 981 + for k in keys: 982 + print ' %9d' % s[k][1], 983 + print 984 + line = 0 985 + banner_repeat = 20 986 + while True: 987 + time.sleep(1) 988 + if line % banner_repeat == 0: 989 + banner() 990 + statline() 991 + line += 1 992 + 993 + def get_options(): 994 + """Returns processed program arguments.""" 995 + description_text = """ 996 + This script displays various statistics about VMs running under KVM. 997 + The statistics are gathered from the KVM debugfs entries and / or the 998 + currently available perf traces. 999 + 1000 + The monitoring takes additional cpu cycles and might affect the VM's 1001 + performance. 1002 + 1003 + Requirements: 1004 + - Access to: 1005 + /sys/kernel/debug/kvm 1006 + /sys/kernel/debug/trace/events/* 1007 + /proc/pid/task 1008 + - /proc/sys/kernel/perf_event_paranoid < 1 if user has no 1009 + CAP_SYS_ADMIN and perf events are used. 1010 + - CAP_SYS_RESOURCE if the hard limit is not high enough to allow 1011 + the large number of files that are possibly opened. 
1012 + """ 1013 + 1014 + class PlainHelpFormatter(optparse.IndentedHelpFormatter): 1015 + def format_description(self, description): 1016 + if description: 1017 + return description + "\n" 1018 + else: 1019 + return "" 1020 + 1021 + optparser = optparse.OptionParser(description=description_text, 1022 + formatter=PlainHelpFormatter()) 1023 + optparser.add_option('-1', '--once', '--batch', 1024 + action='store_true', 1025 + default=False, 1026 + dest='once', 1027 + help='run in batch mode for one second', 1028 + ) 1029 + optparser.add_option('-l', '--log', 1030 + action='store_true', 1031 + default=False, 1032 + dest='log', 1033 + help='run in logging mode (like vmstat)', 1034 + ) 1035 + optparser.add_option('-t', '--tracepoints', 1036 + action='store_true', 1037 + default=False, 1038 + dest='tracepoints', 1039 + help='retrieve statistics from tracepoints', 1040 + ) 1041 + optparser.add_option('-d', '--debugfs', 1042 + action='store_true', 1043 + default=False, 1044 + dest='debugfs', 1045 + help='retrieve statistics from debugfs', 1046 + ) 1047 + optparser.add_option('-f', '--fields', 1048 + action='store', 1049 + default=None, 1050 + dest='fields', 1051 + help='fields to display (regex)', 1052 + ) 1053 + optparser.add_option('-p', '--pid', 1054 + action='store', 1055 + default=0, 1056 + type=int, 1057 + dest='pid', 1058 + help='restrict statistics to pid', 1059 + ) 1060 + (options, _) = optparser.parse_args(sys.argv) 1061 + return options 1062 + 1063 + def get_providers(options): 1064 + """Returns a list of data providers depending on the passed options.""" 1065 + providers = [] 1066 + 1067 + if options.tracepoints: 1068 + providers.append(TracepointProvider()) 1069 + if options.debugfs: 1070 + providers.append(DebugfsProvider()) 1071 + if len(providers) == 0: 1072 + providers.append(TracepointProvider()) 1073 + 1074 + return providers 1075 + 1076 + def check_access(options): 1077 + """Exits if the current user can't access all needed directories.""" 1078 + if not os.path.exists('/sys/kernel/debug'): 1079 + sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') 1080 + sys.exit(1) 1081 + 1082 + if not os.path.exists(PATH_DEBUGFS_KVM): 1083 + sys.stderr.write("Please make sure, that debugfs is mounted and " 1084 + "readable by the current user:\n" 1085 + "('mount -t debugfs debugfs /sys/kernel/debug')\n" 1086 + "Also ensure, that the kvm modules are loaded.\n") 1087 + sys.exit(1) 1088 + 1089 + if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints 1090 + or not options.debugfs): 1091 + sys.stderr.write("Please enable CONFIG_TRACING in your kernel " 1092 + "when using the option -t (default).\n" 1093 + "If it is enabled, make {0} readable by the " 1094 + "current user.\n" 1095 + .format(PATH_DEBUGFS_TRACING)) 1096 + if options.tracepoints: 1097 + sys.exit(1) 1098 + 1099 + sys.stderr.write("Falling back to debugfs statistics!\n") 1100 + options.debugfs = True 1101 + sleep(5) 1102 + 1103 + return options 1104 + 1105 + def main(): 1106 + options = get_options() 1107 + options = check_access(options) 1108 + 1109 + if (options.pid > 0 and 1110 + not os.path.isdir(os.path.join('/proc/', 1111 + str(options.pid)))): 1112 + sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n') 1113 + sys.exit('Specified pid does not exist.') 1114 + 1115 + providers = get_providers(options) 1116 + stats = Stats(providers, options.pid, fields=options.fields) 1117 + 1118 + if options.log: 1119 + log(stats) 1120 + elif not options.once: 1121 + with Tui(stats) as tui: 1122 + 
tui.show_stats() 1123 + else: 1124 + batch(stats) 1125 + 1126 + if __name__ == "__main__": 1127 + main()
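For readers following the per-VM statistics plumbing above: a minimal standalone C sketch of the aggregation DebugfsProvider.read() performs when a pid filter is set, assuming debugfs is mounted at /sys/kernel/debug and using "exits" purely as an example field name. The script matches the "<pid>-" substring in the per-VM directory name; a prefix match is used here.

/* Sum one counter across all per-VM debugfs directories of a given pid.
 * Mirrors the script's behaviour of treating an unreadable file as 0. */
#include <dirent.h>
#include <stdio.h>
#include <string.h>

static long read_field(const char *dir, const char *field)
{
	char path[512];
	long val = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/debug/kvm/%s/%s", dir, field);
	f = fopen(path, "r");
	if (!f)
		return 0;	/* same as the script's IOError -> 0 */
	if (fscanf(f, "%ld", &val) != 1)
		val = 0;
	fclose(f);
	return val;
}

int main(int argc, char **argv)
{
	char prefix[64];
	struct dirent *d;
	long sum = 0;
	DIR *dir;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}
	dir = opendir("/sys/kernel/debug/kvm");
	if (!dir)
		return 1;
	/* per-VM dirs are named "<pid>-<fd>", hence the trailing dash */
	snprintf(prefix, sizeof(prefix), "%s-", argv[1]);
	while ((d = readdir(dir)) != NULL)
		if (strncmp(d->d_name, prefix, strlen(prefix)) == 0)
			sum += read_field(d->d_name, "exits");
	closedir(dir);
	printf("exits for pid %s: %ld\n", argv[1], sum);
	return 0;
}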
+63
tools/kvm/kvm_stat/kvm_stat.txt
··· 1 + kvm_stat(1) 2 + =========== 3 + 4 + NAME 5 + ---- 6 + kvm_stat - Report KVM kernel module event counters 7 + 8 + SYNOPSIS 9 + -------- 10 + [verse] 11 + 'kvm_stat' [OPTION]... 12 + 13 + DESCRIPTION 14 + ----------- 15 + kvm_stat prints counts of KVM kernel module trace events. These events signify 16 + state transitions such as guest mode entry and exit. 17 + 18 + This tool is useful for observing guest behavior from the host perspective. 19 + Often conclusions about performance or buggy behavior can be drawn from the 20 + output. 21 + 22 + The set of KVM kernel module trace events may be specific to the kernel version 23 + or architecture. It is best to check the KVM kernel module source code for the 24 + meaning of events. 25 + 26 + OPTIONS 27 + ------- 28 + -1:: 29 + --once:: 30 + --batch:: 31 + run in batch mode for one second 32 + 33 + -l:: 34 + --log:: 35 + run in logging mode (like vmstat) 36 + 37 + -t:: 38 + --tracepoints:: 39 + retrieve statistics from tracepoints 40 + 41 + -d:: 42 + --debugfs:: 43 + retrieve statistics from debugfs 44 + 45 + -p<pid>:: 46 + --pid=<pid>:: 47 + limit statistics to one virtual machine (pid) 48 + 49 + -f<fields>:: 50 + --fields=<fields>:: 51 + fields to display (regex) 52 + 53 + -h:: 54 + --help:: 55 + show help message 56 + 57 + SEE ALSO 58 + -------- 59 + 'perf'(1), 'trace-cmd'(1) 60 + 61 + AUTHOR 62 + ------ 63 + Stefan Hajnoczi <stefanha@redhat.com>
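The script's help text above lists /proc/sys/kernel/perf_event_paranoid < 1 among the requirements for tracepoint mode. A small sketch of that check; the threshold follows the help text and distributions may add non-standard levels, so treat it as a guideline rather than a hard rule.

/* Report whether the current perf_event_paranoid setting allows an
 * unprivileged user (no CAP_SYS_ADMIN) to use kvm_stat's tracepoints. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
	int level;

	if (!f || fscanf(f, "%d", &level) != 1) {
		fprintf(stderr, "cannot read perf_event_paranoid\n");
		return 1;
	}
	fclose(f);
	printf("perf_event_paranoid = %d (%s for unprivileged tracing)\n",
	       level, level < 1 ? "ok" : "too restrictive");
	return 0;
}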
+44 -24
virt/kvm/arm/arch_timer.c
··· 20 20 #include <linux/kvm.h> 21 21 #include <linux/kvm_host.h> 22 22 #include <linux/interrupt.h> 23 + #include <linux/irq.h> 23 24 24 25 #include <clocksource/arm_arch_timer.h> 25 26 #include <asm/arch_timer.h> ··· 175 174 176 175 timer->active_cleared_last = false; 177 176 timer->irq.level = new_level; 178 - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, 177 + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq, 179 178 timer->irq.level); 180 179 ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, 181 - timer->map, 180 + timer->irq.irq, 182 181 timer->irq.level); 183 182 WARN_ON(ret); 184 183 } ··· 197 196 * because the guest would never see the interrupt. Instead wait 198 197 * until we call this function from kvm_timer_flush_hwstate. 199 198 */ 200 - if (!vgic_initialized(vcpu->kvm)) 199 + if (!vgic_initialized(vcpu->kvm) || !timer->enabled) 201 200 return -ENODEV; 202 201 203 202 if (kvm_timer_should_fire(vcpu) != timer->irq.level) ··· 275 274 * to ensure that hardware interrupts from the timer triggers a guest 276 275 * exit. 277 276 */ 278 - if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map)) 279 - phys_active = true; 280 - else 281 - phys_active = false; 277 + phys_active = timer->irq.level || 278 + kvm_vgic_map_is_active(vcpu, timer->irq.irq); 282 279 283 280 /* 284 281 * We want to avoid hitting the (re)distributor as much as ··· 301 302 if (timer->active_cleared_last && !phys_active) 302 303 return; 303 304 304 - ret = irq_set_irqchip_state(timer->map->irq, 305 + ret = irq_set_irqchip_state(host_vtimer_irq, 305 306 IRQCHIP_STATE_ACTIVE, 306 307 phys_active); 307 308 WARN_ON(ret); ··· 333 334 const struct kvm_irq_level *irq) 334 335 { 335 336 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 336 - struct irq_phys_map *map; 337 337 338 338 /* 339 339 * The vcpu timer irq number cannot be determined in ··· 351 353 timer->cntv_ctl = 0; 352 354 kvm_timer_update_state(vcpu); 353 355 354 - /* 355 - * Tell the VGIC that the virtual interrupt is tied to a 356 - * physical interrupt. We do that once per VCPU. 357 - */ 358 - map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq); 359 - if (WARN_ON(IS_ERR(map))) 360 - return PTR_ERR(map); 361 - 362 - timer->map = map; 363 356 return 0; 364 357 } 365 358 ··· 476 487 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 477 488 478 489 timer_disarm(timer); 479 - if (timer->map) 480 - kvm_vgic_unmap_phys_irq(vcpu, timer->map); 490 + kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq); 481 491 } 482 492 483 - void kvm_timer_enable(struct kvm *kvm) 493 + int kvm_timer_enable(struct kvm_vcpu *vcpu) 484 494 { 485 - if (kvm->arch.timer.enabled) 486 - return; 495 + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 496 + struct irq_desc *desc; 497 + struct irq_data *data; 498 + int phys_irq; 499 + int ret; 500 + 501 + if (timer->enabled) 502 + return 0; 503 + 504 + /* 505 + * Find the physical IRQ number corresponding to the host_vtimer_irq 506 + */ 507 + desc = irq_to_desc(host_vtimer_irq); 508 + if (!desc) { 509 + kvm_err("%s: no interrupt descriptor\n", __func__); 510 + return -EINVAL; 511 + } 512 + 513 + data = irq_desc_get_irq_data(desc); 514 + while (data->parent_data) 515 + data = data->parent_data; 516 + 517 + phys_irq = data->hwirq; 518 + 519 + /* 520 + * Tell the VGIC that the virtual interrupt is tied to a 521 + * physical interrupt. We do that once per VCPU. 
522 + */ 523 + ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq); 524 + if (ret) 525 + return ret; 526 + 487 527 488 528 /* 489 529 * There is a potential race here between VCPUs starting for the first ··· 523 505 * the arch timers are enabled. 524 506 */ 525 507 if (timecounter && wqueue) 526 - kvm->arch.timer.enabled = 1; 508 + timer->enabled = 1; 509 + 510 + return 0; 527 511 } 528 512 529 513 void kvm_timer_init(struct kvm *kvm)
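The new kvm_timer_enable() above finds the physical interrupt number by walking irq_data->parent_data to the root of the irq_domain hierarchy. A minimal userspace model of that walk; the struct here is illustrative, not the kernel's, and 27 is used only as an example hwirq (it happens to be the usual virtual-timer PPI).

/* Userspace model of the parent walk in kvm_timer_enable(): the Linux
 * IRQ number sits at the top of an irq_domain hierarchy, and the
 * physical hwirq is read from the root irq_data. */
#include <stdio.h>

struct irq_data {
	struct irq_data *parent_data;	/* NULL at the root domain */
	unsigned long hwirq;		/* domain-local interrupt number */
};

static unsigned long root_hwirq(struct irq_data *data)
{
	while (data->parent_data)
		data = data->parent_data;
	return data->hwirq;
}

int main(void)
{
	struct irq_data gic = { .parent_data = NULL, .hwirq = 27 };
	struct irq_data virq = { .parent_data = &gic, .hwirq = 5 };

	printf("phys_irq = %lu\n", root_hwirq(&virq));	/* prints 27 */
	return 0;
}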
+2 -3
virt/kvm/arm/hyp/timer-sr.c
··· 24 24 /* vcpu is already in the HYP VA space */ 25 25 void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) 26 26 { 27 - struct kvm *kvm = kern_hyp_va(vcpu->kvm); 28 27 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 29 28 u64 val; 30 29 31 - if (kvm->arch.timer.enabled) { 30 + if (timer->enabled) { 32 31 timer->cntv_ctl = read_sysreg_el0(cntv_ctl); 33 32 timer->cntv_cval = read_sysreg_el0(cntv_cval); 34 33 } ··· 59 60 val |= CNTHCTL_EL1PCTEN; 60 61 write_sysreg(val, cnthctl_el2); 61 62 62 - if (kvm->arch.timer.enabled) { 63 + if (timer->enabled) { 63 64 write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); 64 65 write_sysreg_el0(timer->cntv_cval, cntv_cval); 65 66 isb();
+12 -5
virt/kvm/arm/hyp/vgic-v2-sr.c
··· 21 21 22 22 #include <asm/kvm_hyp.h> 23 23 24 + #ifdef CONFIG_KVM_NEW_VGIC 25 + extern struct vgic_global kvm_vgic_global_state; 26 + #define vgic_v2_params kvm_vgic_global_state 27 + #else 28 + extern struct vgic_params vgic_v2_params; 29 + #endif 30 + 24 31 static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, 25 32 void __iomem *base) 26 33 { 27 34 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 28 - int nr_lr = vcpu->arch.vgic_cpu.nr_lr; 35 + int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; 29 36 u32 eisr0, eisr1; 30 37 int i; 31 38 bool expect_mi; ··· 74 67 static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) 75 68 { 76 69 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 77 - int nr_lr = vcpu->arch.vgic_cpu.nr_lr; 70 + int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; 78 71 u32 elrsr0, elrsr1; 79 72 80 73 elrsr0 = readl_relaxed(base + GICH_ELRSR0); ··· 93 86 static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) 94 87 { 95 88 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 96 - int nr_lr = vcpu->arch.vgic_cpu.nr_lr; 89 + int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; 97 90 int i; 98 91 99 92 for (i = 0; i < nr_lr; i++) { ··· 148 141 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 149 142 struct vgic_dist *vgic = &kvm->arch.vgic; 150 143 void __iomem *base = kern_hyp_va(vgic->vctrl_base); 151 - int i, nr_lr; 144 + int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; 145 + int i; 152 146 u64 live_lrs = 0; 153 147 154 148 if (!base) 155 149 return; 156 150 157 - nr_lr = vcpu->arch.vgic_cpu.nr_lr; 158 151 159 152 for (i = 0; i < nr_lr; i++) 160 153 if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
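The save path above (the hunk is truncated in this view) builds a bitmap of list registers that still carry pending or active state. A standalone sketch of that scan, assuming GICH_LR_STATE is the pending/active pair at bits 28-29 of a GICv2 LR, as defined in include/linux/irqchip/arm-gic.h; the LR values are made up for the example.

/* Build a live-LR bitmap from an array of GICv2 list register values. */
#include <stdio.h>
#include <stdint.h>

#define GICH_LR_STATE (3u << 28)	/* pending | active */

int main(void)
{
	uint32_t vgic_lr[4] = { 0, 1u << 28, 2u << 28, 0 }; /* example LRs */
	int nr_lr = 4;
	uint64_t live_lrs = 0;
	int i;

	for (i = 0; i < nr_lr; i++)
		if (vgic_lr[i] & GICH_LR_STATE)
			live_lrs |= 1ULL << i;

	printf("live_lrs = %#llx\n", (unsigned long long)live_lrs); /* 0x6 */
	return 0;
}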
+14 -11
virt/kvm/arm/pmu.c
···
436 436 return 0;
437 437 }
438 438
439 - static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
439 + #define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
440 +
441 + /*
442 + * For one VM the interrupt type must be the same for each vcpu.
443 + * As a PPI, the interrupt number is the same for all vcpus,
444 + * while as an SPI it must be a separate number per vcpu.
445 + */
446 + static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
440 447 {
441 448 int i;
442 449 struct kvm_vcpu *vcpu;
···
452 445 if (!kvm_arm_pmu_irq_initialized(vcpu))
453 446 continue;
454 447
455 - if (is_ppi) {
448 + if (irq_is_ppi(irq)) {
456 449 if (vcpu->arch.pmu.irq_num != irq)
457 450 return false;
458 451 } else {
···
463 456
464 457 return true;
465 458 }
466 -
467 459
468 460 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
469 461 {
···
477 471 if (get_user(irq, uaddr))
478 472 return -EFAULT;
479 473
480 - /*
481 - * The PMU overflow interrupt could be a PPI or SPI, but for one
482 - * VM the interrupt type must be same for each vcpu. As a PPI,
483 - * the interrupt number is the same for all vcpus, while as an
484 - * SPI it must be a separate number per vcpu.
485 - */
486 - if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs ||
487 - !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS))
474 + /* The PMU overflow interrupt can be a PPI or a valid SPI. */
475 + if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
476 + return -EINVAL;
477 +
478 + if (!pmu_irq_is_valid(vcpu->kvm, irq))
488 479 return -EINVAL;
489 480
490 481 if (kvm_arm_pmu_irq_initialized(vcpu))
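A standalone illustration of the irq_is_ppi() test introduced above; VGIC_NR_SGIS (16) and VGIC_NR_PRIVATE_IRQS (32) match the vgic headers, so INTIDs 0-15 are SGIs, 16-31 are PPIs, and 32 and up are SPIs.

/* Classify a few sample INTIDs using the same range test as the patch. */
#include <stdio.h>

#define VGIC_NR_SGIS         16
#define VGIC_NR_PRIVATE_IRQS 32

#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)

int main(void)
{
	int samples[] = { 7, 23, 52 };
	int i;

	for (i = 0; i < 3; i++) {
		int irq = samples[i];

		printf("intid %2d: %s\n", irq,
		       irq < VGIC_NR_SGIS ? "SGI" :
		       irq_is_ppi(irq)    ? "PPI" : "SPI");
	}
	return 0;
}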
+3 -1
virt/kvm/arm/vgic-v2.c
··· 171 171 .enable = vgic_v2_enable, 172 172 }; 173 173 174 - static struct vgic_params vgic_v2_params; 174 + struct vgic_params __section(.hyp.text) vgic_v2_params; 175 175 176 176 static void vgic_cpu_init_lrs(void *params) 177 177 { ··· 200 200 struct vgic_params *vgic = &vgic_v2_params; 201 201 const struct resource *vctrl_res = &gic_kvm_info->vctrl; 202 202 const struct resource *vcpu_res = &gic_kvm_info->vcpu; 203 + 204 + memset(vgic, 0, sizeof(*vgic)); 203 205 204 206 if (!gic_kvm_info->maint_irq) { 205 207 kvm_err("error getting vgic maintenance irq\n");
+1 -7
virt/kvm/arm/vgic-v3.c
··· 29 29 #include <asm/kvm_asm.h> 30 30 #include <asm/kvm_mmu.h> 31 31 32 - /* These are for GICv2 emulation only */ 33 - #define GICH_LR_VIRTUALID (0x3ffUL << 0) 34 - #define GICH_LR_PHYSID_CPUID_SHIFT (10) 35 - #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) 36 - #define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) 37 - 38 32 static u32 ich_vtr_el2; 39 33 40 34 static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) ··· 37 43 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; 38 44 39 45 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) 40 - lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; 46 + lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK; 41 47 else 42 48 lr_desc.irq = val & GICH_LR_VIRTUALID; 43 49
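The masks removed above show why switching to the shared ICH_LR_VIRTUAL_ID_MASK matters: a native GICv3 list register carries the virtual INTID in bits 0-31, while a GICv2-style LR has only 10 ID bits. A small sketch with an example LR value; the mask values follow the defines that this hunk deletes.

/* Extract the virtual INTID from an LR value, v3-style and v2-style. */
#include <stdio.h>
#include <stdint.h>

#define GICH_LR_VIRTUALID      (0x3ffUL << 0)		/* GICv2 emulation */
#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)	/* native GICv3 */

int main(void)
{
	/* example: some state bits plus INTID 8208 (beyond the v2 range) */
	uint64_t lr = (1ULL << 62) | 8208;

	printf("v3 intid: %llu\n",
	       (unsigned long long)(lr & ICH_LR_VIRTUAL_ID_MASK));	/* 8208 */
	printf("v2 intid: %llu\n",
	       (unsigned long long)(lr & GICH_LR_VIRTUALID));		/* 16: truncated */
	return 0;
}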
+25 -61
virt/kvm/arm/vgic.c
··· 690 690 */ 691 691 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) 692 692 { 693 - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 694 693 u64 elrsr = vgic_get_elrsr(vcpu); 695 694 unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); 696 695 int i; 697 696 698 - for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { 697 + for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) { 699 698 struct vgic_lr lr = vgic_get_lr(vcpu, i); 700 699 701 700 /* ··· 819 820 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 820 821 struct vgic_io_device *iodev = container_of(this, 821 822 struct vgic_io_device, dev); 822 - struct kvm_run *run = vcpu->run; 823 823 const struct vgic_io_range *range; 824 824 struct kvm_exit_mmio mmio; 825 825 bool updated_state; ··· 847 849 updated_state = false; 848 850 } 849 851 spin_unlock(&dist->lock); 850 - run->mmio.is_write = is_write; 851 - run->mmio.len = len; 852 - run->mmio.phys_addr = addr; 853 - memcpy(run->mmio.data, val, len); 854 - 855 - kvm_handle_mmio_return(vcpu, run); 856 852 857 853 if (updated_state) 858 854 vgic_kick_vcpus(vcpu->kvm); ··· 1094 1102 return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); 1095 1103 } 1096 1104 1097 - bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map) 1105 + bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) 1098 1106 { 1099 1107 int i; 1100 1108 1101 - for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) { 1109 + for (i = 0; i < vgic->nr_lr; i++) { 1102 1110 struct vgic_lr vlr = vgic_get_lr(vcpu, i); 1103 1111 1104 - if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE) 1112 + if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE) 1105 1113 return true; 1106 1114 } 1107 1115 1108 - return vgic_irq_is_active(vcpu, map->virt_irq); 1116 + return vgic_irq_is_active(vcpu, virt_irq); 1109 1117 } 1110 1118 1111 1119 /* ··· 1513 1521 } 1514 1522 1515 1523 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, 1516 - struct irq_phys_map *map, 1517 1524 unsigned int irq_num, bool level) 1518 1525 { 1519 1526 struct vgic_dist *dist = &kvm->arch.vgic; ··· 1651 1660 if (map) 1652 1661 return -EINVAL; 1653 1662 1654 - return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level); 1663 + return vgic_update_irq_pending(kvm, cpuid, irq_num, level); 1655 1664 } 1656 1665 1657 1666 /** 1658 1667 * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic 1659 1668 * @kvm: The VM structure pointer 1660 1669 * @cpuid: The CPU for PPIs 1661 - * @map: Pointer to a irq_phys_map structure describing the mapping 1670 + * @virt_irq: The virtual IRQ to be injected 1662 1671 * @level: Edge-triggered: true: to trigger the interrupt 1663 1672 * false: to ignore the call 1664 1673 * Level-sensitive true: raise the input signal ··· 1669 1678 * being HIGH and 0 being LOW and all devices being active-HIGH. 
1670 1679 */ 1671 1680 int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, 1672 - struct irq_phys_map *map, bool level) 1681 + unsigned int virt_irq, bool level) 1673 1682 { 1674 1683 int ret; 1675 1684 ··· 1677 1686 if (ret) 1678 1687 return ret; 1679 1688 1680 - return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level); 1689 + return vgic_update_irq_pending(kvm, cpuid, virt_irq, level); 1681 1690 } 1682 1691 1683 1692 static irqreturn_t vgic_maintenance_handler(int irq, void *data) ··· 1703 1712 /** 1704 1713 * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ 1705 1714 * @vcpu: The VCPU pointer 1706 - * @virt_irq: The virtual irq number 1707 - * @irq: The Linux IRQ number 1715 + * @virt_irq: The virtual IRQ number for the guest 1716 + * @phys_irq: The hardware IRQ number of the host 1708 1717 * 1709 1718 * Establish a mapping between a guest visible irq (@virt_irq) and a 1710 - * Linux irq (@irq). On injection, @virt_irq will be associated with 1711 - * the physical interrupt represented by @irq. This mapping can be 1719 + * hardware irq (@phys_irq). On injection, @virt_irq will be associated with 1720 + * the physical interrupt represented by @phys_irq. This mapping can be 1712 1721 * established multiple times as long as the parameters are the same. 1713 1722 * 1714 - * Returns a valid pointer on success, and an error pointer otherwise 1723 + * Returns 0 on success or an error value otherwise. 1715 1724 */ 1716 - struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, 1717 - int virt_irq, int irq) 1725 + int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq) 1718 1726 { 1719 1727 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1720 1728 struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); 1721 1729 struct irq_phys_map *map; 1722 1730 struct irq_phys_map_entry *entry; 1723 - struct irq_desc *desc; 1724 - struct irq_data *data; 1725 - int phys_irq; 1726 - 1727 - desc = irq_to_desc(irq); 1728 - if (!desc) { 1729 - kvm_err("%s: no interrupt descriptor\n", __func__); 1730 - return ERR_PTR(-EINVAL); 1731 - } 1732 - 1733 - data = irq_desc_get_irq_data(desc); 1734 - while (data->parent_data) 1735 - data = data->parent_data; 1736 - 1737 - phys_irq = data->hwirq; 1731 + int ret = 0; 1738 1732 1739 1733 /* Create a new mapping */ 1740 1734 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1741 1735 if (!entry) 1742 - return ERR_PTR(-ENOMEM); 1736 + return -ENOMEM; 1743 1737 1744 1738 spin_lock(&dist->irq_phys_map_lock); 1745 1739 ··· 1732 1756 map = vgic_irq_map_search(vcpu, virt_irq); 1733 1757 if (map) { 1734 1758 /* Make sure this mapping matches */ 1735 - if (map->phys_irq != phys_irq || 1736 - map->irq != irq) 1737 - map = ERR_PTR(-EINVAL); 1759 + if (map->phys_irq != phys_irq) 1760 + ret = -EINVAL; 1738 1761 1739 1762 /* Found an existing, valid mapping */ 1740 1763 goto out; ··· 1742 1767 map = &entry->map; 1743 1768 map->virt_irq = virt_irq; 1744 1769 map->phys_irq = phys_irq; 1745 - map->irq = irq; 1746 1770 1747 1771 list_add_tail_rcu(&entry->entry, root); 1748 1772 ··· 1749 1775 spin_unlock(&dist->irq_phys_map_lock); 1750 1776 /* If we've found a hit in the existing list, free the useless 1751 1777 * entry */ 1752 - if (IS_ERR(map) || map != &entry->map) 1778 + if (ret || map != &entry->map) 1753 1779 kfree(entry); 1754 - return map; 1780 + return ret; 1755 1781 } 1756 1782 1757 1783 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, ··· 1787 1813 /** 1788 1814 * kvm_vgic_unmap_phys_irq - 
Remove a virtual to physical IRQ mapping 1789 1815 * @vcpu: The VCPU pointer 1790 - * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq 1816 + * @virt_irq: The virtual IRQ number to be unmapped 1791 1817 * 1792 1818 * Remove an existing mapping between virtual and physical interrupts. 1793 1819 */ 1794 - int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map) 1820 + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) 1795 1821 { 1796 1822 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1797 1823 struct irq_phys_map_entry *entry; 1798 1824 struct list_head *root; 1799 1825 1800 - if (!map) 1801 - return -EINVAL; 1802 - 1803 - root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq); 1826 + root = vgic_get_irq_phys_map_list(vcpu, virt_irq); 1804 1827 1805 1828 spin_lock(&dist->irq_phys_map_lock); 1806 1829 1807 1830 list_for_each_entry(entry, root, entry) { 1808 - if (&entry->map == map) { 1831 + if (entry->map.virt_irq == virt_irq) { 1809 1832 list_del_rcu(&entry->entry); 1810 1833 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); 1811 1834 break; ··· 1857 1886 kvm_vgic_vcpu_destroy(vcpu); 1858 1887 return -ENOMEM; 1859 1888 } 1860 - 1861 - /* 1862 - * Store the number of LRs per vcpu, so we don't have to go 1863 - * all the way to the distributor structure to find out. Only 1864 - * assembly code should use this one. 1865 - */ 1866 - vgic_cpu->nr_lr = vgic->nr_lr; 1867 1889 1868 1890 return 0; 1869 1891 }
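After this change, kvm_vgic_map_phys_irq() and kvm_vgic_unmap_phys_irq() take plain IRQ numbers and return 0 or a negative errno. A toy single-list model of the mapping rules they implement, under the stated simplifications: re-mapping is only allowed with identical parameters, unmapping a missing entry is not an error, and locking, RCU and the per-IRQ list heads are deliberately omitted.

/* Minimal virt_irq -> phys_irq map with the same caller-visible rules. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct irq_phys_map_entry {
	struct irq_phys_map_entry *next;
	int virt_irq;
	int phys_irq;
};

static struct irq_phys_map_entry *map_list;

static int map_phys_irq(int virt_irq, int phys_irq)
{
	struct irq_phys_map_entry *e;

	for (e = map_list; e; e = e->next)
		if (e->virt_irq == virt_irq)	/* existing mapping must match */
			return e->phys_irq == phys_irq ? 0 : -EINVAL;

	e = malloc(sizeof(*e));
	if (!e)
		return -ENOMEM;
	e->virt_irq = virt_irq;
	e->phys_irq = phys_irq;
	e->next = map_list;
	map_list = e;
	return 0;
}

static int unmap_phys_irq(int virt_irq)
{
	struct irq_phys_map_entry **p, *e;

	for (p = &map_list; (e = *p) != NULL; p = &e->next) {
		if (e->virt_irq == virt_irq) {
			*p = e->next;
			free(e);
			break;
		}
	}
	return 0;	/* missing entries are silently ignored, as above */
}

int main(void)
{
	printf("%d\n", map_phys_irq(27, 27));	/* 0 */
	printf("%d\n", map_phys_irq(27, 27));	/* 0: same parameters are fine */
	printf("%d\n", map_phys_irq(27, 31));	/* -22: conflicting mapping */
	return unmap_phys_irq(27);
}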
+452
virt/kvm/arm/vgic/vgic-init.c
··· 1 + /* 2 + * Copyright (C) 2015, 2016 ARM Ltd. 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #include <linux/uaccess.h> 18 + #include <linux/interrupt.h> 19 + #include <linux/cpu.h> 20 + #include <linux/kvm_host.h> 21 + #include <kvm/arm_vgic.h> 22 + #include <asm/kvm_mmu.h> 23 + #include "vgic.h" 24 + 25 + /* 26 + * Initialization rules: there are multiple stages to the vgic 27 + * initialization, both for the distributor and the CPU interfaces. 28 + * 29 + * Distributor: 30 + * 31 + * - kvm_vgic_early_init(): initialization of static data that doesn't 32 + * depend on any sizing information or emulation type. No allocation 33 + * is allowed there. 34 + * 35 + * - vgic_init(): allocation and initialization of the generic data 36 + * structures that depend on sizing information (number of CPUs, 37 + * number of interrupts). Also initializes the vcpu specific data 38 + * structures. Can be executed lazily for GICv2. 39 + * 40 + * CPU Interface: 41 + * 42 + * - kvm_vgic_cpu_early_init(): initialization of static data that 43 + * doesn't depend on any sizing information or emulation type. No 44 + * allocation is allowed there. 45 + */ 46 + 47 + /* EARLY INIT */ 48 + 49 + /* 50 + * Those 2 functions should not be needed anymore but they 51 + * still are called from arm.c 52 + */ 53 + void kvm_vgic_early_init(struct kvm *kvm) 54 + { 55 + } 56 + 57 + void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) 58 + { 59 + } 60 + 61 + /* CREATION */ 62 + 63 + /** 64 + * kvm_vgic_create: triggered by the instantiation of the VGIC device by 65 + * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) 66 + * or through the generic KVM_CREATE_DEVICE API ioctl. 67 + * irqchip_in_kernel() tells you if this function succeeded or not. 68 + * @kvm: kvm struct pointer 69 + * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] 70 + */ 71 + int kvm_vgic_create(struct kvm *kvm, u32 type) 72 + { 73 + int i, vcpu_lock_idx = -1, ret; 74 + struct kvm_vcpu *vcpu; 75 + 76 + mutex_lock(&kvm->lock); 77 + 78 + if (irqchip_in_kernel(kvm)) { 79 + ret = -EEXIST; 80 + goto out; 81 + } 82 + 83 + /* 84 + * This function is also called by the KVM_CREATE_IRQCHIP handler, 85 + * which had no chance yet to check the availability of the GICv2 86 + * emulation. So check this here again. KVM_CREATE_DEVICE does 87 + * the proper checks already. 88 + */ 89 + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && 90 + !kvm_vgic_global_state.can_emulate_gicv2) { 91 + ret = -ENODEV; 92 + goto out; 93 + } 94 + 95 + /* 96 + * Any time a vcpu is run, vcpu_load is called which tries to grab the 97 + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure 98 + * that no other VCPUs are run while we create the vgic. 
99 + */
100 + ret = -EBUSY;
101 + kvm_for_each_vcpu(i, vcpu, kvm) {
102 + if (!mutex_trylock(&vcpu->mutex))
103 + goto out_unlock;
104 + vcpu_lock_idx = i;
105 + }
106 +
107 + kvm_for_each_vcpu(i, vcpu, kvm) {
108 + if (vcpu->arch.has_run_once)
109 + goto out_unlock;
110 + }
111 + ret = 0;
112 +
113 + if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
114 + kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
115 + else
116 + kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
117 +
118 + if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
119 + ret = -E2BIG;
120 + goto out_unlock;
121 + }
122 +
123 + kvm->arch.vgic.in_kernel = true;
124 + kvm->arch.vgic.vgic_model = type;
125 +
126 + /*
127 + * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init);
128 + * it is stored in the distributor struct for asm save/restore purposes
129 + */
130 + kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
131 +
132 + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
133 + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
134 + kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
135 +
136 + out_unlock:
137 + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
138 + vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
139 + mutex_unlock(&vcpu->mutex);
140 + }
141 +
142 + out:
143 + mutex_unlock(&kvm->lock);
144 + return ret;
145 + }
146 +
147 + /* INIT/DESTROY */
148 +
149 + /**
150 + * kvm_vgic_dist_init: initialize the dist data structures
151 + * @kvm: kvm struct pointer
152 + * @nr_spis: number of spis, frozen by caller
153 + */
154 + static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
155 + {
156 + struct vgic_dist *dist = &kvm->arch.vgic;
157 + struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
158 + int i;
159 +
160 + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
161 + if (!dist->spis)
162 + return -ENOMEM;
163 +
164 + /*
165 + * In the following code we do not take the irq struct lock since
166 + * no other action on irq structs can happen while the VGIC is
167 + * not initialized yet:
168 + * If someone wants to inject an interrupt or does an MMIO access, we
169 + * require prior initialization in case of a virtual GICv3 or trigger
170 + * initialization when using a virtual GICv2.
171 + */
172 + for (i = 0; i < nr_spis; i++) {
173 + struct vgic_irq *irq = &dist->spis[i];
174 +
175 + irq->intid = i + VGIC_NR_PRIVATE_IRQS;
176 + INIT_LIST_HEAD(&irq->ap_list);
177 + spin_lock_init(&irq->irq_lock);
178 + irq->vcpu = NULL;
179 + irq->target_vcpu = vcpu0;
180 + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
181 + irq->targets = 0;
182 + else
183 + irq->mpidr = 0;
184 + }
185 + return 0;
186 + }
187 +
188 + /**
189 + * kvm_vgic_vcpu_init: initialize the vcpu data structures and
190 + * enable the VCPU interface
191 + * @vcpu: the VCPU whose VGIC should be initialized
192 + */
193 + static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
194 + {
195 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
196 + int i;
197 +
198 + INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
199 + spin_lock_init(&vgic_cpu->ap_list_lock);
200 +
201 + /*
202 + * Enable and configure all SGIs to be edge-triggered and
203 + * configure all PPIs as level-triggered.
204 + */
205 + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
206 + struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
207 +
208 + INIT_LIST_HEAD(&irq->ap_list);
209 + spin_lock_init(&irq->irq_lock);
210 + irq->intid = i;
211 + irq->vcpu = NULL;
212 + irq->target_vcpu = vcpu;
213 + irq->targets = 1U << vcpu->vcpu_id;
214 + if (vgic_irq_is_sgi(i)) {
215 + /* SGIs */
216 + irq->enabled = 1;
217 + irq->config = VGIC_CONFIG_EDGE;
218 + } else {
219 + /* PPIs */
220 + irq->config = VGIC_CONFIG_LEVEL;
221 + }
222 + }
223 + if (kvm_vgic_global_state.type == VGIC_V2)
224 + vgic_v2_enable(vcpu);
225 + else
226 + vgic_v3_enable(vcpu);
227 + }
228 +
229 + /*
230 + * vgic_init: allocates and initializes dist and vcpu data structures
231 + * depending on two dimensioning parameters:
232 + * - the number of spis
233 + * - the number of vcpus
234 + * The function is generally called when nr_spis has been explicitly set
235 + * by the guest through the KVM DEVICE API. If not, nr_spis is set to 256.
236 + * vgic_initialized() returns true when this function has succeeded.
237 + * Must be called with kvm->lock held!
238 + */
239 + int vgic_init(struct kvm *kvm)
240 + {
241 + struct vgic_dist *dist = &kvm->arch.vgic;
242 + struct kvm_vcpu *vcpu;
243 + int ret = 0, i;
244 +
245 + if (vgic_initialized(kvm))
246 + return 0;
247 +
248 + /* freeze the number of spis */
249 + if (!dist->nr_spis)
250 + dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
251 +
252 + ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
253 + if (ret)
254 + goto out;
255 +
256 + kvm_for_each_vcpu(i, vcpu, kvm)
257 + kvm_vgic_vcpu_init(vcpu);
258 +
259 + dist->initialized = true;
260 + out:
261 + return ret;
262 + }
263 +
264 + static void kvm_vgic_dist_destroy(struct kvm *kvm)
265 + {
266 + struct vgic_dist *dist = &kvm->arch.vgic;
267 +
268 + mutex_lock(&kvm->lock);
269 +
270 + dist->ready = false;
271 + dist->initialized = false;
272 +
273 + kfree(dist->spis);
274 + kfree(dist->redist_iodevs);
275 + dist->nr_spis = 0;
276 +
277 + mutex_unlock(&kvm->lock);
278 + }
279 +
280 + void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
281 + {
282 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
283 +
284 + INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
285 + }
286 +
287 + void kvm_vgic_destroy(struct kvm *kvm)
288 + {
289 + struct kvm_vcpu *vcpu;
290 + int i;
291 +
292 + kvm_vgic_dist_destroy(kvm);
293 +
294 + kvm_for_each_vcpu(i, vcpu, kvm)
295 + kvm_vgic_vcpu_destroy(vcpu);
296 + }
297 +
298 + /**
299 + * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
300 + * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
301 + * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
302 + * @kvm: kvm struct pointer
303 + */
304 + int vgic_lazy_init(struct kvm *kvm)
305 + {
306 + int ret = 0;
307 +
308 + if (unlikely(!vgic_initialized(kvm))) {
309 + /*
310 + * We only provide the automatic initialization of the VGIC
311 + * for the legacy case of a GICv2. Any other type must
312 + * be explicitly initialized once set up with the respective
313 + * KVM device call.
314 + */
315 + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
316 + return -EBUSY;
317 +
318 + mutex_lock(&kvm->lock);
319 + ret = vgic_init(kvm);
320 + mutex_unlock(&kvm->lock);
321 + }
322 +
323 + return ret;
324 + }
325 +
326 + /* RESOURCE MAPPING */
327 +
328 + /**
329 + * Map the MMIO regions depending on the VGIC model exposed to the guest,
330 + * called on the first VCPU run.
331 + * Also map the virtual CPU interface into the VM.
332 + * v2/v3 derivatives call vgic_init if not already done.
333 + * vgic_ready() returns true if this function has succeeded.
334 + * @kvm: kvm struct pointer
335 + */
336 + int kvm_vgic_map_resources(struct kvm *kvm)
337 + {
338 + struct vgic_dist *dist = &kvm->arch.vgic;
339 + int ret = 0;
340 +
341 + mutex_lock(&kvm->lock);
342 + if (!irqchip_in_kernel(kvm))
343 + goto out;
344 +
345 + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
346 + ret = vgic_v2_map_resources(kvm);
347 + else
348 + ret = vgic_v3_map_resources(kvm);
349 + out:
350 + mutex_unlock(&kvm->lock);
351 + return ret;
352 + }
353 +
354 + /* GENERIC PROBE */
355 +
356 + static void vgic_init_maintenance_interrupt(void *info)
357 + {
358 + enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
359 + }
360 +
361 + static int vgic_cpu_notify(struct notifier_block *self,
362 + unsigned long action, void *cpu)
363 + {
364 + switch (action) {
365 + case CPU_STARTING:
366 + case CPU_STARTING_FROZEN:
367 + vgic_init_maintenance_interrupt(NULL);
368 + break;
369 + case CPU_DYING:
370 + case CPU_DYING_FROZEN:
371 + disable_percpu_irq(kvm_vgic_global_state.maint_irq);
372 + break;
373 + }
374 +
375 + return NOTIFY_OK;
376 + }
377 +
378 + static struct notifier_block vgic_cpu_nb = {
379 + .notifier_call = vgic_cpu_notify,
380 + };
381 +
382 + static irqreturn_t vgic_maintenance_handler(int irq, void *data)
383 + {
384 + /*
385 + * We cannot rely on the vgic maintenance interrupt to be
386 + * delivered synchronously. This means we can only use it to
387 + * exit the VM, and we perform the handling of EOIed
388 + * interrupts on the exit path (see vgic_process_maintenance).
389 + */
390 + return IRQ_HANDLED;
391 + }
392 +
393 + /**
394 + * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
395 + * according to the host GIC model. Accordingly calls either
396 + * vgic_v2/v3_probe which registers the KVM_DEVICE that can be
397 + * instantiated by a guest later on.
398 + */
399 + int kvm_vgic_hyp_init(void)
400 + {
401 + const struct gic_kvm_info *gic_kvm_info;
402 + int ret;
403 +
404 + gic_kvm_info = gic_get_kvm_info();
405 + if (!gic_kvm_info)
406 + return -ENODEV;
407 +
408 + if (!gic_kvm_info->maint_irq) {
409 + kvm_err("No vgic maintenance irq\n");
410 + return -ENXIO;
411 + }
412 +
413 + switch (gic_kvm_info->type) {
414 + case GIC_V2:
415 + ret = vgic_v2_probe(gic_kvm_info);
416 + break;
417 + case GIC_V3:
418 + ret = vgic_v3_probe(gic_kvm_info);
419 + break;
420 + default:
421 + ret = -ENODEV;
422 + }
423 +
424 + if (ret)
425 + return ret;
426 +
427 + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
428 + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
429 + vgic_maintenance_handler,
430 + "vgic", kvm_get_running_vcpus());
431 + if (ret) {
432 + kvm_err("Cannot register interrupt %d\n",
433 + kvm_vgic_global_state.maint_irq);
434 + return ret;
435 + }
436 +
437 + ret = __register_cpu_notifier(&vgic_cpu_nb);
438 + if (ret) {
439 + kvm_err("Cannot register vgic CPU notifier\n");
440 + goto out_free_irq;
441 + }
442 +
443 + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
444 +
445 + kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
446 + return 0;
447 +
448 + out_free_irq:
449 + free_percpu_irq(kvm_vgic_global_state.maint_irq,
450 + kvm_get_running_vcpus());
451 + return ret;
452 + }
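The dimensioning performed by vgic_init() above, as a standalone calculation: with VGIC_NR_IRQS_LEGACY = 256 and VGIC_NR_PRIVATE_IRQS = 32 (values from the vgic headers), a guest that never sets KVM_DEV_ARM_VGIC_GRP_NR_IRQS gets 224 SPIs, numbered from INTID 32.

/* Compute the default SPI count and INTID range, mirroring the
 * irq->intid = i + VGIC_NR_PRIVATE_IRQS assignment in the new code. */
#include <stdio.h>

#define VGIC_NR_IRQS_LEGACY  256
#define VGIC_NR_PRIVATE_IRQS 32

int main(void)
{
	int nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;

	printf("default nr_spis = %d\n", nr_spis);		/* 224 */
	printf("first SPI intid = %d\n", VGIC_NR_PRIVATE_IRQS);	/* 32 */
	printf("last  SPI intid = %d\n",
	       nr_spis - 1 + VGIC_NR_PRIVATE_IRQS);		/* 255 */
	return 0;
}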
+52
virt/kvm/arm/vgic/vgic-irqfd.c
··· 1 + /* 2 + * Copyright (C) 2015, 2016 ARM Ltd. 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #include <linux/kvm.h> 18 + #include <linux/kvm_host.h> 19 + #include <trace/events/kvm.h> 20 + 21 + int kvm_irq_map_gsi(struct kvm *kvm, 22 + struct kvm_kernel_irq_routing_entry *entries, 23 + int gsi) 24 + { 25 + return 0; 26 + } 27 + 28 + int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip, 29 + unsigned int pin) 30 + { 31 + return pin; 32 + } 33 + 34 + int kvm_set_irq(struct kvm *kvm, int irq_source_id, 35 + u32 irq, int level, bool line_status) 36 + { 37 + unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; 38 + 39 + trace_kvm_set_irq(irq, level, irq_source_id); 40 + 41 + BUG_ON(!vgic_initialized(kvm)); 42 + 43 + return kvm_vgic_inject_irq(kvm, 0, spi, level); 44 + } 45 + 46 + /* MSI not implemented yet */ 47 + int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, 48 + struct kvm *kvm, int irq_source_id, 49 + int level, bool line_status) 50 + { 51 + return 0; 52 + }
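The GSI-to-INTID convention kvm_set_irq() applies above, spelled out: userspace irqfd GSI numbers start at 0 and map onto SPIs, which begin after the 32 private interrupts.

/* Print the SPI INTID corresponding to the first few irqfd GSIs. */
#include <stdio.h>

#define VGIC_NR_PRIVATE_IRQS 32

int main(void)
{
	unsigned int gsi;

	for (gsi = 0; gsi < 3; gsi++)
		printf("gsi %u -> spi intid %u\n",
		       gsi, gsi + VGIC_NR_PRIVATE_IRQS);
	return 0;
}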
+431
virt/kvm/arm/vgic/vgic-kvm-device.c
··· 1 + /* 2 + * VGIC: KVM DEVICE API 3 + * 4 + * Copyright (C) 2015 ARM Ltd. 5 + * Author: Marc Zyngier <marc.zyngier@arm.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + */ 16 + #include <linux/kvm_host.h> 17 + #include <kvm/arm_vgic.h> 18 + #include <linux/uaccess.h> 19 + #include <asm/kvm_mmu.h> 20 + #include "vgic.h" 21 + 22 + /* common helpers */ 23 + 24 + static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, 25 + phys_addr_t addr, phys_addr_t alignment) 26 + { 27 + if (addr & ~KVM_PHYS_MASK) 28 + return -E2BIG; 29 + 30 + if (!IS_ALIGNED(addr, alignment)) 31 + return -EINVAL; 32 + 33 + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) 34 + return -EEXIST; 35 + 36 + return 0; 37 + } 38 + 39 + /** 40 + * kvm_vgic_addr - set or get vgic VM base addresses 41 + * @kvm: pointer to the vm struct 42 + * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX 43 + * @addr: pointer to address value 44 + * @write: if true set the address in the VM address space, if false read the 45 + * address 46 + * 47 + * Set or get the vgic base addresses for the distributor and the virtual CPU 48 + * interface in the VM physical address space. These addresses are properties 49 + * of the emulated core/SoC and therefore user space initially knows this 50 + * information. 51 + * Check them for sanity (alignment, double assignment). We can't check for 52 + * overlapping regions in case of a virtual GICv3 here, since we don't know 53 + * the number of VCPUs yet, so we defer this check to map_resources(). 
54 + */ 55 + int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) 56 + { 57 + int r = 0; 58 + struct vgic_dist *vgic = &kvm->arch.vgic; 59 + int type_needed; 60 + phys_addr_t *addr_ptr, alignment; 61 + 62 + mutex_lock(&kvm->lock); 63 + switch (type) { 64 + case KVM_VGIC_V2_ADDR_TYPE_DIST: 65 + type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; 66 + addr_ptr = &vgic->vgic_dist_base; 67 + alignment = SZ_4K; 68 + break; 69 + case KVM_VGIC_V2_ADDR_TYPE_CPU: 70 + type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; 71 + addr_ptr = &vgic->vgic_cpu_base; 72 + alignment = SZ_4K; 73 + break; 74 + #ifdef CONFIG_KVM_ARM_VGIC_V3 75 + case KVM_VGIC_V3_ADDR_TYPE_DIST: 76 + type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; 77 + addr_ptr = &vgic->vgic_dist_base; 78 + alignment = SZ_64K; 79 + break; 80 + case KVM_VGIC_V3_ADDR_TYPE_REDIST: 81 + type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; 82 + addr_ptr = &vgic->vgic_redist_base; 83 + alignment = SZ_64K; 84 + break; 85 + #endif 86 + default: 87 + r = -ENODEV; 88 + goto out; 89 + } 90 + 91 + if (vgic->vgic_model != type_needed) { 92 + r = -ENODEV; 93 + goto out; 94 + } 95 + 96 + if (write) { 97 + r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); 98 + if (!r) 99 + *addr_ptr = *addr; 100 + } else { 101 + *addr = *addr_ptr; 102 + } 103 + 104 + out: 105 + mutex_unlock(&kvm->lock); 106 + return r; 107 + } 108 + 109 + static int vgic_set_common_attr(struct kvm_device *dev, 110 + struct kvm_device_attr *attr) 111 + { 112 + int r; 113 + 114 + switch (attr->group) { 115 + case KVM_DEV_ARM_VGIC_GRP_ADDR: { 116 + u64 __user *uaddr = (u64 __user *)(long)attr->addr; 117 + u64 addr; 118 + unsigned long type = (unsigned long)attr->attr; 119 + 120 + if (copy_from_user(&addr, uaddr, sizeof(addr))) 121 + return -EFAULT; 122 + 123 + r = kvm_vgic_addr(dev->kvm, type, &addr, true); 124 + return (r == -ENODEV) ? -ENXIO : r; 125 + } 126 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { 127 + u32 __user *uaddr = (u32 __user *)(long)attr->addr; 128 + u32 val; 129 + int ret = 0; 130 + 131 + if (get_user(val, uaddr)) 132 + return -EFAULT; 133 + 134 + /* 135 + * We require: 136 + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs 137 + * - at most 1024 interrupts 138 + * - a multiple of 32 interrupts 139 + */ 140 + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || 141 + val > VGIC_MAX_RESERVED || 142 + (val & 31)) 143 + return -EINVAL; 144 + 145 + mutex_lock(&dev->kvm->lock); 146 + 147 + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) 148 + ret = -EBUSY; 149 + else 150 + dev->kvm->arch.vgic.nr_spis = 151 + val - VGIC_NR_PRIVATE_IRQS; 152 + 153 + mutex_unlock(&dev->kvm->lock); 154 + 155 + return ret; 156 + } 157 + case KVM_DEV_ARM_VGIC_GRP_CTRL: { 158 + switch (attr->attr) { 159 + case KVM_DEV_ARM_VGIC_CTRL_INIT: 160 + mutex_lock(&dev->kvm->lock); 161 + r = vgic_init(dev->kvm); 162 + mutex_unlock(&dev->kvm->lock); 163 + return r; 164 + } 165 + break; 166 + } 167 + } 168 + 169 + return -ENXIO; 170 + } 171 + 172 + static int vgic_get_common_attr(struct kvm_device *dev, 173 + struct kvm_device_attr *attr) 174 + { 175 + int r = -ENXIO; 176 + 177 + switch (attr->group) { 178 + case KVM_DEV_ARM_VGIC_GRP_ADDR: { 179 + u64 __user *uaddr = (u64 __user *)(long)attr->addr; 180 + u64 addr; 181 + unsigned long type = (unsigned long)attr->attr; 182 + 183 + r = kvm_vgic_addr(dev->kvm, type, &addr, false); 184 + if (r) 185 + return (r == -ENODEV) ? 
-ENXIO : r; 186 + 187 + if (copy_to_user(uaddr, &addr, sizeof(addr))) 188 + return -EFAULT; 189 + break; 190 + } 191 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { 192 + u32 __user *uaddr = (u32 __user *)(long)attr->addr; 193 + 194 + r = put_user(dev->kvm->arch.vgic.nr_spis + 195 + VGIC_NR_PRIVATE_IRQS, uaddr); 196 + break; 197 + } 198 + } 199 + 200 + return r; 201 + } 202 + 203 + static int vgic_create(struct kvm_device *dev, u32 type) 204 + { 205 + return kvm_vgic_create(dev->kvm, type); 206 + } 207 + 208 + static void vgic_destroy(struct kvm_device *dev) 209 + { 210 + kfree(dev); 211 + } 212 + 213 + void kvm_register_vgic_device(unsigned long type) 214 + { 215 + switch (type) { 216 + case KVM_DEV_TYPE_ARM_VGIC_V2: 217 + kvm_register_device_ops(&kvm_arm_vgic_v2_ops, 218 + KVM_DEV_TYPE_ARM_VGIC_V2); 219 + break; 220 + #ifdef CONFIG_KVM_ARM_VGIC_V3 221 + case KVM_DEV_TYPE_ARM_VGIC_V3: 222 + kvm_register_device_ops(&kvm_arm_vgic_v3_ops, 223 + KVM_DEV_TYPE_ARM_VGIC_V3); 224 + break; 225 + #endif 226 + } 227 + } 228 + 229 + /** vgic_attr_regs_access: allows user space to read/write VGIC registers 230 + * 231 + * @dev: kvm device handle 232 + * @attr: kvm device attribute 233 + * @reg: address the value is read or written 234 + * @is_write: write flag 235 + * 236 + */ 237 + static int vgic_attr_regs_access(struct kvm_device *dev, 238 + struct kvm_device_attr *attr, 239 + u32 *reg, bool is_write) 240 + { 241 + gpa_t addr; 242 + int cpuid, ret, c; 243 + struct kvm_vcpu *vcpu, *tmp_vcpu; 244 + int vcpu_lock_idx = -1; 245 + 246 + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> 247 + KVM_DEV_ARM_VGIC_CPUID_SHIFT; 248 + vcpu = kvm_get_vcpu(dev->kvm, cpuid); 249 + addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; 250 + 251 + mutex_lock(&dev->kvm->lock); 252 + 253 + ret = vgic_init(dev->kvm); 254 + if (ret) 255 + goto out; 256 + 257 + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { 258 + ret = -EINVAL; 259 + goto out; 260 + } 261 + 262 + /* 263 + * Any time a vcpu is run, vcpu_load is called which tries to grab the 264 + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure 265 + * that no other VCPUs are run and fiddle with the vgic state while we 266 + * access it. 
267 + */ 268 + ret = -EBUSY; 269 + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { 270 + if (!mutex_trylock(&tmp_vcpu->mutex)) 271 + goto out; 272 + vcpu_lock_idx = c; 273 + } 274 + 275 + switch (attr->group) { 276 + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: 277 + ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); 278 + break; 279 + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: 280 + ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); 281 + break; 282 + default: 283 + ret = -EINVAL; 284 + break; 285 + } 286 + 287 + out: 288 + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { 289 + tmp_vcpu = kvm_get_vcpu(dev->kvm, vcpu_lock_idx); 290 + mutex_unlock(&tmp_vcpu->mutex); 291 + } 292 + 293 + mutex_unlock(&dev->kvm->lock); 294 + return ret; 295 + } 296 + 297 + /* V2 ops */ 298 + 299 + static int vgic_v2_set_attr(struct kvm_device *dev, 300 + struct kvm_device_attr *attr) 301 + { 302 + int ret; 303 + 304 + ret = vgic_set_common_attr(dev, attr); 305 + if (ret != -ENXIO) 306 + return ret; 307 + 308 + switch (attr->group) { 309 + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: 310 + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { 311 + u32 __user *uaddr = (u32 __user *)(long)attr->addr; 312 + u32 reg; 313 + 314 + if (get_user(reg, uaddr)) 315 + return -EFAULT; 316 + 317 + return vgic_attr_regs_access(dev, attr, &reg, true); 318 + } 319 + } 320 + 321 + return -ENXIO; 322 + } 323 + 324 + static int vgic_v2_get_attr(struct kvm_device *dev, 325 + struct kvm_device_attr *attr) 326 + { 327 + int ret; 328 + 329 + ret = vgic_get_common_attr(dev, attr); 330 + if (ret != -ENXIO) 331 + return ret; 332 + 333 + switch (attr->group) { 334 + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: 335 + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { 336 + u32 __user *uaddr = (u32 __user *)(long)attr->addr; 337 + u32 reg = 0; 338 + 339 + ret = vgic_attr_regs_access(dev, attr, &reg, false); 340 + if (ret) 341 + return ret; 342 + return put_user(reg, uaddr); 343 + } 344 + } 345 + 346 + return -ENXIO; 347 + } 348 + 349 + static int vgic_v2_has_attr(struct kvm_device *dev, 350 + struct kvm_device_attr *attr) 351 + { 352 + switch (attr->group) { 353 + case KVM_DEV_ARM_VGIC_GRP_ADDR: 354 + switch (attr->attr) { 355 + case KVM_VGIC_V2_ADDR_TYPE_DIST: 356 + case KVM_VGIC_V2_ADDR_TYPE_CPU: 357 + return 0; 358 + } 359 + break; 360 + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: 361 + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: 362 + return vgic_v2_has_attr_regs(dev, attr); 363 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: 364 + return 0; 365 + case KVM_DEV_ARM_VGIC_GRP_CTRL: 366 + switch (attr->attr) { 367 + case KVM_DEV_ARM_VGIC_CTRL_INIT: 368 + return 0; 369 + } 370 + } 371 + return -ENXIO; 372 + } 373 + 374 + struct kvm_device_ops kvm_arm_vgic_v2_ops = { 375 + .name = "kvm-arm-vgic-v2", 376 + .create = vgic_create, 377 + .destroy = vgic_destroy, 378 + .set_attr = vgic_v2_set_attr, 379 + .get_attr = vgic_v2_get_attr, 380 + .has_attr = vgic_v2_has_attr, 381 + }; 382 + 383 + /* V3 ops */ 384 + 385 + #ifdef CONFIG_KVM_ARM_VGIC_V3 386 + 387 + static int vgic_v3_set_attr(struct kvm_device *dev, 388 + struct kvm_device_attr *attr) 389 + { 390 + return vgic_set_common_attr(dev, attr); 391 + } 392 + 393 + static int vgic_v3_get_attr(struct kvm_device *dev, 394 + struct kvm_device_attr *attr) 395 + { 396 + return vgic_get_common_attr(dev, attr); 397 + } 398 + 399 + static int vgic_v3_has_attr(struct kvm_device *dev, 400 + struct kvm_device_attr *attr) 401 + { 402 + switch (attr->group) { 403 + case KVM_DEV_ARM_VGIC_GRP_ADDR: 404 + switch (attr->attr) { 405 + case KVM_VGIC_V3_ADDR_TYPE_DIST: 406 + case 
KVM_VGIC_V3_ADDR_TYPE_REDIST: 407 + return 0; 408 + } 409 + break; 410 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: 411 + return 0; 412 + case KVM_DEV_ARM_VGIC_GRP_CTRL: 413 + switch (attr->attr) { 414 + case KVM_DEV_ARM_VGIC_CTRL_INIT: 415 + return 0; 416 + } 417 + } 418 + return -ENXIO; 419 + } 420 + 421 + struct kvm_device_ops kvm_arm_vgic_v3_ops = { 422 + .name = "kvm-arm-vgic-v3", 423 + .create = vgic_create, 424 + .destroy = vgic_destroy, 425 + .set_attr = vgic_v3_set_attr, 426 + .get_attr = vgic_v3_get_attr, 427 + .has_attr = vgic_v3_has_attr, 428 + }; 429 + 430 + #endif /* CONFIG_KVM_ARM_VGIC_V3 */ 431 +
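The KVM_DEV_ARM_VGIC_GRP_NR_IRQS validation from vgic_set_common_attr() above, extracted into a checkable helper. The 1024 bound follows the in-code comment; the kernel's actual limit is VGIC_MAX_RESERVED from include/kvm/arm_vgic.h, so treat the constant here as an assumption of this sketch.

/* Accept an interrupt count only if it leaves at least 32 SPIs beyond
 * the 16 SGIs and 16 PPIs, stays within the upper bound, and is a
 * multiple of 32 -- the same three tests as the new device attribute. */
#include <stdio.h>

#define VGIC_NR_PRIVATE_IRQS 32
#define VGIC_MAX_RESERVED    1024	/* assumption; see lead-in */

static int nr_irqs_valid(unsigned int val)
{
	return val >= (VGIC_NR_PRIVATE_IRQS + 32) &&
	       val <= VGIC_MAX_RESERVED &&
	       (val & 31) == 0;
}

int main(void)
{
	unsigned int samples[] = { 32, 64, 100, 992 };
	int i;

	for (i = 0; i < 4; i++)
		printf("%4u -> %s\n", samples[i],
		       nr_irqs_valid(samples[i]) ? "ok" : "rejected");
	return 0;	/* 32 and 100 are rejected; 64 and 992 pass */
}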
+446
virt/kvm/arm/vgic/vgic-mmio-v2.c
··· 1 + /* 2 + * VGICv2 MMIO handling functions 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + */ 13 + 14 + #include <linux/irqchip/arm-gic.h> 15 + #include <linux/kvm.h> 16 + #include <linux/kvm_host.h> 17 + #include <kvm/iodev.h> 18 + #include <kvm/arm_vgic.h> 19 + 20 + #include "vgic.h" 21 + #include "vgic-mmio.h" 22 + 23 + static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, 24 + gpa_t addr, unsigned int len) 25 + { 26 + u32 value; 27 + 28 + switch (addr & 0x0c) { 29 + case GIC_DIST_CTRL: 30 + value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0; 31 + break; 32 + case GIC_DIST_CTR: 33 + value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; 34 + value = (value >> 5) - 1; 35 + value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; 36 + break; 37 + case GIC_DIST_IIDR: 38 + value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); 39 + break; 40 + default: 41 + return 0; 42 + } 43 + 44 + return value; 45 + } 46 + 47 + static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, 48 + gpa_t addr, unsigned int len, 49 + unsigned long val) 50 + { 51 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 52 + bool was_enabled = dist->enabled; 53 + 54 + switch (addr & 0x0c) { 55 + case GIC_DIST_CTRL: 56 + dist->enabled = val & GICD_ENABLE; 57 + if (!was_enabled && dist->enabled) 58 + vgic_kick_vcpus(vcpu->kvm); 59 + break; 60 + case GIC_DIST_CTR: 61 + case GIC_DIST_IIDR: 62 + /* Nothing to do */ 63 + return; 64 + } 65 + } 66 + 67 + static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, 68 + gpa_t addr, unsigned int len, 69 + unsigned long val) 70 + { 71 + int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus); 72 + int intid = val & 0xf; 73 + int targets = (val >> 16) & 0xff; 74 + int mode = (val >> 24) & 0x03; 75 + int c; 76 + struct kvm_vcpu *vcpu; 77 + 78 + switch (mode) { 79 + case 0x0: /* as specified by targets */ 80 + break; 81 + case 0x1: 82 + targets = (1U << nr_vcpus) - 1; /* all, ... 
*/ 83 + targets &= ~(1U << source_vcpu->vcpu_id); /* but self */ 84 + break; 85 + case 0x2: /* this very vCPU only */ 86 + targets = (1U << source_vcpu->vcpu_id); 87 + break; 88 + case 0x3: /* reserved */ 89 + return; 90 + } 91 + 92 + kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) { 93 + struct vgic_irq *irq; 94 + 95 + if (!(targets & (1U << c))) 96 + continue; 97 + 98 + irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); 99 + 100 + spin_lock(&irq->irq_lock); 101 + irq->pending = true; 102 + irq->source |= 1U << source_vcpu->vcpu_id; 103 + 104 + vgic_queue_irq_unlock(source_vcpu->kvm, irq); 105 + } 106 + } 107 + 108 + static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, 109 + gpa_t addr, unsigned int len) 110 + { 111 + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); 112 + int i; 113 + u64 val = 0; 114 + 115 + for (i = 0; i < len; i++) { 116 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 117 + 118 + val |= (u64)irq->targets << (i * 8); 119 + } 120 + 121 + return val; 122 + } 123 + 124 + static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, 125 + gpa_t addr, unsigned int len, 126 + unsigned long val) 127 + { 128 + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); 129 + int i; 130 + 131 + /* GICD_ITARGETSR[0-7] are read-only */ 132 + if (intid < VGIC_NR_PRIVATE_IRQS) 133 + return; 134 + 135 + for (i = 0; i < len; i++) { 136 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); 137 + int target; 138 + 139 + spin_lock(&irq->irq_lock); 140 + 141 + irq->targets = (val >> (i * 8)) & 0xff; 142 + target = irq->targets ? __ffs(irq->targets) : 0; 143 + irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); 144 + 145 + spin_unlock(&irq->irq_lock); 146 + } 147 + } 148 + 149 + static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, 150 + gpa_t addr, unsigned int len) 151 + { 152 + u32 intid = addr & 0x0f; 153 + int i; 154 + u64 val = 0; 155 + 156 + for (i = 0; i < len; i++) { 157 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 158 + 159 + val |= (u64)irq->source << (i * 8); 160 + } 161 + return val; 162 + } 163 + 164 + static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, 165 + gpa_t addr, unsigned int len, 166 + unsigned long val) 167 + { 168 + u32 intid = addr & 0x0f; 169 + int i; 170 + 171 + for (i = 0; i < len; i++) { 172 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 173 + 174 + spin_lock(&irq->irq_lock); 175 + 176 + irq->source &= ~((val >> (i * 8)) & 0xff); 177 + if (!irq->source) 178 + irq->pending = false; 179 + 180 + spin_unlock(&irq->irq_lock); 181 + } 182 + } 183 + 184 + static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, 185 + gpa_t addr, unsigned int len, 186 + unsigned long val) 187 + { 188 + u32 intid = addr & 0x0f; 189 + int i; 190 + 191 + for (i = 0; i < len; i++) { 192 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 193 + 194 + spin_lock(&irq->irq_lock); 195 + 196 + irq->source |= (val >> (i * 8)) & 0xff; 197 + 198 + if (irq->source) { 199 + irq->pending = true; 200 + vgic_queue_irq_unlock(vcpu->kvm, irq); 201 + } else { 202 + spin_unlock(&irq->irq_lock); 203 + } 204 + } 205 + } 206 + 207 + static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 208 + { 209 + if (kvm_vgic_global_state.type == VGIC_V2) 210 + vgic_v2_set_vmcr(vcpu, vmcr); 211 + else 212 + vgic_v3_set_vmcr(vcpu, vmcr); 213 + } 214 + 215 + static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 216 + { 217 + if (kvm_vgic_global_state.type == VGIC_V2) 218 + vgic_v2_get_vmcr(vcpu, 
vmcr); 219 + else 220 + vgic_v3_get_vmcr(vcpu, vmcr); 221 + } 222 + 223 + #define GICC_ARCH_VERSION_V2 0x2 224 + 225 + /* These are for userland accesses only, there is no guest-facing emulation. */ 226 + static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, 227 + gpa_t addr, unsigned int len) 228 + { 229 + struct vgic_vmcr vmcr; 230 + u32 val; 231 + 232 + vgic_get_vmcr(vcpu, &vmcr); 233 + 234 + switch (addr & 0xff) { 235 + case GIC_CPU_CTRL: 236 + val = vmcr.ctlr; 237 + break; 238 + case GIC_CPU_PRIMASK: 239 + val = vmcr.pmr; 240 + break; 241 + case GIC_CPU_BINPOINT: 242 + val = vmcr.bpr; 243 + break; 244 + case GIC_CPU_ALIAS_BINPOINT: 245 + val = vmcr.abpr; 246 + break; 247 + case GIC_CPU_IDENT: 248 + val = ((PRODUCT_ID_KVM << 20) | 249 + (GICC_ARCH_VERSION_V2 << 16) | 250 + IMPLEMENTER_ARM); 251 + break; 252 + default: 253 + return 0; 254 + } 255 + 256 + return val; 257 + } 258 + 259 + static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, 260 + gpa_t addr, unsigned int len, 261 + unsigned long val) 262 + { 263 + struct vgic_vmcr vmcr; 264 + 265 + vgic_get_vmcr(vcpu, &vmcr); 266 + 267 + switch (addr & 0xff) { 268 + case GIC_CPU_CTRL: 269 + vmcr.ctlr = val; 270 + break; 271 + case GIC_CPU_PRIMASK: 272 + vmcr.pmr = val; 273 + break; 274 + case GIC_CPU_BINPOINT: 275 + vmcr.bpr = val; 276 + break; 277 + case GIC_CPU_ALIAS_BINPOINT: 278 + vmcr.abpr = val; 279 + break; 280 + } 281 + 282 + vgic_set_vmcr(vcpu, &vmcr); 283 + } 284 + 285 + static const struct vgic_register_region vgic_v2_dist_registers[] = { 286 + REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, 287 + vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, 288 + VGIC_ACCESS_32bit), 289 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, 290 + vgic_mmio_read_rao, vgic_mmio_write_wi, 1, 291 + VGIC_ACCESS_32bit), 292 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, 293 + vgic_mmio_read_enable, vgic_mmio_write_senable, 1, 294 + VGIC_ACCESS_32bit), 295 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, 296 + vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, 297 + VGIC_ACCESS_32bit), 298 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, 299 + vgic_mmio_read_pending, vgic_mmio_write_spending, 1, 300 + VGIC_ACCESS_32bit), 301 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, 302 + vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, 303 + VGIC_ACCESS_32bit), 304 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, 305 + vgic_mmio_read_active, vgic_mmio_write_sactive, 1, 306 + VGIC_ACCESS_32bit), 307 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, 308 + vgic_mmio_read_active, vgic_mmio_write_cactive, 1, 309 + VGIC_ACCESS_32bit), 310 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, 311 + vgic_mmio_read_priority, vgic_mmio_write_priority, 8, 312 + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), 313 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, 314 + vgic_mmio_read_target, vgic_mmio_write_target, 8, 315 + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), 316 + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, 317 + vgic_mmio_read_config, vgic_mmio_write_config, 2, 318 + VGIC_ACCESS_32bit), 319 + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, 320 + vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, 321 + VGIC_ACCESS_32bit), 322 + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR, 323 + vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16, 324 + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), 325 + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET, 326 + vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16, 327 + VGIC_ACCESS_32bit | 
VGIC_ACCESS_8bit), 328 + }; 329 + 330 + static const struct vgic_register_region vgic_v2_cpu_registers[] = { 331 + REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL, 332 + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 333 + VGIC_ACCESS_32bit), 334 + REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK, 335 + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 336 + VGIC_ACCESS_32bit), 337 + REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT, 338 + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 339 + VGIC_ACCESS_32bit), 340 + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT, 341 + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 342 + VGIC_ACCESS_32bit), 343 + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, 344 + vgic_mmio_read_raz, vgic_mmio_write_wi, 16, 345 + VGIC_ACCESS_32bit), 346 + REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, 347 + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 348 + VGIC_ACCESS_32bit), 349 + }; 350 + 351 + unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) 352 + { 353 + dev->regions = vgic_v2_dist_registers; 354 + dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); 355 + 356 + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); 357 + 358 + return SZ_4K; 359 + } 360 + 361 + int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) 362 + { 363 + int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; 364 + const struct vgic_register_region *regions; 365 + gpa_t addr; 366 + int nr_regions, i, len; 367 + 368 + addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; 369 + 370 + switch (attr->group) { 371 + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: 372 + regions = vgic_v2_dist_registers; 373 + nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); 374 + break; 375 + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: 376 + regions = vgic_v2_cpu_registers; 377 + nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); 378 + break; 379 + default: 380 + return -ENXIO; 381 + } 382 + 383 + /* We only support aligned 32-bit accesses. */ 384 + if (addr & 3) 385 + return -ENXIO; 386 + 387 + for (i = 0; i < nr_regions; i++) { 388 + if (regions[i].bits_per_irq) 389 + len = (regions[i].bits_per_irq * nr_irqs) / 8; 390 + else 391 + len = regions[i].len; 392 + 393 + if (regions[i].reg_offset <= addr && 394 + regions[i].reg_offset + len > addr) 395 + return 0; 396 + } 397 + 398 + return -ENXIO; 399 + } 400 + 401 + /* 402 + * When userland tries to access the VGIC register handlers, we need to 403 + * create a usable struct vgic_io_device to be passed to the handlers and we 404 + * have to set up a buffer similar to what would have happened if a guest MMIO 405 + * access occurred, including doing endian conversions on BE systems. 
406 + */ 407 + static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, 408 + bool is_write, int offset, u32 *val) 409 + { 410 + unsigned int len = 4; 411 + u8 buf[4]; 412 + int ret; 413 + 414 + if (is_write) { 415 + vgic_data_host_to_mmio_bus(buf, len, *val); 416 + ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf); 417 + } else { 418 + ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf); 419 + if (!ret) 420 + *val = vgic_data_mmio_bus_to_host(buf, len); 421 + } 422 + 423 + return ret; 424 + } 425 + 426 + int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, 427 + int offset, u32 *val) 428 + { 429 + struct vgic_io_device dev = { 430 + .regions = vgic_v2_cpu_registers, 431 + .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), 432 + }; 433 + 434 + return vgic_uaccess(vcpu, &dev, is_write, offset, val); 435 + } 436 + 437 + int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, 438 + int offset, u32 *val) 439 + { 440 + struct vgic_io_device dev = { 441 + .regions = vgic_v2_dist_registers, 442 + .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), 443 + }; 444 + 445 + return vgic_uaccess(vcpu, &dev, is_write, offset, val); 446 + }
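Note: vgic_mmio_write_sgir() above splits a GICD_SGIR write into an SGI number, a target list and a routing mode before injecting the interrupt into each matching VCPU. A standalone sketch of the same bit-slicing, with an illustrative register value:

#include <stdint.h>
#include <stdio.h>

/* Slice a GICD_SGIR write exactly as the handler above does.
 * The example value requests SGI 5 with routing mode 1
 * ("all CPUs but self"), so the target list field is ignored. */
int main(void)
{
	uint32_t val = (1U << 24) | 5;

	unsigned int intid   = val & 0xf;
	unsigned int targets = (val >> 16) & 0xff;
	unsigned int mode    = (val >> 24) & 0x03;

	printf("SGI %u, mode %u, raw target list 0x%02x\n",
	       intid, mode, targets);
	return 0;
}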
+455
virt/kvm/arm/vgic/vgic-mmio-v3.c
··· 1 + /* 2 + * VGICv3 MMIO handling functions 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + */ 13 + 14 + #include <linux/irqchip/arm-gic-v3.h> 15 + #include <linux/kvm.h> 16 + #include <linux/kvm_host.h> 17 + #include <kvm/iodev.h> 18 + #include <kvm/arm_vgic.h> 19 + 20 + #include <asm/kvm_emulate.h> 21 + 22 + #include "vgic.h" 23 + #include "vgic-mmio.h" 24 + 25 + /* extract @num bytes at @offset bytes offset in data */ 26 + static unsigned long extract_bytes(unsigned long data, unsigned int offset, 27 + unsigned int num) 28 + { 29 + return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); 30 + } 31 + 32 + static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, 33 + gpa_t addr, unsigned int len) 34 + { 35 + u32 value = 0; 36 + 37 + switch (addr & 0x0c) { 38 + case GICD_CTLR: 39 + if (vcpu->kvm->arch.vgic.enabled) 40 + value |= GICD_CTLR_ENABLE_SS_G1; 41 + value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; 42 + break; 43 + case GICD_TYPER: 44 + value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; 45 + value = (value >> 5) - 1; 46 + value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; 47 + break; 48 + case GICD_IIDR: 49 + value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); 50 + break; 51 + default: 52 + return 0; 53 + } 54 + 55 + return value; 56 + } 57 + 58 + static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, 59 + gpa_t addr, unsigned int len, 60 + unsigned long val) 61 + { 62 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 63 + bool was_enabled = dist->enabled; 64 + 65 + switch (addr & 0x0c) { 66 + case GICD_CTLR: 67 + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; 68 + 69 + if (!was_enabled && dist->enabled) 70 + vgic_kick_vcpus(vcpu->kvm); 71 + break; 72 + case GICD_TYPER: 73 + case GICD_IIDR: 74 + return; 75 + } 76 + } 77 + 78 + static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, 79 + gpa_t addr, unsigned int len) 80 + { 81 + int intid = VGIC_ADDR_TO_INTID(addr, 64); 82 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); 83 + 84 + if (!irq) 85 + return 0; 86 + 87 + /* The upper word is RAZ for us. */ 88 + if (addr & 4) 89 + return 0; 90 + 91 + return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); 92 + } 93 + 94 + static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, 95 + gpa_t addr, unsigned int len, 96 + unsigned long val) 97 + { 98 + int intid = VGIC_ADDR_TO_INTID(addr, 64); 99 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); 100 + 101 + if (!irq) 102 + return; 103 + 104 + /* The upper word is WI for us since we don't implement Aff3. */ 105 + if (addr & 4) 106 + return; 107 + 108 + spin_lock(&irq->irq_lock); 109 + 110 + /* We only care about and preserve Aff0, Aff1 and Aff2. 
*/ 111 + irq->mpidr = val & GENMASK(23, 0); 112 + irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); 113 + 114 + spin_unlock(&irq->irq_lock); 115 + } 116 + 117 + static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, 118 + gpa_t addr, unsigned int len) 119 + { 120 + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); 121 + int target_vcpu_id = vcpu->vcpu_id; 122 + u64 value; 123 + 124 + value = (mpidr & GENMASK(23, 0)) << 32; 125 + value |= ((target_vcpu_id & 0xffff) << 8); 126 + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) 127 + value |= GICR_TYPER_LAST; 128 + 129 + return extract_bytes(value, addr & 7, len); 130 + } 131 + 132 + static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, 133 + gpa_t addr, unsigned int len) 134 + { 135 + return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); 136 + } 137 + 138 + static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, 139 + gpa_t addr, unsigned int len) 140 + { 141 + switch (addr & 0xffff) { 142 + case GICD_PIDR2: 143 + /* report a GICv3 compliant implementation */ 144 + return 0x3b; 145 + } 146 + 147 + return 0; 148 + } 149 + 150 + /* 151 + * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the 152 + * redistributors, while SPIs are covered by registers in the distributor 153 + * block. Trying to set private IRQs in this block gets ignored. 154 + * We take some special care here to fix the calculation of the register 155 + * offset. 156 + */ 157 + #define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \ 158 + { \ 159 + .reg_offset = off, \ 160 + .bits_per_irq = bpi, \ 161 + .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ 162 + .access_flags = acc, \ 163 + .read = vgic_mmio_read_raz, \ 164 + .write = vgic_mmio_write_wi, \ 165 + }, { \ 166 + .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ 167 + .bits_per_irq = bpi, \ 168 + .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \ 169 + .access_flags = acc, \ 170 + .read = rd, \ 171 + .write = wr, \ 172 + } 173 + 174 + static const struct vgic_register_region vgic_v3_dist_registers[] = { 175 + REGISTER_DESC_WITH_LENGTH(GICD_CTLR, 176 + vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16, 177 + VGIC_ACCESS_32bit), 178 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, 179 + vgic_mmio_read_rao, vgic_mmio_write_wi, 1, 180 + VGIC_ACCESS_32bit), 181 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, 182 + vgic_mmio_read_enable, vgic_mmio_write_senable, 1, 183 + VGIC_ACCESS_32bit), 184 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, 185 + vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, 186 + VGIC_ACCESS_32bit), 187 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, 188 + vgic_mmio_read_pending, vgic_mmio_write_spending, 1, 189 + VGIC_ACCESS_32bit), 190 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, 191 + vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, 192 + VGIC_ACCESS_32bit), 193 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, 194 + vgic_mmio_read_active, vgic_mmio_write_sactive, 1, 195 + VGIC_ACCESS_32bit), 196 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, 197 + vgic_mmio_read_active, vgic_mmio_write_cactive, 1, 198 + VGIC_ACCESS_32bit), 199 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, 200 + vgic_mmio_read_priority, vgic_mmio_write_priority, 8, 201 + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), 202 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, 203 + vgic_mmio_read_raz, vgic_mmio_write_wi, 8, 204 + VGIC_ACCESS_32bit | 
VGIC_ACCESS_8bit), 205 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, 206 + vgic_mmio_read_config, vgic_mmio_write_config, 2, 207 + VGIC_ACCESS_32bit), 208 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, 209 + vgic_mmio_read_raz, vgic_mmio_write_wi, 1, 210 + VGIC_ACCESS_32bit), 211 + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, 212 + vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64, 213 + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), 214 + REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, 215 + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, 216 + VGIC_ACCESS_32bit), 217 + }; 218 + 219 + static const struct vgic_register_region vgic_v3_rdbase_registers[] = { 220 + REGISTER_DESC_WITH_LENGTH(GICR_CTLR, 221 + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, 222 + VGIC_ACCESS_32bit), 223 + REGISTER_DESC_WITH_LENGTH(GICR_IIDR, 224 + vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, 225 + VGIC_ACCESS_32bit), 226 + REGISTER_DESC_WITH_LENGTH(GICR_TYPER, 227 + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, 228 + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), 229 + REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, 230 + vgic_mmio_read_raz, vgic_mmio_write_wi, 8, 231 + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), 232 + REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, 233 + vgic_mmio_read_raz, vgic_mmio_write_wi, 8, 234 + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), 235 + REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, 236 + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, 237 + VGIC_ACCESS_32bit), 238 + }; 239 + 240 + static const struct vgic_register_region vgic_v3_sgibase_registers[] = { 241 + REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, 242 + vgic_mmio_read_rao, vgic_mmio_write_wi, 4, 243 + VGIC_ACCESS_32bit), 244 + REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, 245 + vgic_mmio_read_enable, vgic_mmio_write_senable, 4, 246 + VGIC_ACCESS_32bit), 247 + REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, 248 + vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, 249 + VGIC_ACCESS_32bit), 250 + REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0, 251 + vgic_mmio_read_pending, vgic_mmio_write_spending, 4, 252 + VGIC_ACCESS_32bit), 253 + REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0, 254 + vgic_mmio_read_pending, vgic_mmio_write_cpending, 4, 255 + VGIC_ACCESS_32bit), 256 + REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, 257 + vgic_mmio_read_active, vgic_mmio_write_sactive, 4, 258 + VGIC_ACCESS_32bit), 259 + REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, 260 + vgic_mmio_read_active, vgic_mmio_write_cactive, 4, 261 + VGIC_ACCESS_32bit), 262 + REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, 263 + vgic_mmio_read_priority, vgic_mmio_write_priority, 32, 264 + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), 265 + REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0, 266 + vgic_mmio_read_config, vgic_mmio_write_config, 8, 267 + VGIC_ACCESS_32bit), 268 + REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0, 269 + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, 270 + VGIC_ACCESS_32bit), 271 + REGISTER_DESC_WITH_LENGTH(GICR_NSACR, 272 + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, 273 + VGIC_ACCESS_32bit), 274 + }; 275 + 276 + unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) 277 + { 278 + dev->regions = vgic_v3_dist_registers; 279 + dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); 280 + 281 + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); 282 + 283 + return SZ_64K; 284 + } 285 + 286 + int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) 287 + { 288 + int nr_vcpus = atomic_read(&kvm->online_vcpus); 289 + struct kvm_vcpu *vcpu; 290 + struct vgic_io_device *devices; 291 + int c, ret = 0; 292 + 293 + 
devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2, 294 + GFP_KERNEL); 295 + if (!devices) 296 + return -ENOMEM; 297 + 298 + kvm_for_each_vcpu(c, vcpu, kvm) { 299 + gpa_t rd_base = redist_base_address + c * SZ_64K * 2; 300 + gpa_t sgi_base = rd_base + SZ_64K; 301 + struct vgic_io_device *rd_dev = &devices[c * 2]; 302 + struct vgic_io_device *sgi_dev = &devices[c * 2 + 1]; 303 + 304 + kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); 305 + rd_dev->base_addr = rd_base; 306 + rd_dev->regions = vgic_v3_rdbase_registers; 307 + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); 308 + rd_dev->redist_vcpu = vcpu; 309 + 310 + mutex_lock(&kvm->slots_lock); 311 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, 312 + SZ_64K, &rd_dev->dev); 313 + mutex_unlock(&kvm->slots_lock); 314 + 315 + if (ret) 316 + break; 317 + 318 + kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); 319 + sgi_dev->base_addr = sgi_base; 320 + sgi_dev->regions = vgic_v3_sgibase_registers; 321 + sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); 322 + sgi_dev->redist_vcpu = vcpu; 323 + 324 + mutex_lock(&kvm->slots_lock); 325 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, 326 + SZ_64K, &sgi_dev->dev); 327 + mutex_unlock(&kvm->slots_lock); 328 + if (ret) { 329 + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, 330 + &rd_dev->dev); 331 + break; 332 + } 333 + } 334 + 335 + if (ret) { 336 + /* The current c failed, so we start with the previous one. */ 337 + for (c--; c >= 0; c--) { 338 + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, 339 + &devices[c * 2].dev); 340 + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, 341 + &devices[c * 2 + 1].dev); 342 + } 343 + kfree(devices); 344 + } else { 345 + kvm->arch.vgic.redist_iodevs = devices; 346 + } 347 + 348 + return ret; 349 + } 350 + 351 + /* 352 + * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI 353 + * generation register ICC_SGI1R_EL1) with a given VCPU. 354 + * If the VCPU's MPIDR matches, return the level0 affinity, otherwise 355 + * return -1. 356 + */ 357 + static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) 358 + { 359 + unsigned long affinity; 360 + int level0; 361 + 362 + /* 363 + * Split the current VCPU's MPIDR into affinity level 0 and the 364 + * rest as this is what we have to compare against. 365 + */ 366 + affinity = kvm_vcpu_get_mpidr_aff(vcpu); 367 + level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); 368 + affinity &= ~MPIDR_LEVEL_MASK; 369 + 370 + /* bail out if the upper three levels don't match */ 371 + if (sgi_aff != affinity) 372 + return -1; 373 + 374 + /* Is this VCPU's bit set in the mask ? */ 375 + if (!(sgi_cpu_mask & BIT(level0))) 376 + return -1; 377 + 378 + return level0; 379 + } 380 + 381 + /* 382 + * The ICC_SGI* registers encode the affinity differently from the MPIDR, 383 + * so provide a wrapper to use the existing defines to isolate a certain 384 + * affinity level. 385 + */ 386 + #define SGI_AFFINITY_LEVEL(reg, level) \ 387 + ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ 388 + >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) 389 + 390 + /** 391 + * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs 392 + * @vcpu: The VCPU requesting a SGI 393 + * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU 394 + * 395 + * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. 396 + * This will trap in sys_regs.c and call this function. 
397 + * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the 398 + * target processors as well as a bitmask of 16 Aff0 CPUs. 399 + * If the interrupt routing mode bit is not set, we iterate over all VCPUs to 400 + * check for matching ones. If this bit is set, we signal all, but not the 401 + * calling VCPU. 402 + */ 403 + void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) 404 + { 405 + struct kvm *kvm = vcpu->kvm; 406 + struct kvm_vcpu *c_vcpu; 407 + u16 target_cpus; 408 + u64 mpidr; 409 + int sgi, c; 410 + int vcpu_id = vcpu->vcpu_id; 411 + bool broadcast; 412 + 413 + sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; 414 + broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); 415 + target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; 416 + mpidr = SGI_AFFINITY_LEVEL(reg, 3); 417 + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); 418 + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); 419 + 420 + /* 421 + * We iterate over all VCPUs to find the MPIDRs matching the request. 422 + * If we have handled one CPU, we clear its bit to detect early 423 + * if we are already finished. This avoids iterating through all 424 + * VCPUs when most of the times we just signal a single VCPU. 425 + */ 426 + kvm_for_each_vcpu(c, c_vcpu, kvm) { 427 + struct vgic_irq *irq; 428 + 429 + /* Exit early if we have dealt with all requested CPUs */ 430 + if (!broadcast && target_cpus == 0) 431 + break; 432 + 433 + /* Don't signal the calling VCPU */ 434 + if (broadcast && c == vcpu_id) 435 + continue; 436 + 437 + if (!broadcast) { 438 + int level0; 439 + 440 + level0 = match_mpidr(mpidr, target_cpus, c_vcpu); 441 + if (level0 == -1) 442 + continue; 443 + 444 + /* remove this matching VCPU from the mask */ 445 + target_cpus &= ~BIT(level0); 446 + } 447 + 448 + irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); 449 + 450 + spin_lock(&irq->irq_lock); 451 + irq->pending = true; 452 + 453 + vgic_queue_irq_unlock(vcpu->kvm, irq); 454 + } 455 + }
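Note: extract_bytes() above is what lets a single 64-bit register value such as GICR_TYPER back both halves of a 32-bit guest access. A standalone restatement (valid for num < 8, which is all any 1-, 2- or 4-byte access needs; the kernel's GENMASK_ULL version also covers num == 8):

#include <stdint.h>
#include <stdio.h>

/* Pick @num bytes starting @offset bytes into @data,
 * mirroring extract_bytes() above for num < 8. */
static uint64_t extract_bytes(uint64_t data, unsigned int offset,
			      unsigned int num)
{
	return (data >> (offset * 8)) & ((1ULL << (num * 8)) - 1);
}

/* A 64-bit GICR_TYPER value served as two aligned 32-bit reads,
 * the way vgic_mmio_read_v3r_typer() handles addr & 7. */
int main(void)
{
	uint64_t typer = 0x0000001100000110ULL;	/* illustrative value */

	printf("word at +0: 0x%08llx\n",
	       (unsigned long long)extract_bytes(typer, 0, 4));
	printf("word at +4: 0x%08llx\n",
	       (unsigned long long)extract_bytes(typer, 4, 4));
	return 0;
}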
+526
virt/kvm/arm/vgic/vgic-mmio.c
··· 1 + /* 2 + * VGIC MMIO handling functions 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + */ 13 + 14 + #include <linux/bitops.h> 15 + #include <linux/bsearch.h> 16 + #include <linux/kvm.h> 17 + #include <linux/kvm_host.h> 18 + #include <kvm/iodev.h> 19 + #include <kvm/arm_vgic.h> 20 + 21 + #include "vgic.h" 22 + #include "vgic-mmio.h" 23 + 24 + unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, 25 + gpa_t addr, unsigned int len) 26 + { 27 + return 0; 28 + } 29 + 30 + unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, 31 + gpa_t addr, unsigned int len) 32 + { 33 + return -1UL; 34 + } 35 + 36 + void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, 37 + unsigned int len, unsigned long val) 38 + { 39 + /* Ignore */ 40 + } 41 + 42 + /* 43 + * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value 44 + * of the enabled bit, so there is only one function for both here. 45 + */ 46 + unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, 47 + gpa_t addr, unsigned int len) 48 + { 49 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 50 + u32 value = 0; 51 + int i; 52 + 53 + /* Loop over all IRQs affected by this read */ 54 + for (i = 0; i < len * 8; i++) { 55 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 56 + 57 + if (irq->enabled) 58 + value |= (1U << i); 59 + } 60 + 61 + return value; 62 + } 63 + 64 + void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, 65 + gpa_t addr, unsigned int len, 66 + unsigned long val) 67 + { 68 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 69 + int i; 70 + 71 + for_each_set_bit(i, &val, len * 8) { 72 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 73 + 74 + spin_lock(&irq->irq_lock); 75 + irq->enabled = true; 76 + vgic_queue_irq_unlock(vcpu->kvm, irq); 77 + } 78 + } 79 + 80 + void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, 81 + gpa_t addr, unsigned int len, 82 + unsigned long val) 83 + { 84 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 85 + int i; 86 + 87 + for_each_set_bit(i, &val, len * 8) { 88 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 89 + 90 + spin_lock(&irq->irq_lock); 91 + 92 + irq->enabled = false; 93 + 94 + spin_unlock(&irq->irq_lock); 95 + } 96 + } 97 + 98 + unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, 99 + gpa_t addr, unsigned int len) 100 + { 101 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 102 + u32 value = 0; 103 + int i; 104 + 105 + /* Loop over all IRQs affected by this read */ 106 + for (i = 0; i < len * 8; i++) { 107 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 108 + 109 + if (irq->pending) 110 + value |= (1U << i); 111 + } 112 + 113 + return value; 114 + } 115 + 116 + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, 117 + gpa_t addr, unsigned int len, 118 + unsigned long val) 119 + { 120 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 121 + int i; 122 + 123 + for_each_set_bit(i, &val, len * 8) { 124 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 125 + 126 + spin_lock(&irq->irq_lock); 127 + irq->pending = true; 128 + if (irq->config == VGIC_CONFIG_LEVEL) 129 + irq->soft_pending = true; 130 + 
131 + vgic_queue_irq_unlock(vcpu->kvm, irq); 132 + } 133 + } 134 + 135 + void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, 136 + gpa_t addr, unsigned int len, 137 + unsigned long val) 138 + { 139 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 140 + int i; 141 + 142 + for_each_set_bit(i, &val, len * 8) { 143 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 144 + 145 + spin_lock(&irq->irq_lock); 146 + 147 + if (irq->config == VGIC_CONFIG_LEVEL) { 148 + irq->soft_pending = false; 149 + irq->pending = irq->line_level; 150 + } else { 151 + irq->pending = false; 152 + } 153 + 154 + spin_unlock(&irq->irq_lock); 155 + } 156 + } 157 + 158 + unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, 159 + gpa_t addr, unsigned int len) 160 + { 161 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 162 + u32 value = 0; 163 + int i; 164 + 165 + /* Loop over all IRQs affected by this read */ 166 + for (i = 0; i < len * 8; i++) { 167 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 168 + 169 + if (irq->active) 170 + value |= (1U << i); 171 + } 172 + 173 + return value; 174 + } 175 + 176 + static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, 177 + bool new_active_state) 178 + { 179 + spin_lock(&irq->irq_lock); 180 + /* 181 + * If this virtual IRQ was written into a list register, we 182 + * have to make sure the CPU that runs the VCPU thread has 183 + * synced back LR state to the struct vgic_irq. We can only 184 + * know this for sure, when either this irq is not assigned to 185 + * anyone's AP list anymore, or the VCPU thread is not 186 + * running on any CPUs. 187 + * 188 + * In the opposite case, we know the VCPU thread may be on its 189 + * way back from the guest and still has to sync back this 190 + * IRQ, so we release and re-acquire the spin_lock to let the 191 + * other thread sync back the IRQ. 192 + */ 193 + while (irq->vcpu && /* IRQ may have state in an LR somewhere */ 194 + irq->vcpu->cpu != -1) { /* VCPU thread is running */ 195 + BUG_ON(irq->intid < VGIC_NR_PRIVATE_IRQS); 196 + cond_resched_lock(&irq->irq_lock); 197 + } 198 + 199 + irq->active = new_active_state; 200 + if (new_active_state) 201 + vgic_queue_irq_unlock(vcpu->kvm, irq); 202 + else 203 + spin_unlock(&irq->irq_lock); 204 + } 205 + 206 + /* 207 + * If we are fiddling with an IRQ's active state, we have to make sure the IRQ 208 + * is not queued on some running VCPU's LRs, because then the change to the 209 + * active state can be overwritten when the VCPU's state is synced coming back 210 + * from the guest. 211 + * 212 + * For shared interrupts, we have to stop all the VCPUs because interrupts can 213 + * be migrated while we don't hold the IRQ locks and we don't want to be 214 + * chasing moving targets. 215 + * 216 + * For private interrupts, we only have to make sure the single and only VCPU 217 + * that can potentially queue the IRQ is stopped. 
218 + */ 219 + static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) 220 + { 221 + if (intid < VGIC_NR_PRIVATE_IRQS) 222 + kvm_arm_halt_vcpu(vcpu); 223 + else 224 + kvm_arm_halt_guest(vcpu->kvm); 225 + } 226 + 227 + /* See vgic_change_active_prepare */ 228 + static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) 229 + { 230 + if (intid < VGIC_NR_PRIVATE_IRQS) 231 + kvm_arm_resume_vcpu(vcpu); 232 + else 233 + kvm_arm_resume_guest(vcpu->kvm); 234 + } 235 + 236 + void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, 237 + gpa_t addr, unsigned int len, 238 + unsigned long val) 239 + { 240 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 241 + int i; 242 + 243 + vgic_change_active_prepare(vcpu, intid); 244 + for_each_set_bit(i, &val, len * 8) { 245 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 246 + vgic_mmio_change_active(vcpu, irq, false); 247 + } 248 + vgic_change_active_finish(vcpu, intid); 249 + } 250 + 251 + void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, 252 + gpa_t addr, unsigned int len, 253 + unsigned long val) 254 + { 255 + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 256 + int i; 257 + 258 + vgic_change_active_prepare(vcpu, intid); 259 + for_each_set_bit(i, &val, len * 8) { 260 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 261 + vgic_mmio_change_active(vcpu, irq, true); 262 + } 263 + vgic_change_active_finish(vcpu, intid); 264 + } 265 + 266 + unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, 267 + gpa_t addr, unsigned int len) 268 + { 269 + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); 270 + int i; 271 + u64 val = 0; 272 + 273 + for (i = 0; i < len; i++) { 274 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 275 + 276 + val |= (u64)irq->priority << (i * 8); 277 + } 278 + 279 + return val; 280 + } 281 + 282 + /* 283 + * We currently don't handle changing the priority of an interrupt that 284 + * is already pending on a VCPU. If there is a need for this, we would 285 + * need to make this VCPU exit and re-evaluate the priorities, potentially 286 + * leading to this interrupt getting presented now to the guest (if it has 287 + * been masked by the priority mask before). 
288 + */ 289 + void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, 290 + gpa_t addr, unsigned int len, 291 + unsigned long val) 292 + { 293 + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); 294 + int i; 295 + 296 + for (i = 0; i < len; i++) { 297 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 298 + 299 + spin_lock(&irq->irq_lock); 300 + /* Narrow the priority range to what we actually support */ 301 + irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); 302 + spin_unlock(&irq->irq_lock); 303 + } 304 + } 305 + 306 + unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, 307 + gpa_t addr, unsigned int len) 308 + { 309 + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); 310 + u32 value = 0; 311 + int i; 312 + 313 + for (i = 0; i < len * 4; i++) { 314 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 315 + 316 + if (irq->config == VGIC_CONFIG_EDGE) 317 + value |= (2U << (i * 2)); 318 + } 319 + 320 + return value; 321 + } 322 + 323 + void vgic_mmio_write_config(struct kvm_vcpu *vcpu, 324 + gpa_t addr, unsigned int len, 325 + unsigned long val) 326 + { 327 + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); 328 + int i; 329 + 330 + for (i = 0; i < len * 4; i++) { 331 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 332 + 333 + /* 334 + * The configuration cannot be changed for SGIs in general, 335 + * for PPIs this is IMPLEMENTATION DEFINED. The arch timer 336 + * code relies on PPIs being level triggered, so we also 337 + * make them read-only here. 338 + */ 339 + if (intid + i < VGIC_NR_PRIVATE_IRQS) 340 + continue; 341 + 342 + spin_lock(&irq->irq_lock); 343 + if (test_bit(i * 2 + 1, &val)) { 344 + irq->config = VGIC_CONFIG_EDGE; 345 + } else { 346 + irq->config = VGIC_CONFIG_LEVEL; 347 + irq->pending = irq->line_level | irq->soft_pending; 348 + } 349 + spin_unlock(&irq->irq_lock); 350 + } 351 + } 352 + 353 + static int match_region(const void *key, const void *elt) 354 + { 355 + const unsigned int offset = (unsigned long)key; 356 + const struct vgic_register_region *region = elt; 357 + 358 + if (offset < region->reg_offset) 359 + return -1; 360 + 361 + if (offset >= region->reg_offset + region->len) 362 + return 1; 363 + 364 + return 0; 365 + } 366 + 367 + /* Find the proper register handler entry given a certain address offset. */ 368 + static const struct vgic_register_region * 369 + vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, 370 + unsigned int offset) 371 + { 372 + return bsearch((void *)(uintptr_t)offset, region, nr_regions, 373 + sizeof(region[0]), match_region); 374 + } 375 + 376 + /* 377 + * kvm_mmio_read_buf() returns a value in a format where it can be converted 378 + * to a byte array and be directly observed as the guest wanted it to appear 379 + * in memory if it had done the store itself, which is LE for the GIC, as the 380 + * guest knows the GIC is always LE. 381 + * 382 + * We convert this value to the CPUs native format to deal with it as a data 383 + * value. 
384 + */ 385 + unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) 386 + { 387 + unsigned long data = kvm_mmio_read_buf(val, len); 388 + 389 + switch (len) { 390 + case 1: 391 + return data; 392 + case 2: 393 + return le16_to_cpu(data); 394 + case 4: 395 + return le32_to_cpu(data); 396 + default: 397 + return le64_to_cpu(data); 398 + } 399 + } 400 + 401 + /* 402 + * kvm_mmio_write_buf() expects a value in a format such that if converted to 403 + * a byte array it is observed as the guest would see it if it could perform 404 + * the load directly. Since the GIC is LE, and the guest knows this, the 405 + * guest expects a value in little endian format. 406 + * 407 + * We convert the data value from the CPUs native format to LE so that the 408 + * value is returned in the proper format. 409 + */ 410 + void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, 411 + unsigned long data) 412 + { 413 + switch (len) { 414 + case 1: 415 + break; 416 + case 2: 417 + data = cpu_to_le16(data); 418 + break; 419 + case 4: 420 + data = cpu_to_le32(data); 421 + break; 422 + default: 423 + data = cpu_to_le64(data); 424 + } 425 + 426 + kvm_mmio_write_buf(buf, len, data); 427 + } 428 + 429 + static 430 + struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) 431 + { 432 + return container_of(dev, struct vgic_io_device, dev); 433 + } 434 + 435 + static bool check_region(const struct vgic_register_region *region, 436 + gpa_t addr, int len) 437 + { 438 + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) 439 + return true; 440 + if ((region->access_flags & VGIC_ACCESS_32bit) && 441 + len == sizeof(u32) && !(addr & 3)) 442 + return true; 443 + if ((region->access_flags & VGIC_ACCESS_64bit) && 444 + len == sizeof(u64) && !(addr & 7)) 445 + return true; 446 + 447 + return false; 448 + } 449 + 450 + static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, 451 + gpa_t addr, int len, void *val) 452 + { 453 + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); 454 + const struct vgic_register_region *region; 455 + struct kvm_vcpu *r_vcpu; 456 + unsigned long data; 457 + 458 + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, 459 + addr - iodev->base_addr); 460 + if (!region || !check_region(region, addr, len)) { 461 + memset(val, 0, len); 462 + return 0; 463 + } 464 + 465 + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; 466 + data = region->read(r_vcpu, addr, len); 467 + vgic_data_host_to_mmio_bus(val, len, data); 468 + return 0; 469 + } 470 + 471 + static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, 472 + gpa_t addr, int len, const void *val) 473 + { 474 + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); 475 + const struct vgic_register_region *region; 476 + struct kvm_vcpu *r_vcpu; 477 + unsigned long data = vgic_data_mmio_bus_to_host(val, len); 478 + 479 + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, 480 + addr - iodev->base_addr); 481 + if (!region) 482 + return 0; 483 + 484 + if (!check_region(region, addr, len)) 485 + return 0; 486 + 487 + r_vcpu = iodev->redist_vcpu ? 
iodev->redist_vcpu : vcpu; 488 + region->write(r_vcpu, addr, len, data); 489 + return 0; 490 + } 491 + 492 + struct kvm_io_device_ops kvm_io_gic_ops = { 493 + .read = dispatch_mmio_read, 494 + .write = dispatch_mmio_write, 495 + }; 496 + 497 + int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, 498 + enum vgic_type type) 499 + { 500 + struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; 501 + int ret = 0; 502 + unsigned int len; 503 + 504 + switch (type) { 505 + case VGIC_V2: 506 + len = vgic_v2_init_dist_iodev(io_device); 507 + break; 508 + #ifdef CONFIG_KVM_ARM_VGIC_V3 509 + case VGIC_V3: 510 + len = vgic_v3_init_dist_iodev(io_device); 511 + break; 512 + #endif 513 + default: 514 + BUG_ON(1); 515 + } 516 + 517 + io_device->base_addr = dist_base_address; 518 + io_device->redist_vcpu = NULL; 519 + 520 + mutex_lock(&kvm->slots_lock); 521 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, 522 + len, &io_device->dev); 523 + mutex_unlock(&kvm->slots_lock); 524 + 525 + return ret; 526 + }
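Note: dispatch_mmio_read()/dispatch_mmio_write() above silently treat as RAZ/WI any access that does not match a region's advertised widths and alignment. A standalone restatement of that check_region() test (access_is_valid is a local name for this sketch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VGIC_ACCESS_8bit	1
#define VGIC_ACCESS_32bit	2
#define VGIC_ACCESS_64bit	4

/* Same test as check_region() above: the access width must be one
 * the region advertises, and 32/64-bit accesses must be naturally
 * aligned; anything else is ignored by the dispatchers. */
static bool access_is_valid(unsigned int access_flags, uint64_t addr, int len)
{
	if ((access_flags & VGIC_ACCESS_8bit) && len == 1)
		return true;
	if ((access_flags & VGIC_ACCESS_32bit) && len == 4 && !(addr & 3))
		return true;
	if ((access_flags & VGIC_ACCESS_64bit) && len == 8 && !(addr & 7))
		return true;

	return false;
}

int main(void)
{
	/* a misaligned 4-byte access into a 32-bit-only region fails */
	printf("%d\n", access_is_valid(VGIC_ACCESS_32bit, 0x102, 4)); /* 0 */
	printf("%d\n", access_is_valid(VGIC_ACCESS_32bit, 0x100, 4)); /* 1 */
	return 0;
}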
+150
virt/kvm/arm/vgic/vgic-mmio.h
··· 1 + /* 2 + * Copyright (C) 2015, 2016 ARM Ltd. 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + #ifndef __KVM_ARM_VGIC_MMIO_H__ 17 + #define __KVM_ARM_VGIC_MMIO_H__ 18 + 19 + struct vgic_register_region { 20 + unsigned int reg_offset; 21 + unsigned int len; 22 + unsigned int bits_per_irq; 23 + unsigned int access_flags; 24 + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, 25 + unsigned int len); 26 + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, 27 + unsigned long val); 28 + }; 29 + 30 + extern struct kvm_io_device_ops kvm_io_gic_ops; 31 + 32 + #define VGIC_ACCESS_8bit 1 33 + #define VGIC_ACCESS_32bit 2 34 + #define VGIC_ACCESS_64bit 4 35 + 36 + /* 37 + * Generate a mask that covers the number of bytes required to address 38 + * up to 1024 interrupts, each represented by <bits> bits. This assumes 39 + * that <bits> is a power of two. 40 + */ 41 + #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) 42 + 43 + /* 44 + * (addr & mask) gives us the byte offset for the INT ID, so we want to 45 + * divide this with 'bytes per irq' to get the INT ID, which is given 46 + * by '(bits) / 8'. But we do this with fixed-point-arithmetic and 47 + * take advantage of the fact that division by a fraction equals 48 + * multiplication with the inverted fraction, and scale up both the 49 + * numerator and denominator with 8 to support at most 64 bits per IRQ: 50 + */ 51 + #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ 52 + 64 / (bits) / 8) 53 + 54 + /* 55 + * Some VGIC registers store per-IRQ information, with a different number 56 + * of bits per IRQ. For those registers this macro is used. 57 + * The _WITH_LENGTH version instantiates registers with a fixed length 58 + * and is mutually exclusive with the _PER_IRQ version. 
59 + */ 60 + #define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ 61 + { \ 62 + .reg_offset = off, \ 63 + .bits_per_irq = bpi, \ 64 + .len = bpi * 1024 / 8, \ 65 + .access_flags = acc, \ 66 + .read = rd, \ 67 + .write = wr, \ 68 + } 69 + 70 + #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ 71 + { \ 72 + .reg_offset = off, \ 73 + .bits_per_irq = 0, \ 74 + .len = length, \ 75 + .access_flags = acc, \ 76 + .read = rd, \ 77 + .write = wr, \ 78 + } 79 + 80 + int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, 81 + struct vgic_register_region *reg_desc, 82 + struct vgic_io_device *region, 83 + int nr_irqs, bool offset_private); 84 + 85 + unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); 86 + 87 + void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, 88 + unsigned long data); 89 + 90 + unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, 91 + gpa_t addr, unsigned int len); 92 + 93 + unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, 94 + gpa_t addr, unsigned int len); 95 + 96 + void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, 97 + unsigned int len, unsigned long val); 98 + 99 + unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, 100 + gpa_t addr, unsigned int len); 101 + 102 + void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, 103 + gpa_t addr, unsigned int len, 104 + unsigned long val); 105 + 106 + void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, 107 + gpa_t addr, unsigned int len, 108 + unsigned long val); 109 + 110 + unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, 111 + gpa_t addr, unsigned int len); 112 + 113 + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, 114 + gpa_t addr, unsigned int len, 115 + unsigned long val); 116 + 117 + void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, 118 + gpa_t addr, unsigned int len, 119 + unsigned long val); 120 + 121 + unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, 122 + gpa_t addr, unsigned int len); 123 + 124 + void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, 125 + gpa_t addr, unsigned int len, 126 + unsigned long val); 127 + 128 + void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, 129 + gpa_t addr, unsigned int len, 130 + unsigned long val); 131 + 132 + unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, 133 + gpa_t addr, unsigned int len); 134 + 135 + void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, 136 + gpa_t addr, unsigned int len, 137 + unsigned long val); 138 + 139 + unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, 140 + gpa_t addr, unsigned int len); 141 + 142 + void vgic_mmio_write_config(struct kvm_vcpu *vcpu, 143 + gpa_t addr, unsigned int len, 144 + unsigned long val); 145 + 146 + unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); 147 + 148 + unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); 149 + 150 + #endif
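Note: the fixed-point INTID arithmetic documented above is easiest to sanity-check with concrete numbers. A minimal sketch, reusing the two macros as defined in the header:

#include <stdio.h>

#define VGIC_ADDR_IRQ_MASK(bits)	(((bits) * 1024 / 8) - 1)
#define VGIC_ADDR_TO_INTID(addr, bits)	(((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
					 64 / (bits) / 8)

/* Byte offset 4 into a 1-bit-per-IRQ register (e.g. GICD_ISENABLER)
 * starts at INTID 32; the same offset into an 8-bit-per-IRQ register
 * (e.g. GICD_IPRIORITYR) starts at INTID 4. */
int main(void)
{
	printf("1 bit/IRQ,  offset 4 -> INTID %d\n", VGIC_ADDR_TO_INTID(4, 1));
	printf("8 bits/IRQ, offset 4 -> INTID %d\n", VGIC_ADDR_TO_INTID(4, 8));
	return 0;
}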
+352
virt/kvm/arm/vgic/vgic-v2.c
··· 1 + /* 2 + * Copyright (C) 2015, 2016 ARM Ltd. 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #include <linux/irqchip/arm-gic.h> 18 + #include <linux/kvm.h> 19 + #include <linux/kvm_host.h> 20 + #include <kvm/arm_vgic.h> 21 + #include <asm/kvm_mmu.h> 22 + 23 + #include "vgic.h" 24 + 25 + /* 26 + * Call this function to convert a u64 value to an unsigned long * bitmask 27 + * in a way that works on both 32-bit and 64-bit LE and BE platforms. 28 + * 29 + * Warning: Calling this function may modify *val. 30 + */ 31 + static unsigned long *u64_to_bitmask(u64 *val) 32 + { 33 + #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 34 + *val = (*val >> 32) | (*val << 32); 35 + #endif 36 + return (unsigned long *)val; 37 + } 38 + 39 + void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) 40 + { 41 + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; 42 + 43 + if (cpuif->vgic_misr & GICH_MISR_EOI) { 44 + u64 eisr = cpuif->vgic_eisr; 45 + unsigned long *eisr_bmap = u64_to_bitmask(&eisr); 46 + int lr; 47 + 48 + for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) { 49 + u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID; 50 + 51 + WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); 52 + 53 + kvm_notify_acked_irq(vcpu->kvm, 0, 54 + intid - VGIC_NR_PRIVATE_IRQS); 55 + } 56 + } 57 + 58 + /* check and disable underflow maintenance IRQ */ 59 + cpuif->vgic_hcr &= ~GICH_HCR_UIE; 60 + 61 + /* 62 + * In the next iterations of the vcpu loop, if we sync the 63 + * vgic state after flushing it, but before entering the guest 64 + * (this happens for pending signals and vmid rollovers), then 65 + * make sure we don't pick up any old maintenance interrupts 66 + * here. 
67 + */ 68 + cpuif->vgic_eisr = 0; 69 + } 70 + 71 + void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) 72 + { 73 + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; 74 + 75 + cpuif->vgic_hcr |= GICH_HCR_UIE; 76 + } 77 + 78 + /* 79 + * transfer the content of the LRs back into the corresponding ap_list: 80 + * - active bit is transferred as is 81 + * - pending bit is 82 + * - transferred as is in case of edge sensitive IRQs 83 + * - set to the line-level (resample time) for level sensitive IRQs 84 + */ 85 + void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) 86 + { 87 + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; 88 + int lr; 89 + 90 + for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { 91 + u32 val = cpuif->vgic_lr[lr]; 92 + u32 intid = val & GICH_LR_VIRTUALID; 93 + struct vgic_irq *irq; 94 + 95 + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); 96 + 97 + spin_lock(&irq->irq_lock); 98 + 99 + /* Always preserve the active bit */ 100 + irq->active = !!(val & GICH_LR_ACTIVE_BIT); 101 + 102 + /* Edge is the only case where we preserve the pending bit */ 103 + if (irq->config == VGIC_CONFIG_EDGE && 104 + (val & GICH_LR_PENDING_BIT)) { 105 + irq->pending = true; 106 + 107 + if (vgic_irq_is_sgi(intid)) { 108 + u32 cpuid = val & GICH_LR_PHYSID_CPUID; 109 + 110 + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; 111 + irq->source |= (1 << cpuid); 112 + } 113 + } 114 + 115 + /* Clear soft pending state when level IRQs have been acked */ 116 + if (irq->config == VGIC_CONFIG_LEVEL && 117 + !(val & GICH_LR_PENDING_BIT)) { 118 + irq->soft_pending = false; 119 + irq->pending = irq->line_level; 120 + } 121 + 122 + spin_unlock(&irq->irq_lock); 123 + } 124 + } 125 + 126 + /* 127 + * Populates the particular LR with the state of a given IRQ: 128 + * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq 129 + * - for a level sensitive IRQ the pending state value is unchanged; 130 + * it is dictated directly by the input level 131 + * 132 + * If @irq describes an SGI with multiple sources, we choose the 133 + * lowest-numbered source VCPU and clear that bit in the source bitmap. 134 + * 135 + * The irq_lock must be held by the caller. 136 + */ 137 + void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 138 + { 139 + u32 val = irq->intid; 140 + 141 + if (irq->pending) { 142 + val |= GICH_LR_PENDING_BIT; 143 + 144 + if (irq->config == VGIC_CONFIG_EDGE) 145 + irq->pending = false; 146 + 147 + if (vgic_irq_is_sgi(irq->intid)) { 148 + u32 src = ffs(irq->source); 149 + 150 + BUG_ON(!src); 151 + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 152 + irq->source &= ~(1 << (src - 1)); 153 + if (irq->source) 154 + irq->pending = true; 155 + } 156 + } 157 + 158 + if (irq->active) 159 + val |= GICH_LR_ACTIVE_BIT; 160 + 161 + if (irq->hw) { 162 + val |= GICH_LR_HW; 163 + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; 164 + } else { 165 + if (irq->config == VGIC_CONFIG_LEVEL) 166 + val |= GICH_LR_EOI; 167 + } 168 + 169 + /* The GICv2 LR only holds five bits of priority. 
*/ 170 + val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; 171 + 172 + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; 173 + } 174 + 175 + void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) 176 + { 177 + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0; 178 + } 179 + 180 + void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) 181 + { 182 + u32 vmcr; 183 + 184 + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; 185 + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & 186 + GICH_VMCR_ALIAS_BINPOINT_MASK; 187 + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & 188 + GICH_VMCR_BINPOINT_MASK; 189 + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & 190 + GICH_VMCR_PRIMASK_MASK; 191 + 192 + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; 193 + } 194 + 195 + void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) 196 + { 197 + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; 198 + 199 + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> 200 + GICH_VMCR_CTRL_SHIFT; 201 + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> 202 + GICH_VMCR_ALIAS_BINPOINT_SHIFT; 203 + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> 204 + GICH_VMCR_BINPOINT_SHIFT; 205 + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> 206 + GICH_VMCR_PRIMASK_SHIFT; 207 + } 208 + 209 + void vgic_v2_enable(struct kvm_vcpu *vcpu) 210 + { 211 + /* 212 + * By forcing VMCR to zero, the GIC will restore the binary 213 + * points to their reset values. Anything else resets to zero 214 + * anyway. 215 + */ 216 + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; 217 + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; 218 + 219 + /* Get the show on the road... */ 220 + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; 221 + } 222 + 223 + /* check for overlapping regions and for regions crossing the end of memory */ 224 + static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base) 225 + { 226 + if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base) 227 + return false; 228 + if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base) 229 + return false; 230 + 231 + if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base) 232 + return true; 233 + if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base) 234 + return true; 235 + 236 + return false; 237 + } 238 + 239 + int vgic_v2_map_resources(struct kvm *kvm) 240 + { 241 + struct vgic_dist *dist = &kvm->arch.vgic; 242 + int ret = 0; 243 + 244 + if (vgic_ready(kvm)) 245 + goto out; 246 + 247 + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || 248 + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { 249 + kvm_err("Need to set vgic cpu and dist addresses first\n"); 250 + ret = -ENXIO; 251 + goto out; 252 + } 253 + 254 + if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { 255 + kvm_err("VGIC CPU and dist frames overlap\n"); 256 + ret = -EINVAL; 257 + goto out; 258 + } 259 + 260 + /* 261 + * Initialize the vgic if this hasn't already been done on demand by 262 + * accessing the vgic state from userspace. 
263 + */ 264 + ret = vgic_init(kvm); 265 + if (ret) { 266 + kvm_err("Unable to initialize VGIC dynamic data structures\n"); 267 + goto out; 268 + } 269 + 270 + ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); 271 + if (ret) { 272 + kvm_err("Unable to register VGIC MMIO regions\n"); 273 + goto out; 274 + } 275 + 276 + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, 277 + kvm_vgic_global_state.vcpu_base, 278 + KVM_VGIC_V2_CPU_SIZE, true); 279 + if (ret) { 280 + kvm_err("Unable to remap VGIC CPU to VCPU\n"); 281 + goto out; 282 + } 283 + 284 + dist->ready = true; 285 + 286 + out: 287 + if (ret) 288 + kvm_vgic_destroy(kvm); 289 + return ret; 290 + } 291 + 292 + /** 293 + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT 294 + * @node: pointer to the DT node 295 + * 296 + * Returns 0 if a GICv2 has been found, returns an error code otherwise 297 + */ 298 + int vgic_v2_probe(const struct gic_kvm_info *info) 299 + { 300 + int ret; 301 + u32 vtr; 302 + 303 + if (!info->vctrl.start) { 304 + kvm_err("GICH not present in the firmware table\n"); 305 + return -ENXIO; 306 + } 307 + 308 + if (!PAGE_ALIGNED(info->vcpu.start)) { 309 + kvm_err("GICV physical address 0x%llx not page aligned\n", 310 + (unsigned long long)info->vcpu.start); 311 + return -ENXIO; 312 + } 313 + 314 + if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { 315 + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", 316 + (unsigned long long)resource_size(&info->vcpu), 317 + PAGE_SIZE); 318 + return -ENXIO; 319 + } 320 + 321 + kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, 322 + resource_size(&info->vctrl)); 323 + if (!kvm_vgic_global_state.vctrl_base) { 324 + kvm_err("Cannot ioremap GICH\n"); 325 + return -ENOMEM; 326 + } 327 + 328 + vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); 329 + kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; 330 + 331 + ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, 332 + kvm_vgic_global_state.vctrl_base + 333 + resource_size(&info->vctrl), 334 + info->vctrl.start); 335 + 336 + if (ret) { 337 + kvm_err("Cannot map VCTRL into hyp\n"); 338 + iounmap(kvm_vgic_global_state.vctrl_base); 339 + return ret; 340 + } 341 + 342 + kvm_vgic_global_state.can_emulate_gicv2 = true; 343 + kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); 344 + 345 + kvm_vgic_global_state.vcpu_base = info->vcpu.start; 346 + kvm_vgic_global_state.type = VGIC_V2; 347 + kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; 348 + 349 + kvm_info("vgic-v2@%llx\n", info->vctrl.start); 350 + 351 + return 0; 352 + }
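Note: as the comment in vgic_v2_populate_lr() above says, a GICv2 list register holds only five bits of priority, so the 8-bit software priority is truncated when packed. A small sketch of that packing, assuming the GICH_LR_PRIORITY_SHIFT value of 23 from <linux/irqchip/arm-gic.h>:

#include <stdint.h>
#include <stdio.h>

#define GICH_LR_PRIORITY_SHIFT	23	/* LR priority field, bits [27:23] */

/* The 8-bit software priority loses its bottom three bits when packed
 * into the 5-bit LR field, mirroring vgic_v2_populate_lr() above. */
int main(void)
{
	uint8_t prio = 0xa0;
	uint32_t lr = (uint32_t)(prio >> 3) << GICH_LR_PRIORITY_SHIFT;

	printf("priority 0x%02x -> 5-bit field 0x%02x, LR word 0x%08x\n",
	       prio, prio >> 3, lr);
	return 0;
}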
+330
virt/kvm/arm/vgic/vgic-v3.c
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License version 2 as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 13 + */ 14 + 15 + #include <linux/irqchip/arm-gic-v3.h> 16 + #include <linux/kvm.h> 17 + #include <linux/kvm_host.h> 18 + #include <kvm/arm_vgic.h> 19 + #include <asm/kvm_mmu.h> 20 + #include <asm/kvm_asm.h> 21 + 22 + #include "vgic.h" 23 + 24 + void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) 25 + { 26 + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; 27 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 28 + 29 + if (cpuif->vgic_misr & ICH_MISR_EOI) { 30 + unsigned long eisr_bmap = cpuif->vgic_eisr; 31 + int lr; 32 + 33 + for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) { 34 + u32 intid; 35 + u64 val = cpuif->vgic_lr[lr]; 36 + 37 + if (model == KVM_DEV_TYPE_ARM_VGIC_V3) 38 + intid = val & ICH_LR_VIRTUAL_ID_MASK; 39 + else 40 + intid = val & GICH_LR_VIRTUALID; 41 + 42 + WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); 43 + 44 + kvm_notify_acked_irq(vcpu->kvm, 0, 45 + intid - VGIC_NR_PRIVATE_IRQS); 46 + } 47 + 48 + /* 49 + * In the next iterations of the vcpu loop, if we sync 50 + * the vgic state after flushing it, but before 51 + * entering the guest (this happens for pending 52 + * signals and vmid rollovers), then make sure we 53 + * don't pick up any old maintenance interrupts here. 
54 + */ 55 + cpuif->vgic_eisr = 0; 56 + } 57 + 58 + cpuif->vgic_hcr &= ~ICH_HCR_UIE; 59 + } 60 + 61 + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) 62 + { 63 + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; 64 + 65 + cpuif->vgic_hcr |= ICH_HCR_UIE; 66 + } 67 + 68 + void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) 69 + { 70 + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; 71 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 72 + int lr; 73 + 74 + for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { 75 + u64 val = cpuif->vgic_lr[lr]; 76 + u32 intid; 77 + struct vgic_irq *irq; 78 + 79 + if (model == KVM_DEV_TYPE_ARM_VGIC_V3) 80 + intid = val & ICH_LR_VIRTUAL_ID_MASK; 81 + else 82 + intid = val & GICH_LR_VIRTUALID; 83 + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); 84 + 85 + spin_lock(&irq->irq_lock); 86 + 87 + /* Always preserve the active bit */ 88 + irq->active = !!(val & ICH_LR_ACTIVE_BIT); 89 + 90 + /* Edge is the only case where we preserve the pending bit */ 91 + if (irq->config == VGIC_CONFIG_EDGE && 92 + (val & ICH_LR_PENDING_BIT)) { 93 + irq->pending = true; 94 + 95 + if (vgic_irq_is_sgi(intid) && 96 + model == KVM_DEV_TYPE_ARM_VGIC_V2) { 97 + u32 cpuid = val & GICH_LR_PHYSID_CPUID; 98 + 99 + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; 100 + irq->source |= (1 << cpuid); 101 + } 102 + } 103 + 104 + /* Clear soft pending state when level irqs have been acked */ 105 + if (irq->config == VGIC_CONFIG_LEVEL && 106 + !(val & ICH_LR_PENDING_BIT)) { 107 + irq->soft_pending = false; 108 + irq->pending = irq->line_level; 109 + } 110 + 111 + spin_unlock(&irq->irq_lock); 112 + } 113 + } 114 + 115 + /* Requires the irq to be locked already */ 116 + void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 117 + { 118 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 119 + u64 val = irq->intid; 120 + 121 + if (irq->pending) { 122 + val |= ICH_LR_PENDING_BIT; 123 + 124 + if (irq->config == VGIC_CONFIG_EDGE) 125 + irq->pending = false; 126 + 127 + if (vgic_irq_is_sgi(irq->intid) && 128 + model == KVM_DEV_TYPE_ARM_VGIC_V2) { 129 + u32 src = ffs(irq->source); 130 + 131 + BUG_ON(!src); 132 + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 133 + irq->source &= ~(1 << (src - 1)); 134 + if (irq->source) 135 + irq->pending = true; 136 + } 137 + } 138 + 139 + if (irq->active) 140 + val |= ICH_LR_ACTIVE_BIT; 141 + 142 + if (irq->hw) { 143 + val |= ICH_LR_HW; 144 + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; 145 + } else { 146 + if (irq->config == VGIC_CONFIG_LEVEL) 147 + val |= ICH_LR_EOI; 148 + } 149 + 150 + /* 151 + * We currently only support Group1 interrupts, which is a 152 + * known defect. This needs to be addressed at some point. 
153 + 	 */
154 + 	if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
155 + 		val |= ICH_LR_GROUP;
156 +
157 + 	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
158 +
159 + 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
160 + }
161 +
162 + void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
163 + {
164 + 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
165 + }
166 +
167 + void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
168 + {
169 + 	u32 vmcr;
170 +
171 + 	vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
172 + 	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
173 + 	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
174 + 	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
175 +
176 + 	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
177 + }
178 +
179 + void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
180 + {
181 + 	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
182 +
183 + 	vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
184 + 	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
185 + 	vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
186 + 	vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
187 + }
188 +
189 + void vgic_v3_enable(struct kvm_vcpu *vcpu)
190 + {
191 + 	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
192 +
193 + 	/*
194 + 	 * By forcing VMCR to zero, the GIC will restore the binary
195 + 	 * points to their reset values. Anything else resets to zero
196 + 	 * anyway.
197 + 	 */
198 + 	vgic_v3->vgic_vmcr = 0;
199 + 	vgic_v3->vgic_elrsr = ~0;
200 +
201 + 	/*
202 + 	 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
203 + 	 * way, so we force SRE to 1 to demonstrate this to the guest.
204 + 	 * This goes with the spec allowing the value to be RAO/WI.
205 + 	 */
206 + 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
207 + 		vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
208 + 	else
209 + 		vgic_v3->vgic_sre = 0;
210 +
211 + 	/* Get the show on the road... */
212 + 	vgic_v3->vgic_hcr = ICH_HCR_EN;
213 + }
214 +
215 + /* check for overlapping regions and for regions crossing the end of memory */
216 + static bool vgic_v3_check_base(struct kvm *kvm)
217 + {
218 + 	struct vgic_dist *d = &kvm->arch.vgic;
219 + 	gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
220 +
221 + 	redist_size *= atomic_read(&kvm->online_vcpus);
222 +
223 + 	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
224 + 		return false;
225 + 	if (d->vgic_redist_base + redist_size < d->vgic_redist_base)
226 + 		return false;
227 +
228 + 	if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
229 + 		return true;
230 + 	if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
231 + 		return true;
232 +
233 + 	return false;
234 + }
235 +
236 + int vgic_v3_map_resources(struct kvm *kvm)
237 + {
238 + 	int ret = 0;
239 + 	struct vgic_dist *dist = &kvm->arch.vgic;
240 +
241 + 	if (vgic_ready(kvm))
242 + 		goto out;
243 +
244 + 	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
245 + 	    IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
246 + 		kvm_err("Need to set vgic distributor addresses first\n");
247 + 		ret = -ENXIO;
248 + 		goto out;
249 + 	}
250 +
251 + 	if (!vgic_v3_check_base(kvm)) {
252 + 		kvm_err("VGIC redist and dist frames overlap\n");
253 + 		ret = -EINVAL;
254 + 		goto out;
255 + 	}
256 +
257 + 	/*
258 + 	 * For a VGICv3 we require the userland to explicitly initialize
259 + 	 * the VGIC before we need to use it.
260 + 	 */
261 + 	if (!vgic_initialized(kvm)) {
262 + 		ret = -EBUSY;
263 + 		goto out;
264 + 	}
265 +
266 + 	ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
267 + 	if (ret) {
268 + 		kvm_err("Unable to register VGICv3 dist MMIO regions\n");
269 + 		goto out;
270 + 	}
271 +
272 + 	ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
273 + 	if (ret) {
274 + 		kvm_err("Unable to register VGICv3 redist MMIO regions\n");
275 + 		goto out;
276 + 	}
277 +
278 + 	dist->ready = true;
279 +
280 + out:
281 + 	if (ret)
282 + 		kvm_vgic_destroy(kvm);
283 + 	return ret;
284 + }
285 +
286 + /**
287 +  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
288 +  * @info: pointer to the GIC description taken from the firmware table
289 +  *
290 +  * Returns 0 if a GICv3 has been found, returns an error code otherwise
291 +  */
292 + int vgic_v3_probe(const struct gic_kvm_info *info)
293 + {
294 + 	u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
295 +
296 + 	/*
297 + 	 * The ListRegs field is 5 bits, but there is an architectural
298 + 	 * maximum of 16 list registers. Just ignore bit 4...
299 + 	 */
300 + 	kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
301 + 	kvm_vgic_global_state.can_emulate_gicv2 = false;
302 +
303 + 	if (!info->vcpu.start) {
304 + 		kvm_info("GICv3: no GICV resource entry\n");
305 + 		kvm_vgic_global_state.vcpu_base = 0;
306 + 	} else if (!PAGE_ALIGNED(info->vcpu.start)) {
307 + 		pr_warn("GICV physical address 0x%llx not page aligned\n",
308 + 			(unsigned long long)info->vcpu.start);
309 + 		kvm_vgic_global_state.vcpu_base = 0;
310 + 	} else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
311 + 		pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
312 + 			(unsigned long long)resource_size(&info->vcpu),
313 + 			PAGE_SIZE);
314 + 		kvm_vgic_global_state.vcpu_base = 0;
315 + 	} else {
316 + 		kvm_vgic_global_state.vcpu_base = info->vcpu.start;
317 + 		kvm_vgic_global_state.can_emulate_gicv2 = true;
318 + 		kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
319 + 		kvm_info("vgic-v2@%llx\n", info->vcpu.start);
320 + 	}
321 + 	if (kvm_vgic_global_state.vcpu_base == 0)
322 + 		kvm_info("disabling GICv2 emulation\n");
323 + 	kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
324 +
325 + 	kvm_vgic_global_state.vctrl_base = NULL;
326 + 	kvm_vgic_global_state.type = VGIC_V3;
327 + 	kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
328 +
329 + 	return 0;
330 + }
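
For reference, the bit packing that vgic_v3_populate_lr() performs can be mimicked in isolation. The sketch below composes an ICH_LR_EL2-style value for a pending Group1 interrupt; the field positions follow the GICv3 architecture (state in bits 63:62, HW in bit 61, group in bit 60, EOI request in bit 41, priority in bits 55:48), but the LR_* macro names are local stand-ins rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

#define LR_PENDING	(1ULL << 62)	/* state field, bits [63:62] */
#define LR_ACTIVE	(1ULL << 63)
#define LR_HW		(1ULL << 61)
#define LR_GROUP1	(1ULL << 60)
#define LR_EOI		(1ULL << 41)	/* maintenance interrupt on EOI */
#define LR_PRIO_SHIFT	48

static uint64_t make_lr(uint32_t intid, uint8_t prio, int level_triggered)
{
	uint64_t val = intid;			/* vINTID lives in bits [31:0] */

	val |= LR_PENDING;			/* inject as pending */
	if (level_triggered)
		val |= LR_EOI;			/* trap the EOI so the level can be resampled */
	val |= LR_GROUP1;			/* only Group1 is supported here */
	val |= (uint64_t)prio << LR_PRIO_SHIFT;
	return val;
}

int main(void)
{
	printf("LR = %#llx\n", (unsigned long long)make_lr(42, 0xa0, 1));
	return 0;
}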
+619
virt/kvm/arm/vgic/vgic.c
···
1 + /*
2 +  * Copyright (C) 2015, 2016 ARM Ltd.
3 +  *
4 +  * This program is free software; you can redistribute it and/or modify
5 +  * it under the terms of the GNU General Public License version 2 as
6 +  * published by the Free Software Foundation.
7 +  *
8 +  * This program is distributed in the hope that it will be useful,
9 +  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 +  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 +  * GNU General Public License for more details.
12 +  *
13 +  * You should have received a copy of the GNU General Public License
14 +  * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 +  */
16 +
17 + #include <linux/kvm.h>
18 + #include <linux/kvm_host.h>
19 + #include <linux/list_sort.h>
20 +
21 + #include "vgic.h"
22 +
23 + #define CREATE_TRACE_POINTS
24 + #include "../trace.h"
25 +
26 + #ifdef CONFIG_DEBUG_SPINLOCK
27 + #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
28 + #else
29 + #define DEBUG_SPINLOCK_BUG_ON(p)
30 + #endif
31 +
32 + struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
33 +
34 + /*
35 +  * Locking order is always:
36 +  *   vgic_cpu->ap_list_lock
37 +  *     vgic_irq->irq_lock
38 +  *
39 +  * (that is, always take the ap_list_lock before the struct vgic_irq lock).
40 +  *
41 +  * When taking more than one ap_list_lock at the same time, always take the
42 +  * lowest numbered VCPU's ap_list_lock first, so:
43 +  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
44 +  *     spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
45 +  *     spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
46 +  */
47 +
48 + struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
49 + 			      u32 intid)
50 + {
51 + 	/* SGIs and PPIs */
52 + 	if (intid <= VGIC_MAX_PRIVATE)
53 + 		return &vcpu->arch.vgic_cpu.private_irqs[intid];
54 +
55 + 	/* SPIs */
56 + 	if (intid <= VGIC_MAX_SPI)
57 + 		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
58 +
59 + 	/* LPIs are not yet covered */
60 + 	if (intid >= VGIC_MIN_LPI)
61 + 		return NULL;
62 +
63 + 	WARN(1, "Looking up struct vgic_irq for reserved INTID");
64 + 	return NULL;
65 + }
66 +
67 + /**
68 +  * vgic_target_oracle - compute the target vcpu for an irq
69 +  *
70 +  * @irq: The irq to route. Must be already locked.
71 +  *
72 +  * Based on the current state of the interrupt (enabled, pending,
73 +  * active, vcpu and target_vcpu), compute the next vcpu this should be
74 +  * given to. Return NULL if this shouldn't be injected at all.
75 +  *
76 +  * Requires the IRQ lock to be held.
77 +  */
78 + static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
79 + {
80 + 	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
81 +
82 + 	/* If the interrupt is active, it must stay on the current vcpu */
83 + 	if (irq->active)
84 + 		return irq->vcpu ? : irq->target_vcpu;
85 +
86 + 	/*
87 + 	 * If the IRQ is not active but enabled and pending, we should direct
88 + 	 * it to its configured target VCPU.
89 + 	 * If the distributor is disabled, pending interrupts shouldn't be
90 + 	 * forwarded.
91 + 	 */
92 + 	if (irq->enabled && irq->pending) {
93 + 		if (unlikely(irq->target_vcpu &&
94 + 			     !irq->target_vcpu->kvm->arch.vgic.enabled))
95 + 			return NULL;
96 +
97 + 		return irq->target_vcpu;
98 + 	}
99 +
100 + 	/* If neither active nor pending and enabled, then this IRQ should not
101 + 	 * be queued to any VCPU.
102 + 	 */
103 + 	return NULL;
104 + }
105 +
106 + /*
107 +  * The order of items in the ap_lists defines how we'll pack things in LRs as
108 +  * well, the first items in the list being the first things populated in the
109 +  * LRs.
110 +  *
111 +  * A hard rule is that active interrupts can never be pushed out of the LRs
112 +  * (and therefore take priority) since we cannot reliably trap on deactivation
113 +  * of IRQs and therefore they have to be present in the LRs.
114 +  *
115 +  * Otherwise things should be sorted by the priority field and the GIC
116 +  * hardware support will take care of preemption of priority groups etc.
117 +  *
118 +  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
119 +  * to sort "b" before "a".
120 +  */
121 + static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
122 + {
123 + 	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
124 + 	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
125 + 	bool penda, pendb;
126 + 	int ret;
127 +
128 + 	spin_lock(&irqa->irq_lock);
129 + 	spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
130 +
131 + 	if (irqa->active || irqb->active) {
132 + 		ret = (int)irqb->active - (int)irqa->active;
133 + 		goto out;
134 + 	}
135 +
136 + 	penda = irqa->enabled && irqa->pending;
137 + 	pendb = irqb->enabled && irqb->pending;
138 +
139 + 	if (!penda || !pendb) {
140 + 		ret = (int)pendb - (int)penda;
141 + 		goto out;
142 + 	}
143 +
144 + 	/* Both pending and enabled, sort by priority */
145 + 	ret = irqa->priority - irqb->priority;
146 + out:
147 + 	spin_unlock(&irqb->irq_lock);
148 + 	spin_unlock(&irqa->irq_lock);
149 + 	return ret;
150 + }
151 +
152 + /* Must be called with the ap_list_lock held */
153 + static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
154 + {
155 + 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
156 +
157 + 	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
158 +
159 + 	list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
160 + }
161 +
162 + /*
163 +  * Only valid injection if changing level for level-triggered IRQs or for a
164 +  * rising edge.
165 +  */
166 + static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
167 + {
168 + 	switch (irq->config) {
169 + 	case VGIC_CONFIG_LEVEL:
170 + 		return irq->line_level != level;
171 + 	case VGIC_CONFIG_EDGE:
172 + 		return level;
173 + 	}
174 +
175 + 	return false;
176 + }
177 +
178 + /*
179 +  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
180 +  * Do the queuing if necessary, taking the right locks in the right order.
181 +  * Returns true when the IRQ was queued, false otherwise.
182 +  *
183 +  * Needs to be entered with the IRQ lock already held, but will return
184 +  * with all locks dropped.
185 +  */
186 + bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq)
187 + {
188 + 	struct kvm_vcpu *vcpu;
189 +
190 + 	DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
191 +
192 + retry:
193 + 	vcpu = vgic_target_oracle(irq);
194 + 	if (irq->vcpu || !vcpu) {
195 + 		/*
196 + 		 * If this IRQ is already on a VCPU's ap_list, then it
197 + 		 * cannot be moved or modified and there is no more work for
198 + 		 * us to do.
199 + 		 *
200 + 		 * Otherwise, if the irq is not pending and enabled, it does
201 + 		 * not need to be inserted into an ap_list and there is also
202 + 		 * no more work for us to do.
203 + 		 */
204 + 		spin_unlock(&irq->irq_lock);
205 + 		return false;
206 + 	}
207 +
208 + 	/*
209 + 	 * We must unlock the irq lock to take the ap_list_lock where
210 + 	 * we are going to insert this new pending interrupt.
211 + */ 212 + spin_unlock(&irq->irq_lock); 213 + 214 + /* someone can do stuff here, which we re-check below */ 215 + 216 + spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); 217 + spin_lock(&irq->irq_lock); 218 + 219 + /* 220 + * Did something change behind our backs? 221 + * 222 + * There are two cases: 223 + * 1) The irq lost its pending state or was disabled behind our 224 + * backs and/or it was queued to another VCPU's ap_list. 225 + * 2) Someone changed the affinity on this irq behind our 226 + * backs and we are now holding the wrong ap_list_lock. 227 + * 228 + * In both cases, drop the locks and retry. 229 + */ 230 + 231 + if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { 232 + spin_unlock(&irq->irq_lock); 233 + spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); 234 + 235 + spin_lock(&irq->irq_lock); 236 + goto retry; 237 + } 238 + 239 + list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); 240 + irq->vcpu = vcpu; 241 + 242 + spin_unlock(&irq->irq_lock); 243 + spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); 244 + 245 + kvm_vcpu_kick(vcpu); 246 + 247 + return true; 248 + } 249 + 250 + static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, 251 + unsigned int intid, bool level, 252 + bool mapped_irq) 253 + { 254 + struct kvm_vcpu *vcpu; 255 + struct vgic_irq *irq; 256 + int ret; 257 + 258 + trace_vgic_update_irq_pending(cpuid, intid, level); 259 + 260 + ret = vgic_lazy_init(kvm); 261 + if (ret) 262 + return ret; 263 + 264 + vcpu = kvm_get_vcpu(kvm, cpuid); 265 + if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) 266 + return -EINVAL; 267 + 268 + irq = vgic_get_irq(kvm, vcpu, intid); 269 + if (!irq) 270 + return -EINVAL; 271 + 272 + if (irq->hw != mapped_irq) 273 + return -EINVAL; 274 + 275 + spin_lock(&irq->irq_lock); 276 + 277 + if (!vgic_validate_injection(irq, level)) { 278 + /* Nothing to see here, move along... */ 279 + spin_unlock(&irq->irq_lock); 280 + return 0; 281 + } 282 + 283 + if (irq->config == VGIC_CONFIG_LEVEL) { 284 + irq->line_level = level; 285 + irq->pending = level || irq->soft_pending; 286 + } else { 287 + irq->pending = true; 288 + } 289 + 290 + vgic_queue_irq_unlock(kvm, irq); 291 + 292 + return 0; 293 + } 294 + 295 + /** 296 + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic 297 + * @kvm: The VM structure pointer 298 + * @cpuid: The CPU for PPIs 299 + * @intid: The INTID to inject a new state to. 300 + * @level: Edge-triggered: true: to trigger the interrupt 301 + * false: to ignore the call 302 + * Level-sensitive true: raise the input signal 303 + * false: lower the input signal 304 + * 305 + * The VGIC is not concerned with devices being active-LOW or active-HIGH for 306 + * level-sensitive interrupts. You can think of the level parameter as 1 307 + * being HIGH and 0 being LOW and all devices being active-HIGH. 
308 + */ 309 + int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, 310 + bool level) 311 + { 312 + return vgic_update_irq_pending(kvm, cpuid, intid, level, false); 313 + } 314 + 315 + int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, 316 + bool level) 317 + { 318 + return vgic_update_irq_pending(kvm, cpuid, intid, level, true); 319 + } 320 + 321 + int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) 322 + { 323 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); 324 + 325 + BUG_ON(!irq); 326 + 327 + spin_lock(&irq->irq_lock); 328 + 329 + irq->hw = true; 330 + irq->hwintid = phys_irq; 331 + 332 + spin_unlock(&irq->irq_lock); 333 + 334 + return 0; 335 + } 336 + 337 + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) 338 + { 339 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); 340 + 341 + BUG_ON(!irq); 342 + 343 + if (!vgic_initialized(vcpu->kvm)) 344 + return -EAGAIN; 345 + 346 + spin_lock(&irq->irq_lock); 347 + 348 + irq->hw = false; 349 + irq->hwintid = 0; 350 + 351 + spin_unlock(&irq->irq_lock); 352 + 353 + return 0; 354 + } 355 + 356 + /** 357 + * vgic_prune_ap_list - Remove non-relevant interrupts from the list 358 + * 359 + * @vcpu: The VCPU pointer 360 + * 361 + * Go over the list of "interesting" interrupts, and prune those that we 362 + * won't have to consider in the near future. 363 + */ 364 + static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) 365 + { 366 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 367 + struct vgic_irq *irq, *tmp; 368 + 369 + retry: 370 + spin_lock(&vgic_cpu->ap_list_lock); 371 + 372 + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { 373 + struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; 374 + 375 + spin_lock(&irq->irq_lock); 376 + 377 + BUG_ON(vcpu != irq->vcpu); 378 + 379 + target_vcpu = vgic_target_oracle(irq); 380 + 381 + if (!target_vcpu) { 382 + /* 383 + * We don't need to process this interrupt any 384 + * further, move it off the list. 385 + */ 386 + list_del(&irq->ap_list); 387 + irq->vcpu = NULL; 388 + spin_unlock(&irq->irq_lock); 389 + continue; 390 + } 391 + 392 + if (target_vcpu == vcpu) { 393 + /* We're on the right CPU */ 394 + spin_unlock(&irq->irq_lock); 395 + continue; 396 + } 397 + 398 + /* This interrupt looks like it has to be migrated. */ 399 + 400 + spin_unlock(&irq->irq_lock); 401 + spin_unlock(&vgic_cpu->ap_list_lock); 402 + 403 + /* 404 + * Ensure locking order by always locking the smallest 405 + * ID first. 406 + */ 407 + if (vcpu->vcpu_id < target_vcpu->vcpu_id) { 408 + vcpuA = vcpu; 409 + vcpuB = target_vcpu; 410 + } else { 411 + vcpuA = target_vcpu; 412 + vcpuB = vcpu; 413 + } 414 + 415 + spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); 416 + spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, 417 + SINGLE_DEPTH_NESTING); 418 + spin_lock(&irq->irq_lock); 419 + 420 + /* 421 + * If the affinity has been preserved, move the 422 + * interrupt around. Otherwise, it means things have 423 + * changed while the interrupt was unlocked, and we 424 + * need to replay this. 425 + * 426 + * In all cases, we cannot trust the list not to have 427 + * changed, so we restart from the beginning. 
428 + */ 429 + if (target_vcpu == vgic_target_oracle(irq)) { 430 + struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu; 431 + 432 + list_del(&irq->ap_list); 433 + irq->vcpu = target_vcpu; 434 + list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); 435 + } 436 + 437 + spin_unlock(&irq->irq_lock); 438 + spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); 439 + spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); 440 + goto retry; 441 + } 442 + 443 + spin_unlock(&vgic_cpu->ap_list_lock); 444 + } 445 + 446 + static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu) 447 + { 448 + if (kvm_vgic_global_state.type == VGIC_V2) 449 + vgic_v2_process_maintenance(vcpu); 450 + else 451 + vgic_v3_process_maintenance(vcpu); 452 + } 453 + 454 + static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) 455 + { 456 + if (kvm_vgic_global_state.type == VGIC_V2) 457 + vgic_v2_fold_lr_state(vcpu); 458 + else 459 + vgic_v3_fold_lr_state(vcpu); 460 + } 461 + 462 + /* Requires the irq_lock to be held. */ 463 + static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, 464 + struct vgic_irq *irq, int lr) 465 + { 466 + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); 467 + 468 + if (kvm_vgic_global_state.type == VGIC_V2) 469 + vgic_v2_populate_lr(vcpu, irq, lr); 470 + else 471 + vgic_v3_populate_lr(vcpu, irq, lr); 472 + } 473 + 474 + static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr) 475 + { 476 + if (kvm_vgic_global_state.type == VGIC_V2) 477 + vgic_v2_clear_lr(vcpu, lr); 478 + else 479 + vgic_v3_clear_lr(vcpu, lr); 480 + } 481 + 482 + static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) 483 + { 484 + if (kvm_vgic_global_state.type == VGIC_V2) 485 + vgic_v2_set_underflow(vcpu); 486 + else 487 + vgic_v3_set_underflow(vcpu); 488 + } 489 + 490 + /* Requires the ap_list_lock to be held. */ 491 + static int compute_ap_list_depth(struct kvm_vcpu *vcpu) 492 + { 493 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 494 + struct vgic_irq *irq; 495 + int count = 0; 496 + 497 + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); 498 + 499 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 500 + spin_lock(&irq->irq_lock); 501 + /* GICv2 SGIs can count for more than one... */ 502 + if (vgic_irq_is_sgi(irq->intid) && irq->source) 503 + count += hweight8(irq->source); 504 + else 505 + count++; 506 + spin_unlock(&irq->irq_lock); 507 + } 508 + return count; 509 + } 510 + 511 + /* Requires the VCPU's ap_list_lock to be held. */ 512 + static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) 513 + { 514 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 515 + struct vgic_irq *irq; 516 + int count = 0; 517 + 518 + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); 519 + 520 + if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) { 521 + vgic_set_underflow(vcpu); 522 + vgic_sort_ap_list(vcpu); 523 + } 524 + 525 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 526 + spin_lock(&irq->irq_lock); 527 + 528 + if (unlikely(vgic_target_oracle(irq) != vcpu)) 529 + goto next; 530 + 531 + /* 532 + * If we get an SGI with multiple sources, try to get 533 + * them in all at once. 
534 + */ 535 + do { 536 + vgic_populate_lr(vcpu, irq, count++); 537 + } while (irq->source && count < kvm_vgic_global_state.nr_lr); 538 + 539 + next: 540 + spin_unlock(&irq->irq_lock); 541 + 542 + if (count == kvm_vgic_global_state.nr_lr) 543 + break; 544 + } 545 + 546 + vcpu->arch.vgic_cpu.used_lrs = count; 547 + 548 + /* Nuke remaining LRs */ 549 + for ( ; count < kvm_vgic_global_state.nr_lr; count++) 550 + vgic_clear_lr(vcpu, count); 551 + } 552 + 553 + /* Sync back the hardware VGIC state into our emulation after a guest's run. */ 554 + void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 555 + { 556 + vgic_process_maintenance_interrupt(vcpu); 557 + vgic_fold_lr_state(vcpu); 558 + vgic_prune_ap_list(vcpu); 559 + } 560 + 561 + /* Flush our emulation state into the GIC hardware before entering the guest. */ 562 + void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) 563 + { 564 + spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); 565 + vgic_flush_lr_state(vcpu); 566 + spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); 567 + } 568 + 569 + int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) 570 + { 571 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 572 + struct vgic_irq *irq; 573 + bool pending = false; 574 + 575 + if (!vcpu->kvm->arch.vgic.enabled) 576 + return false; 577 + 578 + spin_lock(&vgic_cpu->ap_list_lock); 579 + 580 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 581 + spin_lock(&irq->irq_lock); 582 + pending = irq->pending && irq->enabled; 583 + spin_unlock(&irq->irq_lock); 584 + 585 + if (pending) 586 + break; 587 + } 588 + 589 + spin_unlock(&vgic_cpu->ap_list_lock); 590 + 591 + return pending; 592 + } 593 + 594 + void vgic_kick_vcpus(struct kvm *kvm) 595 + { 596 + struct kvm_vcpu *vcpu; 597 + int c; 598 + 599 + /* 600 + * We've injected an interrupt, time to find out who deserves 601 + * a good kick... 602 + */ 603 + kvm_for_each_vcpu(c, vcpu, kvm) { 604 + if (kvm_vgic_vcpu_pending_irq(vcpu)) 605 + kvm_vcpu_kick(vcpu); 606 + } 607 + } 608 + 609 + bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) 610 + { 611 + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); 612 + bool map_is_active; 613 + 614 + spin_lock(&irq->irq_lock); 615 + map_is_active = irq->hw && irq->active; 616 + spin_unlock(&irq->irq_lock); 617 + 618 + return map_is_active; 619 + }
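
The ordering contract implemented by vgic_irq_cmp() above (active interrupts first, then pending-and-enabled ones, then ascending priority value) can be checked in isolation. A user-space sketch with a plain struct and qsort(), dropping the kernel's list and locking machinery (illustrative only; fake_irq is a made-up reduction of struct vgic_irq to its sort keys):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_irq {
	bool active, enabled, pending;
	unsigned char priority;	/* lower value == higher priority */
};

static int cmp(const void *pa, const void *pb)
{
	const struct fake_irq *a = pa, *b = pb;
	bool penda, pendb;

	/* Active interrupts sort first: they must stay resident in LRs. */
	if (a->active || b->active)
		return (int)b->active - (int)a->active;

	/* Then interrupts that are both pending and enabled. */
	penda = a->enabled && a->pending;
	pendb = b->enabled && b->pending;
	if (!penda || !pendb)
		return (int)pendb - (int)penda;

	/* Both deliverable: sort by ascending priority value. */
	return a->priority - b->priority;
}

int main(void)
{
	struct fake_irq list[] = {
		{ .enabled = true, .pending = true, .priority = 0xa0 },
		{ .active = true, .priority = 0xf0 },
		{ .enabled = true, .pending = true, .priority = 0x20 },
		{ .pending = true, .priority = 0x00 },	/* disabled: sorts last */
	};
	int i;

	qsort(list, 4, sizeof(list[0]), cmp);
	for (i = 0; i < 4; i++)
		printf("%d: active=%d prio=%#x\n", i, list[i].active, list[i].priority);
	return 0;
}

As with the GIC hardware, a lower numerical priority value means a more urgent interrupt, so a plain ascending sort puts the most urgent deliverable interrupt at the head of the list.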
+131
virt/kvm/arm/vgic/vgic.h
··· 1 + /* 2 + * Copyright (C) 2015, 2016 ARM Ltd. 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + #ifndef __KVM_ARM_VGIC_NEW_H__ 17 + #define __KVM_ARM_VGIC_NEW_H__ 18 + 19 + #include <linux/irqchip/arm-gic-common.h> 20 + 21 + #define PRODUCT_ID_KVM 0x4b /* ASCII code K */ 22 + #define IMPLEMENTER_ARM 0x43b 23 + 24 + #define VGIC_ADDR_UNDEF (-1) 25 + #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) 26 + 27 + #define INTERRUPT_ID_BITS_SPIS 10 28 + #define VGIC_PRI_BITS 5 29 + 30 + #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) 31 + 32 + struct vgic_vmcr { 33 + u32 ctlr; 34 + u32 abpr; 35 + u32 bpr; 36 + u32 pmr; 37 + }; 38 + 39 + struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, 40 + u32 intid); 41 + bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); 42 + void vgic_kick_vcpus(struct kvm *kvm); 43 + 44 + void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); 45 + void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); 46 + void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); 47 + void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); 48 + void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); 49 + int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); 50 + int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, 51 + int offset, u32 *val); 52 + int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, 53 + int offset, u32 *val); 54 + void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 55 + void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 56 + void vgic_v2_enable(struct kvm_vcpu *vcpu); 57 + int vgic_v2_probe(const struct gic_kvm_info *info); 58 + int vgic_v2_map_resources(struct kvm *kvm); 59 + int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, 60 + enum vgic_type); 61 + 62 + #ifdef CONFIG_KVM_ARM_VGIC_V3 63 + void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); 64 + void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); 65 + void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); 66 + void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); 67 + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); 68 + void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 69 + void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 70 + void vgic_v3_enable(struct kvm_vcpu *vcpu); 71 + int vgic_v3_probe(const struct gic_kvm_info *info); 72 + int vgic_v3_map_resources(struct kvm *kvm); 73 + int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); 74 + #else 75 + static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) 76 + { 77 + } 78 + 79 + static inline void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) 80 + { 81 + } 82 + 83 + static inline void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, 84 + struct vgic_irq *irq, int lr) 85 + { 86 + } 87 + 88 + static inline void vgic_v3_clear_lr(struct 
kvm_vcpu *vcpu, int lr) 89 + { 90 + } 91 + 92 + static inline void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) 93 + { 94 + } 95 + 96 + static inline 97 + void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 98 + { 99 + } 100 + 101 + static inline 102 + void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 103 + { 104 + } 105 + 106 + static inline void vgic_v3_enable(struct kvm_vcpu *vcpu) 107 + { 108 + } 109 + 110 + static inline int vgic_v3_probe(const struct gic_kvm_info *info) 111 + { 112 + return -ENODEV; 113 + } 114 + 115 + static inline int vgic_v3_map_resources(struct kvm *kvm) 116 + { 117 + return -ENODEV; 118 + } 119 + 120 + static inline int vgic_register_redist_iodevs(struct kvm *kvm, 121 + gpa_t dist_base_address) 122 + { 123 + return -ENODEV; 124 + } 125 + #endif 126 + 127 + void kvm_register_vgic_device(unsigned long type); 128 + int vgic_lazy_init(struct kvm *kvm); 129 + int vgic_init(struct kvm *kvm); 130 + 131 + #endif
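
The #else branch above is the usual kernel pattern for compiling a feature out: the GICv3 entry points collapse into empty inline bodies or -ENODEV stubs, so common code such as the dispatchers in vgic.c can call them unconditionally. A generic, self-contained illustration of the pattern (all names here are hypothetical, not from this patch):

#include <errno.h>
#include <stdio.h>

#ifdef CONFIG_HAS_WIDGET
int widget_probe(void);
void widget_tick(void);
#else
/* Feature compiled out: stubs keep every call site building unchanged. */
static inline int widget_probe(void)
{
	return -ENODEV;
}

static inline void widget_tick(void)
{
}
#endif

/* Common code needs no #ifdef of its own. */
static int common_init(void)
{
	int ret = widget_probe();

	if (ret)
		return ret;	/* -ENODEV when the feature is absent */
	widget_tick();
	return 0;
}

int main(void)
{
	printf("common_init() = %d\n", common_init());
	return 0;
}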
+177 -10
virt/kvm/kvm_main.c
··· 63 63 #define CREATE_TRACE_POINTS 64 64 #include <trace/events/kvm.h> 65 65 66 + /* Worst case buffer size needed for holding an integer. */ 67 + #define ITOA_MAX_LEN 12 68 + 66 69 MODULE_AUTHOR("Qumranet"); 67 70 MODULE_LICENSE("GPL"); 68 71 ··· 102 99 103 100 struct dentry *kvm_debugfs_dir; 104 101 EXPORT_SYMBOL_GPL(kvm_debugfs_dir); 102 + 103 + static int kvm_debugfs_num_entries; 104 + static const struct file_operations *stat_fops_per_vm[]; 105 105 106 106 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 107 107 unsigned long arg); ··· 548 542 kvfree(slots); 549 543 } 550 544 545 + static void kvm_destroy_vm_debugfs(struct kvm *kvm) 546 + { 547 + int i; 548 + 549 + if (!kvm->debugfs_dentry) 550 + return; 551 + 552 + debugfs_remove_recursive(kvm->debugfs_dentry); 553 + 554 + for (i = 0; i < kvm_debugfs_num_entries; i++) 555 + kfree(kvm->debugfs_stat_data[i]); 556 + kfree(kvm->debugfs_stat_data); 557 + } 558 + 559 + static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) 560 + { 561 + char dir_name[ITOA_MAX_LEN * 2]; 562 + struct kvm_stat_data *stat_data; 563 + struct kvm_stats_debugfs_item *p; 564 + 565 + if (!debugfs_initialized()) 566 + return 0; 567 + 568 + snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); 569 + kvm->debugfs_dentry = debugfs_create_dir(dir_name, 570 + kvm_debugfs_dir); 571 + if (!kvm->debugfs_dentry) 572 + return -ENOMEM; 573 + 574 + kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, 575 + sizeof(*kvm->debugfs_stat_data), 576 + GFP_KERNEL); 577 + if (!kvm->debugfs_stat_data) 578 + return -ENOMEM; 579 + 580 + for (p = debugfs_entries; p->name; p++) { 581 + stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL); 582 + if (!stat_data) 583 + return -ENOMEM; 584 + 585 + stat_data->kvm = kvm; 586 + stat_data->offset = p->offset; 587 + kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; 588 + if (!debugfs_create_file(p->name, 0444, 589 + kvm->debugfs_dentry, 590 + stat_data, 591 + stat_fops_per_vm[p->kind])) 592 + return -ENOMEM; 593 + } 594 + return 0; 595 + } 596 + 551 597 static struct kvm *kvm_create_vm(unsigned long type) 552 598 { 553 599 int r, i; ··· 705 647 int i; 706 648 struct mm_struct *mm = kvm->mm; 707 649 650 + kvm_destroy_vm_debugfs(kvm); 708 651 kvm_arch_sync_events(kvm); 709 652 spin_lock(&kvm_lock); 710 653 list_del(&kvm->vm_list); ··· 3058 2999 } 3059 3000 #endif 3060 3001 r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); 3061 - if (r < 0) 3002 + if (r < 0) { 3062 3003 kvm_put_kvm(kvm); 3004 + return r; 3005 + } 3006 + 3007 + if (kvm_create_vm_debugfs(kvm, r) < 0) { 3008 + kvm_put_kvm(kvm); 3009 + return -ENOMEM; 3010 + } 3063 3011 3064 3012 return r; 3065 3013 } ··· 3491 3425 .notifier_call = kvm_cpu_hotplug, 3492 3426 }; 3493 3427 3428 + static int kvm_debugfs_open(struct inode *inode, struct file *file, 3429 + int (*get)(void *, u64 *), int (*set)(void *, u64), 3430 + const char *fmt) 3431 + { 3432 + struct kvm_stat_data *stat_data = (struct kvm_stat_data *) 3433 + inode->i_private; 3434 + 3435 + /* The debugfs files are a reference to the kvm struct which 3436 + * is still valid when kvm_destroy_vm is called. 3437 + * To avoid the race between open and the removal of the debugfs 3438 + * directory we test against the users count. 
3439 + */ 3440 + if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) 3441 + return -ENOENT; 3442 + 3443 + if (simple_attr_open(inode, file, get, set, fmt)) { 3444 + kvm_put_kvm(stat_data->kvm); 3445 + return -ENOMEM; 3446 + } 3447 + 3448 + return 0; 3449 + } 3450 + 3451 + static int kvm_debugfs_release(struct inode *inode, struct file *file) 3452 + { 3453 + struct kvm_stat_data *stat_data = (struct kvm_stat_data *) 3454 + inode->i_private; 3455 + 3456 + simple_attr_release(inode, file); 3457 + kvm_put_kvm(stat_data->kvm); 3458 + 3459 + return 0; 3460 + } 3461 + 3462 + static int vm_stat_get_per_vm(void *data, u64 *val) 3463 + { 3464 + struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; 3465 + 3466 + *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset); 3467 + 3468 + return 0; 3469 + } 3470 + 3471 + static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file) 3472 + { 3473 + __simple_attr_check_format("%llu\n", 0ull); 3474 + return kvm_debugfs_open(inode, file, vm_stat_get_per_vm, 3475 + NULL, "%llu\n"); 3476 + } 3477 + 3478 + static const struct file_operations vm_stat_get_per_vm_fops = { 3479 + .owner = THIS_MODULE, 3480 + .open = vm_stat_get_per_vm_open, 3481 + .release = kvm_debugfs_release, 3482 + .read = simple_attr_read, 3483 + .write = simple_attr_write, 3484 + .llseek = generic_file_llseek, 3485 + }; 3486 + 3487 + static int vcpu_stat_get_per_vm(void *data, u64 *val) 3488 + { 3489 + int i; 3490 + struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; 3491 + struct kvm_vcpu *vcpu; 3492 + 3493 + *val = 0; 3494 + 3495 + kvm_for_each_vcpu(i, vcpu, stat_data->kvm) 3496 + *val += *(u32 *)((void *)vcpu + stat_data->offset); 3497 + 3498 + return 0; 3499 + } 3500 + 3501 + static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file) 3502 + { 3503 + __simple_attr_check_format("%llu\n", 0ull); 3504 + return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm, 3505 + NULL, "%llu\n"); 3506 + } 3507 + 3508 + static const struct file_operations vcpu_stat_get_per_vm_fops = { 3509 + .owner = THIS_MODULE, 3510 + .open = vcpu_stat_get_per_vm_open, 3511 + .release = kvm_debugfs_release, 3512 + .read = simple_attr_read, 3513 + .write = simple_attr_write, 3514 + .llseek = generic_file_llseek, 3515 + }; 3516 + 3517 + static const struct file_operations *stat_fops_per_vm[] = { 3518 + [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops, 3519 + [KVM_STAT_VM] = &vm_stat_get_per_vm_fops, 3520 + }; 3521 + 3494 3522 static int vm_stat_get(void *_offset, u64 *val) 3495 3523 { 3496 3524 unsigned offset = (long)_offset; 3497 3525 struct kvm *kvm; 3526 + struct kvm_stat_data stat_tmp = {.offset = offset}; 3527 + u64 tmp_val; 3498 3528 3499 3529 *val = 0; 3500 3530 spin_lock(&kvm_lock); 3501 - list_for_each_entry(kvm, &vm_list, vm_list) 3502 - *val += *(u32 *)((void *)kvm + offset); 3531 + list_for_each_entry(kvm, &vm_list, vm_list) { 3532 + stat_tmp.kvm = kvm; 3533 + vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); 3534 + *val += tmp_val; 3535 + } 3503 3536 spin_unlock(&kvm_lock); 3504 3537 return 0; 3505 3538 } ··· 3609 3444 { 3610 3445 unsigned offset = (long)_offset; 3611 3446 struct kvm *kvm; 3612 - struct kvm_vcpu *vcpu; 3613 - int i; 3447 + struct kvm_stat_data stat_tmp = {.offset = offset}; 3448 + u64 tmp_val; 3614 3449 3615 3450 *val = 0; 3616 3451 spin_lock(&kvm_lock); 3617 - list_for_each_entry(kvm, &vm_list, vm_list) 3618 - kvm_for_each_vcpu(i, vcpu, kvm) 3619 - *val += *(u32 *)((void *)vcpu + offset); 3620 - 3452 + list_for_each_entry(kvm, 
&vm_list, vm_list) { 3453 + stat_tmp.kvm = kvm; 3454 + vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); 3455 + *val += tmp_val; 3456 + } 3621 3457 spin_unlock(&kvm_lock); 3622 3458 return 0; 3623 3459 } ··· 3639 3473 if (kvm_debugfs_dir == NULL) 3640 3474 goto out; 3641 3475 3642 - for (p = debugfs_entries; p->name; ++p) { 3476 + kvm_debugfs_num_entries = 0; 3477 + for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { 3643 3478 if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 3644 3479 (void *)(long)p->offset, 3645 3480 stat_fops[p->kind]))
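
With kvm_create_vm_debugfs() wired into VM creation, every VM gets a debugfs directory named "<pid of the creating task>-<vm fd>" containing one file per statistic, which is what the updated kvm_stat tool consumes. A small user-space reader (illustrative; assumes debugfs is mounted at /sys/kernel/debug, and uses "exits" purely as an example stat name, since the available names depend on the architecture):

#include <stdio.h>

int main(int argc, char **argv)
{
	char path[256];
	unsigned long long val;
	FILE *f;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <pid> <vm-fd>\n", argv[0]);
		return 1;
	}

	/* Directory name format comes from kvm_create_vm_debugfs(): "%d-%d". */
	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/kvm/%s-%s/exits", argv[1], argv[2]);

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%llu", &val) == 1)	/* the stat files print "%llu\n" */
		printf("%s = %llu\n", path, val);
	fclose(f);
	return 0;
}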